xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision ca3fa75bd98f564f8d45d53ada4cc05613614b83)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*ca3fa75bSLois Curfman McInnes static char vcid[] = "$Id: mpidense.c,v 1.118 1999/07/26 01:16:26 curfman Exp curfman $";
38965ea79SLois Curfman McInnes #endif
48965ea79SLois Curfman McInnes 
5ed3cc1f0SBarry Smith /*
6ed3cc1f0SBarry Smith    Basic functions for basic parallel dense matrices.
7ed3cc1f0SBarry Smith */
8ed3cc1f0SBarry Smith 
970f55243SBarry Smith #include "src/mat/impls/dense/mpi/mpidense.h"
10f5eb4b81SSatish Balay #include "src/vec/vecimpl.h"
118965ea79SLois Curfman McInnes 
127ef1d9bdSSatish Balay extern int MatSetUpMultiply_MPIDense(Mat);
137ef1d9bdSSatish Balay 
145615d1e5SSatish Balay #undef __FUNC__
155615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIDense"
168f6be9afSLois Curfman McInnes int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
178965ea79SLois Curfman McInnes {
1839b7565bSBarry Smith   Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
1939b7565bSBarry Smith   int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
2039b7565bSBarry Smith   int          roworiented = A->roworiented;
218965ea79SLois Curfman McInnes 
223a40ed3dSBarry Smith   PetscFunctionBegin;
238965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
245ef9f2a5SBarry Smith     if (idxm[i] < 0) continue;
25a8c6a408SBarry Smith     if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
268965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
278965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
2839b7565bSBarry Smith       if (roworiented) {
2939b7565bSBarry Smith         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
303a40ed3dSBarry Smith       } else {
318965ea79SLois Curfman McInnes         for ( j=0; j<n; j++ ) {
325ef9f2a5SBarry Smith           if (idxn[j] < 0) continue;
33a8c6a408SBarry Smith           if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
3439b7565bSBarry Smith           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
3539b7565bSBarry Smith         }
368965ea79SLois Curfman McInnes       }
373a40ed3dSBarry Smith     } else {
383782ba37SSatish Balay       if ( !A->donotstash) {
3939b7565bSBarry Smith         if (roworiented) {
408798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
41d36fbae8SSatish Balay         } else {
428798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
4339b7565bSBarry Smith         }
44b49de8d1SLois Curfman McInnes       }
45b49de8d1SLois Curfman McInnes     }
463782ba37SSatish Balay   }
473a40ed3dSBarry Smith   PetscFunctionReturn(0);
48b49de8d1SLois Curfman McInnes }
49b49de8d1SLois Curfman McInnes 
505615d1e5SSatish Balay #undef __FUNC__
515615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIDense"
528f6be9afSLois Curfman McInnes int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
53b49de8d1SLois Curfman McInnes {
54b49de8d1SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
55b49de8d1SLois Curfman McInnes   int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;
56b49de8d1SLois Curfman McInnes 
573a40ed3dSBarry Smith   PetscFunctionBegin;
58b49de8d1SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
59a8c6a408SBarry Smith     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
60a8c6a408SBarry Smith     if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
61b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
62b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
63b49de8d1SLois Curfman McInnes       for ( j=0; j<n; j++ ) {
64a8c6a408SBarry Smith         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
65a8c6a408SBarry Smith         if (idxn[j] >= mdn->N) {
66a8c6a408SBarry Smith           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
67a8c6a408SBarry Smith         }
68b49de8d1SLois Curfman McInnes         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
69b49de8d1SLois Curfman McInnes       }
70a8c6a408SBarry Smith     } else {
71a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
728965ea79SLois Curfman McInnes     }
738965ea79SLois Curfman McInnes   }
743a40ed3dSBarry Smith   PetscFunctionReturn(0);
758965ea79SLois Curfman McInnes }
768965ea79SLois Curfman McInnes 
775615d1e5SSatish Balay #undef __FUNC__
785615d1e5SSatish Balay #define __FUNC__ "MatGetArray_MPIDense"
798f6be9afSLois Curfman McInnes int MatGetArray_MPIDense(Mat A,Scalar **array)
80ff14e315SSatish Balay {
81ff14e315SSatish Balay   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
82ff14e315SSatish Balay   int          ierr;
83ff14e315SSatish Balay 
843a40ed3dSBarry Smith   PetscFunctionBegin;
85ff14e315SSatish Balay   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
863a40ed3dSBarry Smith   PetscFunctionReturn(0);
87ff14e315SSatish Balay }
88ff14e315SSatish Balay 
895615d1e5SSatish Balay #undef __FUNC__
90*ca3fa75bSLois Curfman McInnes #define __FUNC__ "MatGetSubMatrix_MPIDense"
91*ca3fa75bSLois Curfman McInnes static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
92*ca3fa75bSLois Curfman McInnes {
93*ca3fa75bSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd;
94*ca3fa75bSLois Curfman McInnes   Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data;
95*ca3fa75bSLois Curfman McInnes   int          i, j, ierr, *irow, *icol, nrows, ncols, nlrows, nlcols;
96*ca3fa75bSLois Curfman McInnes   Scalar       *av, *bv, *v = lmat->v;
97*ca3fa75bSLois Curfman McInnes   Mat          newmat;
98*ca3fa75bSLois Curfman McInnes 
99*ca3fa75bSLois Curfman McInnes   PetscFunctionBegin;
100*ca3fa75bSLois Curfman McInnes   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
101*ca3fa75bSLois Curfman McInnes   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
102*ca3fa75bSLois Curfman McInnes   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
103*ca3fa75bSLois Curfman McInnes   ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);
104*ca3fa75bSLois Curfman McInnes 
105*ca3fa75bSLois Curfman McInnes   /* No parallel redistribution currently supported! Should really check each index set
106*ca3fa75bSLois Curfman McInnes      to comfirm that it is OK ... temporarily just support retaining all rows on each proc. */
107*ca3fa75bSLois Curfman McInnes 
108*ca3fa75bSLois Curfman McInnes   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
109*ca3fa75bSLois Curfman McInnes   if (nlrows != nrows) SETERRQ(PETSC_ERR_ARG_SIZ,0,
110*ca3fa75bSLois Curfman McInnes     "Currently supports only submatrix with ALL rows of additional matrix (same partitioning)");
111*ca3fa75bSLois Curfman McInnes 
112*ca3fa75bSLois Curfman McInnes   /* Check submatrix call */
113*ca3fa75bSLois Curfman McInnes   if (scall == MAT_REUSE_MATRIX) {
114*ca3fa75bSLois Curfman McInnes     int n_cols,n_rows;
115*ca3fa75bSLois Curfman McInnes     ierr = MatGetSize(*B,&n_rows,&n_cols);CHKERRQ(ierr);
116*ca3fa75bSLois Curfman McInnes     if (n_rows != nrows || n_cols != ncols) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size");
117*ca3fa75bSLois Curfman McInnes     newmat = *B;
118*ca3fa75bSLois Curfman McInnes   } else {
119*ca3fa75bSLois Curfman McInnes     /* Create and fill new matrix */
120*ca3fa75bSLois Curfman McInnes     ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
121*ca3fa75bSLois Curfman McInnes   }
122*ca3fa75bSLois Curfman McInnes 
123*ca3fa75bSLois Curfman McInnes   /* Now extract the data pointers and do the copy, column at a time */
124*ca3fa75bSLois Curfman McInnes   newmatd = (Mat_MPIDense *) newmat->data;
125*ca3fa75bSLois Curfman McInnes   bv = ((Mat_SeqDense *)newmatd->A->data)->v;
126*ca3fa75bSLois Curfman McInnes 
127*ca3fa75bSLois Curfman McInnes   for ( i=0; i<ncols; i++ ) {
128*ca3fa75bSLois Curfman McInnes     av = v + nlrows*icol[i];
129*ca3fa75bSLois Curfman McInnes     for (j=0; j<nrows; j++ ) {
130*ca3fa75bSLois Curfman McInnes       *bv++ = av[irow[j]];
131*ca3fa75bSLois Curfman McInnes     }
132*ca3fa75bSLois Curfman McInnes   }
133*ca3fa75bSLois Curfman McInnes 
134*ca3fa75bSLois Curfman McInnes   /* Assemble the matrices so that the correct flags are set */
135*ca3fa75bSLois Curfman McInnes   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
136*ca3fa75bSLois Curfman McInnes   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
137*ca3fa75bSLois Curfman McInnes 
138*ca3fa75bSLois Curfman McInnes   /* Free work space */
139*ca3fa75bSLois Curfman McInnes   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
140*ca3fa75bSLois Curfman McInnes   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
141*ca3fa75bSLois Curfman McInnes   *B = newmat;
142*ca3fa75bSLois Curfman McInnes   PetscFunctionReturn(0);
143*ca3fa75bSLois Curfman McInnes }
144*ca3fa75bSLois Curfman McInnes 
145*ca3fa75bSLois Curfman McInnes #undef __FUNC__
1465615d1e5SSatish Balay #define __FUNC__ "MatRestoreArray_MPIDense"
1478f6be9afSLois Curfman McInnes int MatRestoreArray_MPIDense(Mat A,Scalar **array)
148ff14e315SSatish Balay {
1493a40ed3dSBarry Smith   PetscFunctionBegin;
1503a40ed3dSBarry Smith   PetscFunctionReturn(0);
151ff14e315SSatish Balay }
152ff14e315SSatish Balay 
1535615d1e5SSatish Balay #undef __FUNC__
1545615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIDense"
1558f6be9afSLois Curfman McInnes int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
1568965ea79SLois Curfman McInnes {
15739ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
1588965ea79SLois Curfman McInnes   MPI_Comm     comm = mat->comm;
159d36fbae8SSatish Balay   int          ierr,nstash,reallocs;
1608965ea79SLois Curfman McInnes   InsertMode   addv;
1618965ea79SLois Curfman McInnes 
1623a40ed3dSBarry Smith   PetscFunctionBegin;
1638965ea79SLois Curfman McInnes   /* make sure all processors are either in INSERTMODE or ADDMODE */
164ca161407SBarry Smith   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
1657056b6fcSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
166a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
1678965ea79SLois Curfman McInnes   }
168e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
1698965ea79SLois Curfman McInnes 
1708798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
1718798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
172d36fbae8SSatish Balay   PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
173d36fbae8SSatish Balay            nstash,reallocs);
1743a40ed3dSBarry Smith   PetscFunctionReturn(0);
1758965ea79SLois Curfman McInnes }
1768965ea79SLois Curfman McInnes 
1775615d1e5SSatish Balay #undef __FUNC__
1785615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIDense"
1798f6be9afSLois Curfman McInnes int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
1808965ea79SLois Curfman McInnes {
18139ddd567SLois Curfman McInnes   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
1827ef1d9bdSSatish Balay   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
1837ef1d9bdSSatish Balay   Scalar       *val;
184e0fa3b82SLois Curfman McInnes   InsertMode   addv=mat->insertmode;
1858965ea79SLois Curfman McInnes 
1863a40ed3dSBarry Smith   PetscFunctionBegin;
1878965ea79SLois Curfman McInnes   /*  wait on receives */
1887ef1d9bdSSatish Balay   while (1) {
1898798bf22SSatish Balay     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1907ef1d9bdSSatish Balay     if (!flg) break;
1918965ea79SLois Curfman McInnes 
1927ef1d9bdSSatish Balay     for ( i=0; i<n; ) {
1937ef1d9bdSSatish Balay       /* Now identify the consecutive vals belonging to the same row */
1947ef1d9bdSSatish Balay       for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
1957ef1d9bdSSatish Balay       if (j < n) ncols = j-i;
1967ef1d9bdSSatish Balay       else       ncols = n-i;
1977ef1d9bdSSatish Balay       /* Now assemble all these values with a single function call */
1987ef1d9bdSSatish Balay       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
1997ef1d9bdSSatish Balay       i = j;
2008965ea79SLois Curfman McInnes     }
2017ef1d9bdSSatish Balay   }
2028798bf22SSatish Balay   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
2038965ea79SLois Curfman McInnes 
20439ddd567SLois Curfman McInnes   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
20539ddd567SLois Curfman McInnes   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
2068965ea79SLois Curfman McInnes 
2076d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
20839ddd567SLois Curfman McInnes     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
2098965ea79SLois Curfman McInnes   }
2103a40ed3dSBarry Smith   PetscFunctionReturn(0);
2118965ea79SLois Curfman McInnes }
2128965ea79SLois Curfman McInnes 
2135615d1e5SSatish Balay #undef __FUNC__
2145615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIDense"
2158f6be9afSLois Curfman McInnes int MatZeroEntries_MPIDense(Mat A)
2168965ea79SLois Curfman McInnes {
2173a40ed3dSBarry Smith   int          ierr;
21839ddd567SLois Curfman McInnes   Mat_MPIDense *l = (Mat_MPIDense *) A->data;
2193a40ed3dSBarry Smith 
2203a40ed3dSBarry Smith   PetscFunctionBegin;
2213a40ed3dSBarry Smith   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
2223a40ed3dSBarry Smith   PetscFunctionReturn(0);
2238965ea79SLois Curfman McInnes }
2248965ea79SLois Curfman McInnes 
2255615d1e5SSatish Balay #undef __FUNC__
2265615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIDense"
2278f6be9afSLois Curfman McInnes int MatGetBlockSize_MPIDense(Mat A,int *bs)
2284e220ebcSLois Curfman McInnes {
2293a40ed3dSBarry Smith   PetscFunctionBegin;
2304e220ebcSLois Curfman McInnes   *bs = 1;
2313a40ed3dSBarry Smith   PetscFunctionReturn(0);
2324e220ebcSLois Curfman McInnes }
2334e220ebcSLois Curfman McInnes 
2348965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the
2358965ea79SLois Curfman McInnes    matrix is square and the column and row owerships are identical.
2368965ea79SLois Curfman McInnes    This is a BUG. The only way to fix it seems to be to access
2373501a2bdSLois Curfman McInnes    mdn->A and mdn->B directly and not through the MatZeroRows()
2388965ea79SLois Curfman McInnes    routine.
2398965ea79SLois Curfman McInnes */
2405615d1e5SSatish Balay #undef __FUNC__
2415615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIDense"
2428f6be9afSLois Curfman McInnes int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
2438965ea79SLois Curfman McInnes {
24439ddd567SLois Curfman McInnes   Mat_MPIDense   *l = (Mat_MPIDense *) A->data;
2458965ea79SLois Curfman McInnes   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
2468965ea79SLois Curfman McInnes   int            *procs,*nprocs,j,found,idx,nsends,*work;
2478965ea79SLois Curfman McInnes   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
2488965ea79SLois Curfman McInnes   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
2498965ea79SLois Curfman McInnes   int            *lens,imdex,*lrows,*values;
2508965ea79SLois Curfman McInnes   MPI_Comm       comm = A->comm;
2518965ea79SLois Curfman McInnes   MPI_Request    *send_waits,*recv_waits;
2528965ea79SLois Curfman McInnes   MPI_Status     recv_status,*send_status;
2538965ea79SLois Curfman McInnes   IS             istmp;
2548965ea79SLois Curfman McInnes 
2553a40ed3dSBarry Smith   PetscFunctionBegin;
25677c4ece6SBarry Smith   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
2578965ea79SLois Curfman McInnes   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
2588965ea79SLois Curfman McInnes 
2598965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
2600452661fSBarry Smith   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
261549d3d68SSatish Balay   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
262549d3d68SSatish Balay   procs  = nprocs + size;
2630452661fSBarry Smith   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
2648965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
2658965ea79SLois Curfman McInnes     idx = rows[i];
2668965ea79SLois Curfman McInnes     found = 0;
2678965ea79SLois Curfman McInnes     for ( j=0; j<size; j++ ) {
2688965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
2698965ea79SLois Curfman McInnes         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
2708965ea79SLois Curfman McInnes       }
2718965ea79SLois Curfman McInnes     }
272a8c6a408SBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
2738965ea79SLois Curfman McInnes   }
2748965ea79SLois Curfman McInnes   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
2758965ea79SLois Curfman McInnes 
2768965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
2770452661fSBarry Smith   work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
278ca161407SBarry Smith   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2798965ea79SLois Curfman McInnes   nrecvs = work[rank];
280ca161407SBarry Smith   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
2818965ea79SLois Curfman McInnes   nmax   = work[rank];
282606d414cSSatish Balay   ierr = PetscFree(work);CHKERRQ(ierr);
2838965ea79SLois Curfman McInnes 
2848965ea79SLois Curfman McInnes   /* post receives:   */
2853a40ed3dSBarry Smith   rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
2863a40ed3dSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
2878965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
288ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
2898965ea79SLois Curfman McInnes   }
2908965ea79SLois Curfman McInnes 
2918965ea79SLois Curfman McInnes   /* do sends:
2928965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
2938965ea79SLois Curfman McInnes          the ith processor
2948965ea79SLois Curfman McInnes   */
2950452661fSBarry Smith   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
2967056b6fcSBarry Smith   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
2970452661fSBarry Smith   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
2988965ea79SLois Curfman McInnes   starts[0]  = 0;
2998965ea79SLois Curfman McInnes   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3008965ea79SLois Curfman McInnes   for ( i=0; i<N; i++ ) {
3018965ea79SLois Curfman McInnes     svalues[starts[owner[i]]++] = rows[i];
3028965ea79SLois Curfman McInnes   }
3038965ea79SLois Curfman McInnes   ISRestoreIndices(is,&rows);
3048965ea79SLois Curfman McInnes 
3058965ea79SLois Curfman McInnes   starts[0] = 0;
3068965ea79SLois Curfman McInnes   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
3078965ea79SLois Curfman McInnes   count = 0;
3088965ea79SLois Curfman McInnes   for ( i=0; i<size; i++ ) {
3098965ea79SLois Curfman McInnes     if (procs[i]) {
310ca161407SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
3118965ea79SLois Curfman McInnes     }
3128965ea79SLois Curfman McInnes   }
313606d414cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
3148965ea79SLois Curfman McInnes 
3158965ea79SLois Curfman McInnes   base = owners[rank];
3168965ea79SLois Curfman McInnes 
3178965ea79SLois Curfman McInnes   /*  wait on receives */
3180452661fSBarry Smith   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
3198965ea79SLois Curfman McInnes   source = lens + nrecvs;
3208965ea79SLois Curfman McInnes   count  = nrecvs; slen = 0;
3218965ea79SLois Curfman McInnes   while (count) {
322ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
3238965ea79SLois Curfman McInnes     /* unpack receives into our local space */
324ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
3258965ea79SLois Curfman McInnes     source[imdex]  = recv_status.MPI_SOURCE;
3268965ea79SLois Curfman McInnes     lens[imdex]  = n;
3278965ea79SLois Curfman McInnes     slen += n;
3288965ea79SLois Curfman McInnes     count--;
3298965ea79SLois Curfman McInnes   }
330606d414cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
3318965ea79SLois Curfman McInnes 
3328965ea79SLois Curfman McInnes   /* move the data into the send scatter */
3330452661fSBarry Smith   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
3348965ea79SLois Curfman McInnes   count = 0;
3358965ea79SLois Curfman McInnes   for ( i=0; i<nrecvs; i++ ) {
3368965ea79SLois Curfman McInnes     values = rvalues + i*nmax;
3378965ea79SLois Curfman McInnes     for ( j=0; j<lens[i]; j++ ) {
3388965ea79SLois Curfman McInnes       lrows[count++] = values[j] - base;
3398965ea79SLois Curfman McInnes     }
3408965ea79SLois Curfman McInnes   }
341606d414cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
342606d414cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
343606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
344606d414cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
3458965ea79SLois Curfman McInnes 
3468965ea79SLois Curfman McInnes   /* actually zap the local rows */
347029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
3488965ea79SLois Curfman McInnes   PLogObjectParent(A,istmp);
349606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
3508965ea79SLois Curfman McInnes   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
3518965ea79SLois Curfman McInnes   ierr = ISDestroy(istmp);CHKERRQ(ierr);
3528965ea79SLois Curfman McInnes 
3538965ea79SLois Curfman McInnes   /* wait on sends */
3548965ea79SLois Curfman McInnes   if (nsends) {
3557056b6fcSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
356ca161407SBarry Smith     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
357606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
3588965ea79SLois Curfman McInnes   }
359606d414cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
360606d414cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
3618965ea79SLois Curfman McInnes 
3623a40ed3dSBarry Smith   PetscFunctionReturn(0);
3638965ea79SLois Curfman McInnes }
3648965ea79SLois Curfman McInnes 
3655615d1e5SSatish Balay #undef __FUNC__
3665615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIDense"
3678f6be9afSLois Curfman McInnes int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
3688965ea79SLois Curfman McInnes {
36939ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3708965ea79SLois Curfman McInnes   int          ierr;
371c456f294SBarry Smith 
3723a40ed3dSBarry Smith   PetscFunctionBegin;
37343a90d84SBarry Smith   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
37443a90d84SBarry Smith   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
37544cd7ae7SLois Curfman McInnes   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
3763a40ed3dSBarry Smith   PetscFunctionReturn(0);
3778965ea79SLois Curfman McInnes }
3788965ea79SLois Curfman McInnes 
3795615d1e5SSatish Balay #undef __FUNC__
3805615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIDense"
3818f6be9afSLois Curfman McInnes int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
3828965ea79SLois Curfman McInnes {
38339ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
3848965ea79SLois Curfman McInnes   int          ierr;
385c456f294SBarry Smith 
3863a40ed3dSBarry Smith   PetscFunctionBegin;
38743a90d84SBarry Smith   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
38843a90d84SBarry Smith   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
38944cd7ae7SLois Curfman McInnes   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
3903a40ed3dSBarry Smith   PetscFunctionReturn(0);
3918965ea79SLois Curfman McInnes }
3928965ea79SLois Curfman McInnes 
3935615d1e5SSatish Balay #undef __FUNC__
3945615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIDense"
3958f6be9afSLois Curfman McInnes int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
396096963f5SLois Curfman McInnes {
397096963f5SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
398096963f5SLois Curfman McInnes   int          ierr;
3993501a2bdSLois Curfman McInnes   Scalar       zero = 0.0;
400096963f5SLois Curfman McInnes 
4013a40ed3dSBarry Smith   PetscFunctionBegin;
4023501a2bdSLois Curfman McInnes   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
40344cd7ae7SLois Curfman McInnes   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
404537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
405537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
4063a40ed3dSBarry Smith   PetscFunctionReturn(0);
407096963f5SLois Curfman McInnes }
408096963f5SLois Curfman McInnes 
4095615d1e5SSatish Balay #undef __FUNC__
4105615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIDense"
4118f6be9afSLois Curfman McInnes int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
412096963f5SLois Curfman McInnes {
413096963f5SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
414096963f5SLois Curfman McInnes   int          ierr;
415096963f5SLois Curfman McInnes 
4163a40ed3dSBarry Smith   PetscFunctionBegin;
4173501a2bdSLois Curfman McInnes   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
41844cd7ae7SLois Curfman McInnes   ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
419537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
420537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
4213a40ed3dSBarry Smith   PetscFunctionReturn(0);
422096963f5SLois Curfman McInnes }
423096963f5SLois Curfman McInnes 
4245615d1e5SSatish Balay #undef __FUNC__
4255615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIDense"
4268f6be9afSLois Curfman McInnes int MatGetDiagonal_MPIDense(Mat A,Vec v)
4278965ea79SLois Curfman McInnes {
42839ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
429096963f5SLois Curfman McInnes   Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
43044cd7ae7SLois Curfman McInnes   int          ierr, len, i, n, m = a->m, radd;
43144cd7ae7SLois Curfman McInnes   Scalar       *x, zero = 0.0;
432ed3cc1f0SBarry Smith 
4333a40ed3dSBarry Smith   PetscFunctionBegin;
43444cd7ae7SLois Curfman McInnes   VecSet(&zero,v);
435096963f5SLois Curfman McInnes   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
436096963f5SLois Curfman McInnes   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
437a8c6a408SBarry Smith   if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
43844cd7ae7SLois Curfman McInnes   len = PetscMin(aloc->m,aloc->n);
4397ddc982cSLois Curfman McInnes   radd = a->rstart*m;
44044cd7ae7SLois Curfman McInnes   for ( i=0; i<len; i++ ) {
441096963f5SLois Curfman McInnes     x[i] = aloc->v[radd + i*m + i];
442096963f5SLois Curfman McInnes   }
4439a8c540fSSatish Balay   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
4443a40ed3dSBarry Smith   PetscFunctionReturn(0);
4458965ea79SLois Curfman McInnes }
4468965ea79SLois Curfman McInnes 
4475615d1e5SSatish Balay #undef __FUNC__
4485615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIDense"
449e1311b90SBarry Smith int MatDestroy_MPIDense(Mat mat)
4508965ea79SLois Curfman McInnes {
4513501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4528965ea79SLois Curfman McInnes   int          ierr;
453ed3cc1f0SBarry Smith 
4543a40ed3dSBarry Smith   PetscFunctionBegin;
45594d884c6SBarry Smith 
45694d884c6SBarry Smith   if (mat->mapping) {
45794d884c6SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
45894d884c6SBarry Smith   }
45994d884c6SBarry Smith   if (mat->bmapping) {
46094d884c6SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
46194d884c6SBarry Smith   }
462aa482453SBarry Smith #if defined(PETSC_USE_LOG)
463e1311b90SBarry Smith   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
4648965ea79SLois Curfman McInnes #endif
4658798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
466606d414cSSatish Balay   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
4673501a2bdSLois Curfman McInnes   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
4683501a2bdSLois Curfman McInnes   if (mdn->lvec)   VecDestroy(mdn->lvec);
4693501a2bdSLois Curfman McInnes   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
470622d7880SLois Curfman McInnes   if (mdn->factor) {
471606d414cSSatish Balay     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
472606d414cSSatish Balay     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
473606d414cSSatish Balay     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
474606d414cSSatish Balay     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
475622d7880SLois Curfman McInnes   }
476606d414cSSatish Balay   ierr = PetscFree(mdn);CHKERRQ(ierr);
47761b13de0SBarry Smith   if (mat->rmap) {
47861b13de0SBarry Smith     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
47961b13de0SBarry Smith   }
48061b13de0SBarry Smith   if (mat->cmap) {
48161b13de0SBarry Smith     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
48290f02eecSBarry Smith   }
4838965ea79SLois Curfman McInnes   PLogObjectDestroy(mat);
4840452661fSBarry Smith   PetscHeaderDestroy(mat);
4853a40ed3dSBarry Smith   PetscFunctionReturn(0);
4868965ea79SLois Curfman McInnes }
48739ddd567SLois Curfman McInnes 
4885615d1e5SSatish Balay #undef __FUNC__
4895615d1e5SSatish Balay #define __FUNC__ "MatView_MPIDense_Binary"
49039ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
4918965ea79SLois Curfman McInnes {
49239ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
4938965ea79SLois Curfman McInnes   int          ierr;
4947056b6fcSBarry Smith 
4953a40ed3dSBarry Smith   PetscFunctionBegin;
49639ddd567SLois Curfman McInnes   if (mdn->size == 1) {
49739ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
4988965ea79SLois Curfman McInnes   }
499a8c6a408SBarry Smith   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
5003a40ed3dSBarry Smith   PetscFunctionReturn(0);
5018965ea79SLois Curfman McInnes }
5028965ea79SLois Curfman McInnes 
5035615d1e5SSatish Balay #undef __FUNC__
5045615d1e5SSatish Balay #define __FUNC__ "MatView_MPIDense_ASCII"
50539ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
5068965ea79SLois Curfman McInnes {
50739ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
50877ed5343SBarry Smith   int          ierr, format, size = mdn->size, rank = mdn->rank;
5098965ea79SLois Curfman McInnes   FILE         *fd;
51019bcc07fSBarry Smith   ViewerType   vtype;
5118965ea79SLois Curfman McInnes 
5123a40ed3dSBarry Smith   PetscFunctionBegin;
5133a40ed3dSBarry Smith   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
51490ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
515888f2ed8SSatish Balay   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
516639f9d9dSBarry Smith   if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
5174e220ebcSLois Curfman McInnes     MatInfo info;
518888f2ed8SSatish Balay     ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
51977c4ece6SBarry Smith     PetscSequentialPhaseBegin(mat->comm,1);
5204e220ebcSLois Curfman McInnes       fprintf(fd,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
5214e220ebcSLois Curfman McInnes          (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
522096963f5SLois Curfman McInnes       fflush(fd);
52377c4ece6SBarry Smith     PetscSequentialPhaseEnd(mat->comm,1);
5243501a2bdSLois Curfman McInnes     ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
5253a40ed3dSBarry Smith     PetscFunctionReturn(0);
52696f6c058SBarry Smith   } else if (format == VIEWER_FORMAT_ASCII_INFO) {
5273a40ed3dSBarry Smith     PetscFunctionReturn(0);
5288965ea79SLois Curfman McInnes   }
52977ed5343SBarry Smith 
5308965ea79SLois Curfman McInnes   if (size == 1) {
53139ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
5323a40ed3dSBarry Smith   } else {
5338965ea79SLois Curfman McInnes     /* assemble the entire matrix onto first processor. */
5348965ea79SLois Curfman McInnes     Mat          A;
53539ddd567SLois Curfman McInnes     int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
53639ddd567SLois Curfman McInnes     Scalar       *vals;
53739ddd567SLois Curfman McInnes     Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;
5388965ea79SLois Curfman McInnes 
5398965ea79SLois Curfman McInnes     if (!rank) {
5400513a670SBarry Smith       ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
5413a40ed3dSBarry Smith     } else {
5420513a670SBarry Smith       ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
5438965ea79SLois Curfman McInnes     }
5448965ea79SLois Curfman McInnes     PLogObjectParent(mat,A);
5458965ea79SLois Curfman McInnes 
54639ddd567SLois Curfman McInnes     /* Copy the matrix ... This isn't the most efficient means,
54739ddd567SLois Curfman McInnes        but it's quick for now */
54839ddd567SLois Curfman McInnes     row = mdn->rstart; m = Amdn->m;
5498965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
55039ddd567SLois Curfman McInnes       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
55139ddd567SLois Curfman McInnes       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
55239ddd567SLois Curfman McInnes       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
55339ddd567SLois Curfman McInnes       row++;
5548965ea79SLois Curfman McInnes     }
5558965ea79SLois Curfman McInnes 
5566d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5576d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5588965ea79SLois Curfman McInnes     if (!rank) {
55939ddd567SLois Curfman McInnes       ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr);
5608965ea79SLois Curfman McInnes     }
5618965ea79SLois Curfman McInnes     ierr = MatDestroy(A);CHKERRQ(ierr);
5628965ea79SLois Curfman McInnes   }
5633a40ed3dSBarry Smith   PetscFunctionReturn(0);
5648965ea79SLois Curfman McInnes }
5658965ea79SLois Curfman McInnes 
5665615d1e5SSatish Balay #undef __FUNC__
5675615d1e5SSatish Balay #define __FUNC__ "MatView_MPIDense"
568e1311b90SBarry Smith int MatView_MPIDense(Mat mat,Viewer viewer)
5698965ea79SLois Curfman McInnes {
57039ddd567SLois Curfman McInnes   int          ierr;
571bcd2baecSBarry Smith   ViewerType   vtype;
5728965ea79SLois Curfman McInnes 
573bcd2baecSBarry Smith   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
5743f1db9ecSBarry Smith   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
57539ddd567SLois Curfman McInnes     ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
5763f1db9ecSBarry Smith   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
5773a40ed3dSBarry Smith     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
5785cd90555SBarry Smith   } else {
5795cd90555SBarry Smith     SETERRQ(1,1,"Viewer type not supported by PETSc object");
5808965ea79SLois Curfman McInnes   }
5813a40ed3dSBarry Smith   PetscFunctionReturn(0);
5828965ea79SLois Curfman McInnes }
5838965ea79SLois Curfman McInnes 
5845615d1e5SSatish Balay #undef __FUNC__
5855615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIDense"
5868f6be9afSLois Curfman McInnes int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
5878965ea79SLois Curfman McInnes {
5883501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
5893501a2bdSLois Curfman McInnes   Mat          mdn = mat->A;
5904e220ebcSLois Curfman McInnes   int          ierr;
5914e220ebcSLois Curfman McInnes   double       isend[5], irecv[5];
5928965ea79SLois Curfman McInnes 
5933a40ed3dSBarry Smith   PetscFunctionBegin;
5944e220ebcSLois Curfman McInnes   info->rows_global    = (double)mat->M;
5954e220ebcSLois Curfman McInnes   info->columns_global = (double)mat->N;
5964e220ebcSLois Curfman McInnes   info->rows_local     = (double)mat->m;
5974e220ebcSLois Curfman McInnes   info->columns_local  = (double)mat->N;
5984e220ebcSLois Curfman McInnes   info->block_size     = 1.0;
5994e220ebcSLois Curfman McInnes   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
6004e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
6014e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
6028965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
6034e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
6044e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
6054e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
6064e220ebcSLois Curfman McInnes     info->memory       = isend[3];
6074e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
6088965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
609f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
6104e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
6114e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
6124e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
6134e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
6144e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
6158965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
616f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
6174e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
6184e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
6194e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
6204e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
6214e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
6228965ea79SLois Curfman McInnes   }
6234e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
6244e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
6254e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
6263a40ed3dSBarry Smith   PetscFunctionReturn(0);
6278965ea79SLois Curfman McInnes }
6288965ea79SLois Curfman McInnes 
6298c469469SLois Curfman McInnes /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
6308aaee692SLois Curfman McInnes    extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
6318aaee692SLois Curfman McInnes    extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
6328aaee692SLois Curfman McInnes    extern int MatSolve_MPIDense(Mat,Vec,Vec);
6338c469469SLois Curfman McInnes    extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
6348aaee692SLois Curfman McInnes    extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
6358aaee692SLois Curfman McInnes    extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */
6368aaee692SLois Curfman McInnes 
6375615d1e5SSatish Balay #undef __FUNC__
6385615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIDense"
6398f6be9afSLois Curfman McInnes int MatSetOption_MPIDense(Mat A,MatOption op)
6408965ea79SLois Curfman McInnes {
64139ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
6428965ea79SLois Curfman McInnes 
6433a40ed3dSBarry Smith   PetscFunctionBegin;
6446d4a8577SBarry Smith   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
6456d4a8577SBarry Smith       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
6464787f768SSatish Balay       op == MAT_NEW_NONZERO_LOCATION_ERR ||
6474787f768SSatish Balay       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
648219d9a1aSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
649219d9a1aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED) {
650b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
651b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
652aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
6538965ea79SLois Curfman McInnes         MatSetOption(a->A,op);
654b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
655219d9a1aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
6566d4a8577SBarry Smith              op == MAT_SYMMETRIC ||
6576d4a8577SBarry Smith              op == MAT_STRUCTURALLY_SYMMETRIC ||
658b51ba29fSSatish Balay              op == MAT_YES_NEW_DIAGONALS ||
659b51ba29fSSatish Balay              op == MAT_USE_HASH_TABLE) {
660981c4779SBarry Smith     PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
6613a40ed3dSBarry Smith   } else if (op == MAT_COLUMN_ORIENTED) {
6623a40ed3dSBarry Smith     a->roworiented = 0; MatSetOption(a->A,op);
6633782ba37SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
6643782ba37SSatish Balay     a->donotstash = 1;
6653a40ed3dSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS) {
6663a40ed3dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
6673a40ed3dSBarry Smith   } else {
6683a40ed3dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
6693a40ed3dSBarry Smith   }
6703a40ed3dSBarry Smith   PetscFunctionReturn(0);
6718965ea79SLois Curfman McInnes }
6728965ea79SLois Curfman McInnes 
6735615d1e5SSatish Balay #undef __FUNC__
6745615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIDense"
6758f6be9afSLois Curfman McInnes int MatGetSize_MPIDense(Mat A,int *m,int *n)
6768965ea79SLois Curfman McInnes {
6773501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6783a40ed3dSBarry Smith 
6793a40ed3dSBarry Smith   PetscFunctionBegin;
6808965ea79SLois Curfman McInnes   *m = mat->M; *n = mat->N;
6813a40ed3dSBarry Smith   PetscFunctionReturn(0);
6828965ea79SLois Curfman McInnes }
6838965ea79SLois Curfman McInnes 
6845615d1e5SSatish Balay #undef __FUNC__
6855615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIDense"
6868f6be9afSLois Curfman McInnes int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
6878965ea79SLois Curfman McInnes {
6883501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
6893a40ed3dSBarry Smith 
6903a40ed3dSBarry Smith   PetscFunctionBegin;
6918965ea79SLois Curfman McInnes   *m = mat->m; *n = mat->N;
6923a40ed3dSBarry Smith   PetscFunctionReturn(0);
6938965ea79SLois Curfman McInnes }
6948965ea79SLois Curfman McInnes 
6955615d1e5SSatish Balay #undef __FUNC__
6965615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIDense"
6978f6be9afSLois Curfman McInnes int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
6988965ea79SLois Curfman McInnes {
6993501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
7003a40ed3dSBarry Smith 
7013a40ed3dSBarry Smith   PetscFunctionBegin;
7028965ea79SLois Curfman McInnes   *m = mat->rstart; *n = mat->rend;
7033a40ed3dSBarry Smith   PetscFunctionReturn(0);
7048965ea79SLois Curfman McInnes }
7058965ea79SLois Curfman McInnes 
7065615d1e5SSatish Balay #undef __FUNC__
7075615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIDense"
7088f6be9afSLois Curfman McInnes int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
7098965ea79SLois Curfman McInnes {
7103501a2bdSLois Curfman McInnes   Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
7113a40ed3dSBarry Smith   int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;
7128965ea79SLois Curfman McInnes 
7133a40ed3dSBarry Smith   PetscFunctionBegin;
714a8c6a408SBarry Smith   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows")
7158965ea79SLois Curfman McInnes   lrow = row - rstart;
7163a40ed3dSBarry Smith   ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
7173a40ed3dSBarry Smith   PetscFunctionReturn(0);
7188965ea79SLois Curfman McInnes }
7198965ea79SLois Curfman McInnes 
7205615d1e5SSatish Balay #undef __FUNC__
7215615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIDense"
7228f6be9afSLois Curfman McInnes int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
7238965ea79SLois Curfman McInnes {
724606d414cSSatish Balay   int ierr;
725606d414cSSatish Balay 
7263a40ed3dSBarry Smith   PetscFunctionBegin;
727606d414cSSatish Balay   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
728606d414cSSatish Balay   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
7293a40ed3dSBarry Smith   PetscFunctionReturn(0);
7308965ea79SLois Curfman McInnes }
7318965ea79SLois Curfman McInnes 
7325615d1e5SSatish Balay #undef __FUNC__
7335615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIDense"
7348f6be9afSLois Curfman McInnes int MatNorm_MPIDense(Mat A,NormType type,double *norm)
735096963f5SLois Curfman McInnes {
7363501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
7373501a2bdSLois Curfman McInnes   Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
7383501a2bdSLois Curfman McInnes   int          ierr, i, j;
7393501a2bdSLois Curfman McInnes   double       sum = 0.0;
7403501a2bdSLois Curfman McInnes   Scalar       *v = mat->v;
7413501a2bdSLois Curfman McInnes 
7423a40ed3dSBarry Smith   PetscFunctionBegin;
7433501a2bdSLois Curfman McInnes   if (mdn->size == 1) {
7443501a2bdSLois Curfman McInnes     ierr =  MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
7453501a2bdSLois Curfman McInnes   } else {
7463501a2bdSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
7473501a2bdSLois Curfman McInnes       for (i=0; i<mat->n*mat->m; i++ ) {
748aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
749e20fef11SSatish Balay         sum += PetscReal(PetscConj(*v)*(*v)); v++;
7503501a2bdSLois Curfman McInnes #else
7513501a2bdSLois Curfman McInnes         sum += (*v)*(*v); v++;
7523501a2bdSLois Curfman McInnes #endif
7533501a2bdSLois Curfman McInnes       }
754ca161407SBarry Smith       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
7553501a2bdSLois Curfman McInnes       *norm = sqrt(*norm);
7563501a2bdSLois Curfman McInnes       PLogFlops(2*mat->n*mat->m);
7573a40ed3dSBarry Smith     } else if (type == NORM_1) {
7583501a2bdSLois Curfman McInnes       double *tmp, *tmp2;
7590452661fSBarry Smith       tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
7603501a2bdSLois Curfman McInnes       tmp2 = tmp + mdn->N;
761549d3d68SSatish Balay       ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
762096963f5SLois Curfman McInnes       *norm = 0.0;
7633501a2bdSLois Curfman McInnes       v = mat->v;
7643501a2bdSLois Curfman McInnes       for ( j=0; j<mat->n; j++ ) {
7653501a2bdSLois Curfman McInnes         for ( i=0; i<mat->m; i++ ) {
76667e560aaSBarry Smith           tmp[j] += PetscAbsScalar(*v);  v++;
7673501a2bdSLois Curfman McInnes         }
7683501a2bdSLois Curfman McInnes       }
769ca161407SBarry Smith       ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
7703501a2bdSLois Curfman McInnes       for ( j=0; j<mdn->N; j++ ) {
7713501a2bdSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
7723501a2bdSLois Curfman McInnes       }
773606d414cSSatish Balay       ierr = PetscFree(tmp);CHKERRQ(ierr);
7743501a2bdSLois Curfman McInnes       PLogFlops(mat->n*mat->m);
7753a40ed3dSBarry Smith     } else if (type == NORM_INFINITY) { /* max row norm */
7763501a2bdSLois Curfman McInnes       double ntemp;
7773501a2bdSLois Curfman McInnes       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
778ca161407SBarry Smith       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
7793a40ed3dSBarry Smith     } else {
780a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
7813501a2bdSLois Curfman McInnes     }
7823501a2bdSLois Curfman McInnes   }
7833a40ed3dSBarry Smith   PetscFunctionReturn(0);
7843501a2bdSLois Curfman McInnes }
7853501a2bdSLois Curfman McInnes 
7865615d1e5SSatish Balay #undef __FUNC__
7875615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIDense"
7888f6be9afSLois Curfman McInnes int MatTranspose_MPIDense(Mat A,Mat *matout)
7893501a2bdSLois Curfman McInnes {
7903501a2bdSLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *) A->data;
7913501a2bdSLois Curfman McInnes   Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
7923501a2bdSLois Curfman McInnes   Mat          B;
7933501a2bdSLois Curfman McInnes   int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
7943501a2bdSLois Curfman McInnes   int          j, i, ierr;
7953501a2bdSLois Curfman McInnes   Scalar       *v;
7963501a2bdSLois Curfman McInnes 
7973a40ed3dSBarry Smith   PetscFunctionBegin;
7987056b6fcSBarry Smith   if (matout == PETSC_NULL && M != N) {
799a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
8007056b6fcSBarry Smith   }
8017056b6fcSBarry Smith   ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);
8023501a2bdSLois Curfman McInnes 
8033501a2bdSLois Curfman McInnes   m = Aloc->m; n = Aloc->n; v = Aloc->v;
8040452661fSBarry Smith   rwork = (int *) PetscMalloc(n*sizeof(int));CHKPTRQ(rwork);
8053501a2bdSLois Curfman McInnes   for ( j=0; j<n; j++ ) {
8063501a2bdSLois Curfman McInnes     for (i=0; i<m; i++) rwork[i] = rstart + i;
8073501a2bdSLois Curfman McInnes     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
8083501a2bdSLois Curfman McInnes     v   += m;
8093501a2bdSLois Curfman McInnes   }
810606d414cSSatish Balay   ierr = PetscFree(rwork);CHKERRQ(ierr);
8116d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
8126d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
8133638b69dSLois Curfman McInnes   if (matout != PETSC_NULL) {
8143501a2bdSLois Curfman McInnes     *matout = B;
8153501a2bdSLois Curfman McInnes   } else {
816f830108cSBarry Smith     PetscOps *Abops;
81709dc0095SBarry Smith     MatOps   Aops;
818f830108cSBarry Smith 
8193501a2bdSLois Curfman McInnes     /* This isn't really an in-place transpose, but free data struct from a */
820606d414cSSatish Balay     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
8213501a2bdSLois Curfman McInnes     ierr = MatDestroy(a->A);CHKERRQ(ierr);
8223501a2bdSLois Curfman McInnes     if (a->lvec) VecDestroy(a->lvec);
8233501a2bdSLois Curfman McInnes     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
824606d414cSSatish Balay     ierr = PetscFree(a);CHKERRQ(ierr);
825f830108cSBarry Smith 
826f830108cSBarry Smith     /*
827f830108cSBarry Smith          This is horrible, horrible code. We need to keep the
828f830108cSBarry Smith       A pointers for the bops and ops but copy everything
829f830108cSBarry Smith       else from C.
830f830108cSBarry Smith     */
831f830108cSBarry Smith     Abops   = A->bops;
832f830108cSBarry Smith     Aops    = A->ops;
833549d3d68SSatish Balay     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
834f830108cSBarry Smith     A->bops = Abops;
835f830108cSBarry Smith     A->ops  = Aops;
836f830108cSBarry Smith 
8370452661fSBarry Smith     PetscHeaderDestroy(B);
8383501a2bdSLois Curfman McInnes   }
8393a40ed3dSBarry Smith   PetscFunctionReturn(0);
840096963f5SLois Curfman McInnes }
841096963f5SLois Curfman McInnes 
842eadb2fb4SBarry Smith #include "pinclude/blaslapack.h"
8435615d1e5SSatish Balay #undef __FUNC__
8445615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIDense"
8458f6be9afSLois Curfman McInnes int MatScale_MPIDense(Scalar *alpha,Mat inA)
84644cd7ae7SLois Curfman McInnes {
84744cd7ae7SLois Curfman McInnes   Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
84844cd7ae7SLois Curfman McInnes   Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
84944cd7ae7SLois Curfman McInnes   int          one = 1, nz;
85044cd7ae7SLois Curfman McInnes 
8513a40ed3dSBarry Smith   PetscFunctionBegin;
85244cd7ae7SLois Curfman McInnes   nz = a->m*a->n;
85344cd7ae7SLois Curfman McInnes   BLscal_( &nz, alpha, a->v, &one );
85444cd7ae7SLois Curfman McInnes   PLogFlops(nz);
8553a40ed3dSBarry Smith   PetscFunctionReturn(0);
85644cd7ae7SLois Curfman McInnes }
85744cd7ae7SLois Curfman McInnes 
8585609ef8eSBarry Smith static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
8597b2a1423SBarry Smith extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
8608965ea79SLois Curfman McInnes 
8618965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/
86209dc0095SBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
86309dc0095SBarry Smith        MatGetRow_MPIDense,
86409dc0095SBarry Smith        MatRestoreRow_MPIDense,
86509dc0095SBarry Smith        MatMult_MPIDense,
86609dc0095SBarry Smith        MatMultAdd_MPIDense,
86709dc0095SBarry Smith        MatMultTrans_MPIDense,
86809dc0095SBarry Smith        MatMultTransAdd_MPIDense,
8698965ea79SLois Curfman McInnes        0,
87009dc0095SBarry Smith        0,
87109dc0095SBarry Smith        0,
87209dc0095SBarry Smith        0,
87309dc0095SBarry Smith        0,
87409dc0095SBarry Smith        0,
87509dc0095SBarry Smith        0,
87609dc0095SBarry Smith        MatTranspose_MPIDense,
87709dc0095SBarry Smith        MatGetInfo_MPIDense,0,
87809dc0095SBarry Smith        MatGetDiagonal_MPIDense,
87909dc0095SBarry Smith        0,
88009dc0095SBarry Smith        MatNorm_MPIDense,
88109dc0095SBarry Smith        MatAssemblyBegin_MPIDense,
88209dc0095SBarry Smith        MatAssemblyEnd_MPIDense,
88309dc0095SBarry Smith        0,
88409dc0095SBarry Smith        MatSetOption_MPIDense,
88509dc0095SBarry Smith        MatZeroEntries_MPIDense,
88609dc0095SBarry Smith        MatZeroRows_MPIDense,
88709dc0095SBarry Smith        0,
88809dc0095SBarry Smith        0,
88909dc0095SBarry Smith        0,
89009dc0095SBarry Smith        0,
89109dc0095SBarry Smith        MatGetSize_MPIDense,
89209dc0095SBarry Smith        MatGetLocalSize_MPIDense,
89339ddd567SLois Curfman McInnes        MatGetOwnershipRange_MPIDense,
89409dc0095SBarry Smith        0,
89509dc0095SBarry Smith        0,
89609dc0095SBarry Smith        MatGetArray_MPIDense,
89709dc0095SBarry Smith        MatRestoreArray_MPIDense,
8985609ef8eSBarry Smith        MatDuplicate_MPIDense,
89909dc0095SBarry Smith        0,
90009dc0095SBarry Smith        0,
90109dc0095SBarry Smith        0,
90209dc0095SBarry Smith        0,
90309dc0095SBarry Smith        0,
9042ce60cd0SSatish Balay        MatGetSubMatrices_MPIDense,
90509dc0095SBarry Smith        0,
90609dc0095SBarry Smith        MatGetValues_MPIDense,
90709dc0095SBarry Smith        0,
90809dc0095SBarry Smith        0,
90909dc0095SBarry Smith        MatScale_MPIDense,
91009dc0095SBarry Smith        0,
91109dc0095SBarry Smith        0,
91209dc0095SBarry Smith        0,
91309dc0095SBarry Smith        MatGetBlockSize_MPIDense,
91409dc0095SBarry Smith        0,
91509dc0095SBarry Smith        0,
91609dc0095SBarry Smith        0,
91709dc0095SBarry Smith        0,
91809dc0095SBarry Smith        0,
91909dc0095SBarry Smith        0,
92009dc0095SBarry Smith        0,
92109dc0095SBarry Smith        0,
92209dc0095SBarry Smith        0,
923*ca3fa75bSLois Curfman McInnes        MatGetSubMatrix_MPIDense,
92409dc0095SBarry Smith        0,
92509dc0095SBarry Smith        0,
92609dc0095SBarry Smith        MatGetMaps_Petsc};
9278965ea79SLois Curfman McInnes 
9285615d1e5SSatish Balay #undef __FUNC__
9295615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIDense"
9308965ea79SLois Curfman McInnes /*@C
93139ddd567SLois Curfman McInnes    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
9328965ea79SLois Curfman McInnes 
933db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
934db81eaa0SLois Curfman McInnes 
9358965ea79SLois Curfman McInnes    Input Parameters:
936db81eaa0SLois Curfman McInnes +  comm - MPI communicator
9378965ea79SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
938db81eaa0SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
9398965ea79SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
940db81eaa0SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
941db81eaa0SLois Curfman McInnes -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
942dfc5480cSLois Curfman McInnes    to control all matrix memory allocation.
9438965ea79SLois Curfman McInnes 
9448965ea79SLois Curfman McInnes    Output Parameter:
945477f1c0bSLois Curfman McInnes .  A - the matrix
9468965ea79SLois Curfman McInnes 
947b259b22eSLois Curfman McInnes    Notes:
94839ddd567SLois Curfman McInnes    The dense format is fully compatible with standard Fortran 77
94939ddd567SLois Curfman McInnes    storage by columns.
9508965ea79SLois Curfman McInnes 
95118f449edSLois Curfman McInnes    The data input variable is intended primarily for Fortran programmers
95218f449edSLois Curfman McInnes    who wish to allocate their own matrix memory space.  Most users should
953b4fd4287SBarry Smith    set data=PETSC_NULL.
95418f449edSLois Curfman McInnes 
9558965ea79SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
9568965ea79SLois Curfman McInnes    (possibly both).
9578965ea79SLois Curfman McInnes 
9583501a2bdSLois Curfman McInnes    Currently, the only parallel dense matrix decomposition is by rows,
9593501a2bdSLois Curfman McInnes    so that n=N and each submatrix owns all of the global columns.
9603501a2bdSLois Curfman McInnes 
961027ccd11SLois Curfman McInnes    Level: intermediate
962027ccd11SLois Curfman McInnes 
96339ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel
9648965ea79SLois Curfman McInnes 
96539ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
9668965ea79SLois Curfman McInnes @*/
967477f1c0bSLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
9688965ea79SLois Curfman McInnes {
9698965ea79SLois Curfman McInnes   Mat          mat;
97039ddd567SLois Curfman McInnes   Mat_MPIDense *a;
97125cdf11fSBarry Smith   int          ierr, i,flg;
9728965ea79SLois Curfman McInnes 
9733a40ed3dSBarry Smith   PetscFunctionBegin;
974ed2daf61SLois Curfman McInnes   /* Note:  For now, when data is specified above, this assumes the user correctly
975ed2daf61SLois Curfman McInnes    allocates the local dense storage space.  We should add error checking. */
97618f449edSLois Curfman McInnes 
977477f1c0bSLois Curfman McInnes   *A = 0;
9783f1db9ecSBarry Smith   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
9798965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
9800452661fSBarry Smith   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
981549d3d68SSatish Balay   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
982e1311b90SBarry Smith   mat->ops->destroy = MatDestroy_MPIDense;
983e1311b90SBarry Smith   mat->ops->view    = MatView_MPIDense;
9848965ea79SLois Curfman McInnes   mat->factor       = 0;
98590f02eecSBarry Smith   mat->mapping      = 0;
9868965ea79SLois Curfman McInnes 
987622d7880SLois Curfman McInnes   a->factor       = 0;
988e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
989d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
990d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);
9918965ea79SLois Curfman McInnes 
99296f6c058SBarry Smith   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
99339ddd567SLois Curfman McInnes 
994c7fcc2eaSBarry Smith   /*
995c7fcc2eaSBarry Smith      The computation of n is wrong below, n should represent the number of local
996c7fcc2eaSBarry Smith      rows in the right (column vector)
997c7fcc2eaSBarry Smith   */
998c7fcc2eaSBarry Smith 
99939ddd567SLois Curfman McInnes   /* each row stores all columns */
100039ddd567SLois Curfman McInnes   if (N == PETSC_DECIDE) N = n;
1001d7e8b826SBarry Smith   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1002a8c6a408SBarry Smith   /*  if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
1003aca0ad90SLois Curfman McInnes   a->N = mat->N = N;
1004aca0ad90SLois Curfman McInnes   a->M = mat->M = M;
1005aca0ad90SLois Curfman McInnes   a->m = mat->m = m;
1006aca0ad90SLois Curfman McInnes   a->n = mat->n = n;
10078965ea79SLois Curfman McInnes 
1008c7fcc2eaSBarry Smith   /* the information in the maps duplicates the information computed below, eventually
1009c7fcc2eaSBarry Smith      we should remove the duplicate information that is not contained in the maps */
1010488ecbafSBarry Smith   ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
101196f6c058SBarry Smith   ierr = MapCreateMPI(comm,PETSC_DECIDE,N,&mat->cmap);CHKERRQ(ierr);
1012c7fcc2eaSBarry Smith 
10138965ea79SLois Curfman McInnes   /* build local table of row and column ownerships */
1014d7e8b826SBarry Smith   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1015d7e8b826SBarry Smith   a->cowners = a->rowners + a->size + 1;
1016f09e8eb9SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1017ca161407SBarry Smith   ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
10188965ea79SLois Curfman McInnes   a->rowners[0] = 0;
10198965ea79SLois Curfman McInnes   for ( i=2; i<=a->size; i++ ) {
10208965ea79SLois Curfman McInnes     a->rowners[i] += a->rowners[i-1];
10218965ea79SLois Curfman McInnes   }
10228965ea79SLois Curfman McInnes   a->rstart = a->rowners[a->rank];
10238965ea79SLois Curfman McInnes   a->rend   = a->rowners[a->rank+1];
1024ca161407SBarry Smith   ierr      = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1025d7e8b826SBarry Smith   a->cowners[0] = 0;
1026d7e8b826SBarry Smith   for ( i=2; i<=a->size; i++ ) {
1027d7e8b826SBarry Smith     a->cowners[i] += a->cowners[i-1];
1028d7e8b826SBarry Smith   }
10298965ea79SLois Curfman McInnes 
1030029af93fSBarry Smith   ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
10318965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
10328965ea79SLois Curfman McInnes 
10338965ea79SLois Curfman McInnes   /* build cache for off array entries formed */
10343782ba37SSatish Balay   a->donotstash = 0;
10358798bf22SSatish Balay   ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);
10368965ea79SLois Curfman McInnes 
10378965ea79SLois Curfman McInnes   /* stuff used for matrix vector multiply */
10388965ea79SLois Curfman McInnes   a->lvec        = 0;
10398965ea79SLois Curfman McInnes   a->Mvctx       = 0;
104039b7565bSBarry Smith   a->roworiented = 1;
10418965ea79SLois Curfman McInnes 
1042477f1c0bSLois Curfman McInnes   *A = mat;
104325cdf11fSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
104425cdf11fSBarry Smith   if (flg) {
10458c469469SLois Curfman McInnes     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
10468c469469SLois Curfman McInnes   }
10473a40ed3dSBarry Smith   PetscFunctionReturn(0);
10488965ea79SLois Curfman McInnes }
10498965ea79SLois Curfman McInnes 
10505615d1e5SSatish Balay #undef __FUNC__
10515609ef8eSBarry Smith #define __FUNC__ "MatDuplicate_MPIDense"
10525609ef8eSBarry Smith static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
10538965ea79SLois Curfman McInnes {
10548965ea79SLois Curfman McInnes   Mat          mat;
10553501a2bdSLois Curfman McInnes   Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
105639ddd567SLois Curfman McInnes   int          ierr;
10572ba99913SLois Curfman McInnes   FactorCtx    *factor;
10588965ea79SLois Curfman McInnes 
10593a40ed3dSBarry Smith   PetscFunctionBegin;
10608965ea79SLois Curfman McInnes   *newmat       = 0;
10613f1db9ecSBarry Smith   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
10628965ea79SLois Curfman McInnes   PLogObjectCreate(mat);
10630452661fSBarry Smith   mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
1064549d3d68SSatish Balay   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1065e1311b90SBarry Smith   mat->ops->destroy = MatDestroy_MPIDense;
1066e1311b90SBarry Smith   mat->ops->view    = MatView_MPIDense;
10673501a2bdSLois Curfman McInnes   mat->factor       = A->factor;
1068c456f294SBarry Smith   mat->assembled    = PETSC_TRUE;
10698965ea79SLois Curfman McInnes 
107044cd7ae7SLois Curfman McInnes   a->m = mat->m = oldmat->m;
107144cd7ae7SLois Curfman McInnes   a->n = mat->n = oldmat->n;
107244cd7ae7SLois Curfman McInnes   a->M = mat->M = oldmat->M;
107344cd7ae7SLois Curfman McInnes   a->N = mat->N = oldmat->N;
10742ba99913SLois Curfman McInnes   if (oldmat->factor) {
10752ba99913SLois Curfman McInnes     a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
10762ba99913SLois Curfman McInnes     /* copy factor contents ... add this code! */
10772ba99913SLois Curfman McInnes   } else a->factor = 0;
10788965ea79SLois Curfman McInnes 
10798965ea79SLois Curfman McInnes   a->rstart       = oldmat->rstart;
10808965ea79SLois Curfman McInnes   a->rend         = oldmat->rend;
10818965ea79SLois Curfman McInnes   a->size         = oldmat->size;
10828965ea79SLois Curfman McInnes   a->rank         = oldmat->rank;
1083e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
10843782ba37SSatish Balay   a->donotstash   = oldmat->donotstash;
10850452661fSBarry Smith   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
1086f09e8eb9SSatish Balay   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1087549d3d68SSatish Balay   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
10888798bf22SSatish Balay   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
10898965ea79SLois Curfman McInnes 
10908965ea79SLois Curfman McInnes   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
10918965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->lvec);
109255659b69SBarry Smith   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
10938965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->Mvctx);
10945609ef8eSBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
10958965ea79SLois Curfman McInnes   PLogObjectParent(mat,a->A);
10968965ea79SLois Curfman McInnes   *newmat = mat;
10973a40ed3dSBarry Smith   PetscFunctionReturn(0);
10988965ea79SLois Curfman McInnes }
10998965ea79SLois Curfman McInnes 
110077c4ece6SBarry Smith #include "sys.h"
11018965ea79SLois Curfman McInnes 
11025615d1e5SSatish Balay #undef __FUNC__
11035615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIDense_DenseInFile"
110490ace30eSBarry Smith int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
110590ace30eSBarry Smith {
110640011551SBarry Smith   int        *rowners, i,size,rank,m,ierr,nz,j;
110790ace30eSBarry Smith   Scalar     *array,*vals,*vals_ptr;
110890ace30eSBarry Smith   MPI_Status status;
110990ace30eSBarry Smith 
11103a40ed3dSBarry Smith   PetscFunctionBegin;
1111d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1112d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
111390ace30eSBarry Smith 
111490ace30eSBarry Smith   /* determine ownership of all rows */
111590ace30eSBarry Smith   m          = M/size + ((M % size) > rank);
111690ace30eSBarry Smith   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1117ca161407SBarry Smith   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
111890ace30eSBarry Smith   rowners[0] = 0;
111990ace30eSBarry Smith   for ( i=2; i<=size; i++ ) {
112090ace30eSBarry Smith     rowners[i] += rowners[i-1];
112190ace30eSBarry Smith   }
112290ace30eSBarry Smith 
112390ace30eSBarry Smith   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
112490ace30eSBarry Smith   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
112590ace30eSBarry Smith 
112690ace30eSBarry Smith   if (!rank) {
112790ace30eSBarry Smith     vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
112890ace30eSBarry Smith 
112990ace30eSBarry Smith     /* read in my part of the matrix numerical values  */
11300752156aSBarry Smith     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
113190ace30eSBarry Smith 
113290ace30eSBarry Smith     /* insert into matrix-by row (this is why cannot directly read into array */
113390ace30eSBarry Smith     vals_ptr = vals;
113490ace30eSBarry Smith     for ( i=0; i<m; i++ ) {
113590ace30eSBarry Smith       for ( j=0; j<N; j++ ) {
113690ace30eSBarry Smith         array[i + j*m] = *vals_ptr++;
113790ace30eSBarry Smith       }
113890ace30eSBarry Smith     }
113990ace30eSBarry Smith 
114090ace30eSBarry Smith     /* read in other processors and ship out */
114190ace30eSBarry Smith     for ( i=1; i<size; i++ ) {
114290ace30eSBarry Smith       nz   = (rowners[i+1] - rowners[i])*N;
11430752156aSBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1144ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
114590ace30eSBarry Smith     }
11463a40ed3dSBarry Smith   } else {
114790ace30eSBarry Smith     /* receive numeric values */
114890ace30eSBarry Smith     vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);
114990ace30eSBarry Smith 
115090ace30eSBarry Smith     /* receive message of values*/
1151ca161407SBarry Smith     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
115290ace30eSBarry Smith 
115390ace30eSBarry Smith     /* insert into matrix-by row (this is why cannot directly read into array */
115490ace30eSBarry Smith     vals_ptr = vals;
115590ace30eSBarry Smith     for ( i=0; i<m; i++ ) {
115690ace30eSBarry Smith       for ( j=0; j<N; j++ ) {
115790ace30eSBarry Smith         array[i + j*m] = *vals_ptr++;
115890ace30eSBarry Smith       }
115990ace30eSBarry Smith     }
116090ace30eSBarry Smith   }
1161606d414cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
1162606d414cSSatish Balay   ierr = PetscFree(vals);CHKERRQ(ierr);
11636d4a8577SBarry Smith   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
11646d4a8577SBarry Smith   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
11653a40ed3dSBarry Smith   PetscFunctionReturn(0);
116690ace30eSBarry Smith }
116790ace30eSBarry Smith 
116890ace30eSBarry Smith 
11695615d1e5SSatish Balay #undef __FUNC__
11705615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIDense"
117119bcc07fSBarry Smith int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
11728965ea79SLois Curfman McInnes {
11738965ea79SLois Curfman McInnes   Mat          A;
11748965ea79SLois Curfman McInnes   Scalar       *vals,*svals;
117519bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject)viewer)->comm;
11768965ea79SLois Curfman McInnes   MPI_Status   status;
11778965ea79SLois Curfman McInnes   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
11788965ea79SLois Curfman McInnes   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
117919bcc07fSBarry Smith   int          tag = ((PetscObject)viewer)->tag;
11803a40ed3dSBarry Smith   int          i, nz, ierr, j,rstart, rend, fd;
11818965ea79SLois Curfman McInnes 
11823a40ed3dSBarry Smith   PetscFunctionBegin;
1183d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1184d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
11858965ea79SLois Curfman McInnes   if (!rank) {
118690ace30eSBarry Smith     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
11870752156aSBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1188a8c6a408SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
11898965ea79SLois Curfman McInnes   }
11908965ea79SLois Curfman McInnes 
1191ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
119290ace30eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
119390ace30eSBarry Smith 
119490ace30eSBarry Smith   /*
119590ace30eSBarry Smith        Handle case where matrix is stored on disk as a dense matrix
119690ace30eSBarry Smith   */
119790ace30eSBarry Smith   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
11983a40ed3dSBarry Smith     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
11993a40ed3dSBarry Smith     PetscFunctionReturn(0);
120090ace30eSBarry Smith   }
120190ace30eSBarry Smith 
12028965ea79SLois Curfman McInnes   /* determine ownership of all rows */
12038965ea79SLois Curfman McInnes   m          = M/size + ((M % size) > rank);
12040452661fSBarry Smith   rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1205ca161407SBarry Smith   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
12068965ea79SLois Curfman McInnes   rowners[0] = 0;
12078965ea79SLois Curfman McInnes   for ( i=2; i<=size; i++ ) {
12088965ea79SLois Curfman McInnes     rowners[i] += rowners[i-1];
12098965ea79SLois Curfman McInnes   }
12108965ea79SLois Curfman McInnes   rstart = rowners[rank];
12118965ea79SLois Curfman McInnes   rend   = rowners[rank+1];
12128965ea79SLois Curfman McInnes 
12138965ea79SLois Curfman McInnes   /* distribute row lengths to all processors */
12140452661fSBarry Smith   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
12158965ea79SLois Curfman McInnes   offlens = ourlens + (rend-rstart);
12168965ea79SLois Curfman McInnes   if (!rank) {
12170452661fSBarry Smith     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
12180752156aSBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
12190452661fSBarry Smith     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
12208965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1221ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1222606d414cSSatish Balay     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1223ca161407SBarry Smith   } else {
1224ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
12258965ea79SLois Curfman McInnes   }
12268965ea79SLois Curfman McInnes 
12278965ea79SLois Curfman McInnes   if (!rank) {
12288965ea79SLois Curfman McInnes     /* calculate the number of nonzeros on each processor */
12290452661fSBarry Smith     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
1230549d3d68SSatish Balay     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
12318965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
12328965ea79SLois Curfman McInnes       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
12338965ea79SLois Curfman McInnes         procsnz[i] += rowlengths[j];
12348965ea79SLois Curfman McInnes       }
12358965ea79SLois Curfman McInnes     }
1236606d414cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
12378965ea79SLois Curfman McInnes 
12388965ea79SLois Curfman McInnes     /* determine max buffer needed and allocate it */
12398965ea79SLois Curfman McInnes     maxnz = 0;
12408965ea79SLois Curfman McInnes     for ( i=0; i<size; i++ ) {
12410452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
12428965ea79SLois Curfman McInnes     }
12430452661fSBarry Smith     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
12448965ea79SLois Curfman McInnes 
12458965ea79SLois Curfman McInnes     /* read in my part of the matrix column indices  */
12468965ea79SLois Curfman McInnes     nz = procsnz[0];
12470452661fSBarry Smith     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
12480752156aSBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
12498965ea79SLois Curfman McInnes 
12508965ea79SLois Curfman McInnes     /* read in every one elses and ship off */
12518965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
12528965ea79SLois Curfman McInnes       nz   = procsnz[i];
12530752156aSBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1254ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
12558965ea79SLois Curfman McInnes     }
1256606d414cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
12573a40ed3dSBarry Smith   } else {
12588965ea79SLois Curfman McInnes     /* determine buffer space needed for message */
12598965ea79SLois Curfman McInnes     nz = 0;
12608965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
12618965ea79SLois Curfman McInnes       nz += ourlens[i];
12628965ea79SLois Curfman McInnes     }
12630452661fSBarry Smith     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
12648965ea79SLois Curfman McInnes 
12658965ea79SLois Curfman McInnes     /* receive message of column indices*/
1266ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1267ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1268a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
12698965ea79SLois Curfman McInnes   }
12708965ea79SLois Curfman McInnes 
12718965ea79SLois Curfman McInnes   /* loop over local rows, determining number of off diagonal entries */
1272549d3d68SSatish Balay   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
12738965ea79SLois Curfman McInnes   jj = 0;
12748965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
12758965ea79SLois Curfman McInnes     for ( j=0; j<ourlens[i]; j++ ) {
12768965ea79SLois Curfman McInnes       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
12778965ea79SLois Curfman McInnes       jj++;
12788965ea79SLois Curfman McInnes     }
12798965ea79SLois Curfman McInnes   }
12808965ea79SLois Curfman McInnes 
12818965ea79SLois Curfman McInnes   /* create our matrix */
12828965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
12838965ea79SLois Curfman McInnes     ourlens[i] -= offlens[i];
12848965ea79SLois Curfman McInnes   }
1285b4fd4287SBarry Smith   ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
12868965ea79SLois Curfman McInnes   A = *newmat;
12878965ea79SLois Curfman McInnes   for ( i=0; i<m; i++ ) {
12888965ea79SLois Curfman McInnes     ourlens[i] += offlens[i];
12898965ea79SLois Curfman McInnes   }
12908965ea79SLois Curfman McInnes 
12918965ea79SLois Curfman McInnes   if (!rank) {
12920452661fSBarry Smith     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
12938965ea79SLois Curfman McInnes 
12948965ea79SLois Curfman McInnes     /* read in my part of the matrix numerical values  */
12958965ea79SLois Curfman McInnes     nz = procsnz[0];
12960752156aSBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
12978965ea79SLois Curfman McInnes 
12988965ea79SLois Curfman McInnes     /* insert into matrix */
12998965ea79SLois Curfman McInnes     jj      = rstart;
13008965ea79SLois Curfman McInnes     smycols = mycols;
13018965ea79SLois Curfman McInnes     svals   = vals;
13028965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
13038965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
13048965ea79SLois Curfman McInnes       smycols += ourlens[i];
13058965ea79SLois Curfman McInnes       svals   += ourlens[i];
13068965ea79SLois Curfman McInnes       jj++;
13078965ea79SLois Curfman McInnes     }
13088965ea79SLois Curfman McInnes 
13098965ea79SLois Curfman McInnes     /* read in other processors and ship out */
13108965ea79SLois Curfman McInnes     for ( i=1; i<size; i++ ) {
13118965ea79SLois Curfman McInnes       nz   = procsnz[i];
13120752156aSBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1313ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
13148965ea79SLois Curfman McInnes     }
1315606d414cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
13163a40ed3dSBarry Smith   } else {
13178965ea79SLois Curfman McInnes     /* receive numeric values */
13180452661fSBarry Smith     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
13198965ea79SLois Curfman McInnes 
13208965ea79SLois Curfman McInnes     /* receive message of values*/
1321ca161407SBarry Smith     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1322ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1323a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
13248965ea79SLois Curfman McInnes 
13258965ea79SLois Curfman McInnes     /* insert into matrix */
13268965ea79SLois Curfman McInnes     jj      = rstart;
13278965ea79SLois Curfman McInnes     smycols = mycols;
13288965ea79SLois Curfman McInnes     svals   = vals;
13298965ea79SLois Curfman McInnes     for ( i=0; i<m; i++ ) {
13308965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
13318965ea79SLois Curfman McInnes       smycols += ourlens[i];
13328965ea79SLois Curfman McInnes       svals   += ourlens[i];
13338965ea79SLois Curfman McInnes       jj++;
13348965ea79SLois Curfman McInnes     }
13358965ea79SLois Curfman McInnes   }
1336606d414cSSatish Balay   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1337606d414cSSatish Balay   ierr = PetscFree(vals);CHKERRQ(ierr);
1338606d414cSSatish Balay   ierr = PetscFree(mycols);CHKERRQ(ierr);
1339606d414cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
13408965ea79SLois Curfman McInnes 
13416d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
13426d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
13433a40ed3dSBarry Smith   PetscFunctionReturn(0);
13448965ea79SLois Curfman McInnes }
134590ace30eSBarry Smith 
134690ace30eSBarry Smith 
134790ace30eSBarry Smith 
134890ace30eSBarry Smith 
134990ace30eSBarry Smith 
1350