1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*72d926a5SLois Curfman McInnes static char vcid[] = "$Id: mpidense.c,v 1.121 1999/08/02 21:57:54 curfman Exp curfman $"; 38965ea79SLois Curfman McInnes #endif 48965ea79SLois Curfman McInnes 5ed3cc1f0SBarry Smith /* 6ed3cc1f0SBarry Smith Basic functions for basic parallel dense matrices. 7ed3cc1f0SBarry Smith */ 8ed3cc1f0SBarry Smith 970f55243SBarry Smith #include "src/mat/impls/dense/mpi/mpidense.h" 10f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 118965ea79SLois Curfman McInnes 127ef1d9bdSSatish Balay extern int MatSetUpMultiply_MPIDense(Mat); 137ef1d9bdSSatish Balay 145615d1e5SSatish Balay #undef __FUNC__ 155615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIDense" 168f6be9afSLois Curfman McInnes int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv) 178965ea79SLois Curfman McInnes { 1839b7565bSBarry Smith Mat_MPIDense *A = (Mat_MPIDense *) mat->data; 1939b7565bSBarry Smith int ierr, i, j, rstart = A->rstart, rend = A->rend, row; 2039b7565bSBarry Smith int roworiented = A->roworiented; 218965ea79SLois Curfman McInnes 223a40ed3dSBarry Smith PetscFunctionBegin; 238965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 245ef9f2a5SBarry Smith if (idxm[i] < 0) continue; 25a8c6a408SBarry Smith if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 268965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 278965ea79SLois Curfman McInnes row = idxm[i] - rstart; 2839b7565bSBarry Smith if (roworiented) { 2939b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 303a40ed3dSBarry Smith } else { 318965ea79SLois Curfman McInnes for ( j=0; j<n; j++ ) { 325ef9f2a5SBarry Smith if (idxn[j] < 0) continue; 33a8c6a408SBarry Smith if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 3439b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 3539b7565bSBarry Smith } 368965ea79SLois Curfman McInnes } 373a40ed3dSBarry Smith } else { 383782ba37SSatish Balay if ( !A->donotstash) { 3939b7565bSBarry Smith if (roworiented) { 408798bf22SSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr); 41d36fbae8SSatish Balay } else { 428798bf22SSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr); 4339b7565bSBarry Smith } 44b49de8d1SLois Curfman McInnes } 45b49de8d1SLois Curfman McInnes } 463782ba37SSatish Balay } 473a40ed3dSBarry Smith PetscFunctionReturn(0); 48b49de8d1SLois Curfman McInnes } 49b49de8d1SLois Curfman McInnes 505615d1e5SSatish Balay #undef __FUNC__ 515615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIDense" 528f6be9afSLois Curfman McInnes int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 53b49de8d1SLois Curfman McInnes { 54b49de8d1SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 55b49de8d1SLois Curfman McInnes int ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row; 56b49de8d1SLois Curfman McInnes 573a40ed3dSBarry Smith PetscFunctionBegin; 58b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 59a8c6a408SBarry Smith if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 60a8c6a408SBarry Smith if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 61b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 62b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 63b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 64a8c6a408SBarry Smith if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 65a8c6a408SBarry Smith if (idxn[j] >= mdn->N) { 66a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 67a8c6a408SBarry Smith } 68b49de8d1SLois Curfman McInnes ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 69b49de8d1SLois Curfman McInnes } 70a8c6a408SBarry Smith } else { 71a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 728965ea79SLois Curfman McInnes } 738965ea79SLois Curfman McInnes } 743a40ed3dSBarry Smith PetscFunctionReturn(0); 758965ea79SLois Curfman McInnes } 768965ea79SLois Curfman McInnes 775615d1e5SSatish Balay #undef __FUNC__ 785615d1e5SSatish Balay #define __FUNC__ "MatGetArray_MPIDense" 798f6be9afSLois Curfman McInnes int MatGetArray_MPIDense(Mat A,Scalar **array) 80ff14e315SSatish Balay { 81ff14e315SSatish Balay Mat_MPIDense *a = (Mat_MPIDense *) A->data; 82ff14e315SSatish Balay int ierr; 83ff14e315SSatish Balay 843a40ed3dSBarry Smith PetscFunctionBegin; 85ff14e315SSatish Balay ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 863a40ed3dSBarry Smith PetscFunctionReturn(0); 87ff14e315SSatish Balay } 88ff14e315SSatish Balay 895615d1e5SSatish Balay #undef __FUNC__ 90ca3fa75bSLois Curfman McInnes #define __FUNC__ "MatGetSubMatrix_MPIDense" 91ca3fa75bSLois Curfman McInnes static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 92ca3fa75bSLois Curfman McInnes { 93ca3fa75bSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd; 94ca3fa75bSLois Curfman McInnes Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data; 95*72d926a5SLois Curfman McInnes int i, j, ierr, *irow, *icol, rstart, rend, nrows, ncols, nlrows, nlcols, rank; 96ca3fa75bSLois Curfman McInnes Scalar *av, *bv, *v = lmat->v; 97ca3fa75bSLois Curfman McInnes Mat newmat; 98ca3fa75bSLois Curfman McInnes 99ca3fa75bSLois Curfman McInnes PetscFunctionBegin; 1007eba5e9cSLois Curfman McInnes ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 101ca3fa75bSLois Curfman McInnes ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 102ca3fa75bSLois Curfman McInnes ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 103ca3fa75bSLois Curfman McInnes ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 104ca3fa75bSLois Curfman McInnes ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr); 105ca3fa75bSLois Curfman McInnes 106ca3fa75bSLois Curfman McInnes /* No parallel redistribution currently supported! Should really check each index set 1077eba5e9cSLois Curfman McInnes to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 1087eba5e9cSLois Curfman McInnes original matrix! */ 109ca3fa75bSLois Curfman McInnes 110ca3fa75bSLois Curfman McInnes ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 1117eba5e9cSLois Curfman McInnes ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 112ca3fa75bSLois Curfman McInnes 113ca3fa75bSLois Curfman McInnes /* Check submatrix call */ 114ca3fa75bSLois Curfman McInnes if (scall == MAT_REUSE_MATRIX) { 1157eba5e9cSLois Curfman McInnes /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */ 1167eba5e9cSLois Curfman McInnes /* Really need to test rows and column sizes! */ 117ca3fa75bSLois Curfman McInnes newmat = *B; 118ca3fa75bSLois Curfman McInnes } else { 119ca3fa75bSLois Curfman McInnes /* Create and fill new matrix */ 120ca3fa75bSLois Curfman McInnes ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr); 121ca3fa75bSLois Curfman McInnes } 122ca3fa75bSLois Curfman McInnes 123ca3fa75bSLois Curfman McInnes /* Now extract the data pointers and do the copy, column at a time */ 124ca3fa75bSLois Curfman McInnes newmatd = (Mat_MPIDense *) newmat->data; 125ca3fa75bSLois Curfman McInnes bv = ((Mat_SeqDense *)newmatd->A->data)->v; 126ca3fa75bSLois Curfman McInnes 127ca3fa75bSLois Curfman McInnes for ( i=0; i<ncols; i++ ) { 128ca3fa75bSLois Curfman McInnes av = v + nlrows*icol[i]; 129ca3fa75bSLois Curfman McInnes for (j=0; j<nrows; j++ ) { 1307eba5e9cSLois Curfman McInnes *bv++ = av[irow[j] - rstart]; 131ca3fa75bSLois Curfman McInnes } 132ca3fa75bSLois Curfman McInnes } 133ca3fa75bSLois Curfman McInnes 134ca3fa75bSLois Curfman McInnes /* Assemble the matrices so that the correct flags are set */ 135ca3fa75bSLois Curfman McInnes ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 136ca3fa75bSLois Curfman McInnes ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 137ca3fa75bSLois Curfman McInnes 138ca3fa75bSLois Curfman McInnes /* Free work space */ 139ca3fa75bSLois Curfman McInnes ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 140ca3fa75bSLois Curfman McInnes ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 141ca3fa75bSLois Curfman McInnes *B = newmat; 142ca3fa75bSLois Curfman McInnes PetscFunctionReturn(0); 143ca3fa75bSLois Curfman McInnes } 144ca3fa75bSLois Curfman McInnes 145ca3fa75bSLois Curfman McInnes #undef __FUNC__ 1465615d1e5SSatish Balay #define __FUNC__ "MatRestoreArray_MPIDense" 1478f6be9afSLois Curfman McInnes int MatRestoreArray_MPIDense(Mat A,Scalar **array) 148ff14e315SSatish Balay { 1493a40ed3dSBarry Smith PetscFunctionBegin; 1503a40ed3dSBarry Smith PetscFunctionReturn(0); 151ff14e315SSatish Balay } 152ff14e315SSatish Balay 1535615d1e5SSatish Balay #undef __FUNC__ 1545615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIDense" 1558f6be9afSLois Curfman McInnes int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 1568965ea79SLois Curfman McInnes { 15739ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 1588965ea79SLois Curfman McInnes MPI_Comm comm = mat->comm; 159d36fbae8SSatish Balay int ierr,nstash,reallocs; 1608965ea79SLois Curfman McInnes InsertMode addv; 1618965ea79SLois Curfman McInnes 1623a40ed3dSBarry Smith PetscFunctionBegin; 1638965ea79SLois Curfman McInnes /* make sure all processors are either in INSERTMODE or ADDMODE */ 164ca161407SBarry Smith ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 1657056b6fcSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 166a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs"); 1678965ea79SLois Curfman McInnes } 168e0fa3b82SLois Curfman McInnes mat->insertmode = addv; /* in case this processor had no cache */ 1698965ea79SLois Curfman McInnes 1708798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr); 1718798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 172d36fbae8SSatish Balay PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n", 173d36fbae8SSatish Balay nstash,reallocs); 1743a40ed3dSBarry Smith PetscFunctionReturn(0); 1758965ea79SLois Curfman McInnes } 1768965ea79SLois Curfman McInnes 1775615d1e5SSatish Balay #undef __FUNC__ 1785615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIDense" 1798f6be9afSLois Curfman McInnes int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 1808965ea79SLois Curfman McInnes { 18139ddd567SLois Curfman McInnes Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 1827ef1d9bdSSatish Balay int i,n,ierr,*row,*col,flg,j,rstart,ncols; 1837ef1d9bdSSatish Balay Scalar *val; 184e0fa3b82SLois Curfman McInnes InsertMode addv=mat->insertmode; 1858965ea79SLois Curfman McInnes 1863a40ed3dSBarry Smith PetscFunctionBegin; 1878965ea79SLois Curfman McInnes /* wait on receives */ 1887ef1d9bdSSatish Balay while (1) { 1898798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1907ef1d9bdSSatish Balay if (!flg) break; 1918965ea79SLois Curfman McInnes 1927ef1d9bdSSatish Balay for ( i=0; i<n; ) { 1937ef1d9bdSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 1947ef1d9bdSSatish Balay for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 1957ef1d9bdSSatish Balay if (j < n) ncols = j-i; 1967ef1d9bdSSatish Balay else ncols = n-i; 1977ef1d9bdSSatish Balay /* Now assemble all these values with a single function call */ 1987ef1d9bdSSatish Balay ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1997ef1d9bdSSatish Balay i = j; 2008965ea79SLois Curfman McInnes } 2017ef1d9bdSSatish Balay } 2028798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 2038965ea79SLois Curfman McInnes 20439ddd567SLois Curfman McInnes ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 20539ddd567SLois Curfman McInnes ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 2068965ea79SLois Curfman McInnes 2076d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 20839ddd567SLois Curfman McInnes ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 2098965ea79SLois Curfman McInnes } 2103a40ed3dSBarry Smith PetscFunctionReturn(0); 2118965ea79SLois Curfman McInnes } 2128965ea79SLois Curfman McInnes 2135615d1e5SSatish Balay #undef __FUNC__ 2145615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIDense" 2158f6be9afSLois Curfman McInnes int MatZeroEntries_MPIDense(Mat A) 2168965ea79SLois Curfman McInnes { 2173a40ed3dSBarry Smith int ierr; 21839ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 2193a40ed3dSBarry Smith 2203a40ed3dSBarry Smith PetscFunctionBegin; 2213a40ed3dSBarry Smith ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 2223a40ed3dSBarry Smith PetscFunctionReturn(0); 2238965ea79SLois Curfman McInnes } 2248965ea79SLois Curfman McInnes 2255615d1e5SSatish Balay #undef __FUNC__ 2265615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIDense" 2278f6be9afSLois Curfman McInnes int MatGetBlockSize_MPIDense(Mat A,int *bs) 2284e220ebcSLois Curfman McInnes { 2293a40ed3dSBarry Smith PetscFunctionBegin; 2304e220ebcSLois Curfman McInnes *bs = 1; 2313a40ed3dSBarry Smith PetscFunctionReturn(0); 2324e220ebcSLois Curfman McInnes } 2334e220ebcSLois Curfman McInnes 2348965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the 2358965ea79SLois Curfman McInnes matrix is square and the column and row owerships are identical. 2368965ea79SLois Curfman McInnes This is a BUG. The only way to fix it seems to be to access 2373501a2bdSLois Curfman McInnes mdn->A and mdn->B directly and not through the MatZeroRows() 2388965ea79SLois Curfman McInnes routine. 2398965ea79SLois Curfman McInnes */ 2405615d1e5SSatish Balay #undef __FUNC__ 2415615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIDense" 2428f6be9afSLois Curfman McInnes int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag) 2438965ea79SLois Curfman McInnes { 24439ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 2458965ea79SLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 2468965ea79SLois Curfman McInnes int *procs,*nprocs,j,found,idx,nsends,*work; 2478965ea79SLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 2488965ea79SLois Curfman McInnes int *rvalues,tag = A->tag,count,base,slen,n,*source; 2498965ea79SLois Curfman McInnes int *lens,imdex,*lrows,*values; 2508965ea79SLois Curfman McInnes MPI_Comm comm = A->comm; 2518965ea79SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 2528965ea79SLois Curfman McInnes MPI_Status recv_status,*send_status; 2538965ea79SLois Curfman McInnes IS istmp; 2548965ea79SLois Curfman McInnes 2553a40ed3dSBarry Smith PetscFunctionBegin; 25677c4ece6SBarry Smith ierr = ISGetSize(is,&N);CHKERRQ(ierr); 2578965ea79SLois Curfman McInnes ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 2588965ea79SLois Curfman McInnes 2598965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 2600452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs); 261549d3d68SSatish Balay ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 262549d3d68SSatish Balay procs = nprocs + size; 2630452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 2648965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 2658965ea79SLois Curfman McInnes idx = rows[i]; 2668965ea79SLois Curfman McInnes found = 0; 2678965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 2688965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 2698965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 2708965ea79SLois Curfman McInnes } 2718965ea79SLois Curfman McInnes } 272a8c6a408SBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 2738965ea79SLois Curfman McInnes } 2748965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 2758965ea79SLois Curfman McInnes 2768965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 2770452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work); 278ca161407SBarry Smith ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2798965ea79SLois Curfman McInnes nrecvs = work[rank]; 280ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 2818965ea79SLois Curfman McInnes nmax = work[rank]; 282606d414cSSatish Balay ierr = PetscFree(work);CHKERRQ(ierr); 2838965ea79SLois Curfman McInnes 2848965ea79SLois Curfman McInnes /* post receives: */ 2853a40ed3dSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 2863a40ed3dSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 2878965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 288ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 2898965ea79SLois Curfman McInnes } 2908965ea79SLois Curfman McInnes 2918965ea79SLois Curfman McInnes /* do sends: 2928965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 2938965ea79SLois Curfman McInnes the ith processor 2948965ea79SLois Curfman McInnes */ 2950452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues); 2967056b6fcSBarry Smith send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 2970452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts); 2988965ea79SLois Curfman McInnes starts[0] = 0; 2998965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3008965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 3018965ea79SLois Curfman McInnes svalues[starts[owner[i]]++] = rows[i]; 3028965ea79SLois Curfman McInnes } 3038965ea79SLois Curfman McInnes ISRestoreIndices(is,&rows); 3048965ea79SLois Curfman McInnes 3058965ea79SLois Curfman McInnes starts[0] = 0; 3068965ea79SLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3078965ea79SLois Curfman McInnes count = 0; 3088965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 3098965ea79SLois Curfman McInnes if (procs[i]) { 310ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 3118965ea79SLois Curfman McInnes } 3128965ea79SLois Curfman McInnes } 313606d414cSSatish Balay ierr = PetscFree(starts);CHKERRQ(ierr); 3148965ea79SLois Curfman McInnes 3158965ea79SLois Curfman McInnes base = owners[rank]; 3168965ea79SLois Curfman McInnes 3178965ea79SLois Curfman McInnes /* wait on receives */ 3180452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens); 3198965ea79SLois Curfman McInnes source = lens + nrecvs; 3208965ea79SLois Curfman McInnes count = nrecvs; slen = 0; 3218965ea79SLois Curfman McInnes while (count) { 322ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 3238965ea79SLois Curfman McInnes /* unpack receives into our local space */ 324ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 3258965ea79SLois Curfman McInnes source[imdex] = recv_status.MPI_SOURCE; 3268965ea79SLois Curfman McInnes lens[imdex] = n; 3278965ea79SLois Curfman McInnes slen += n; 3288965ea79SLois Curfman McInnes count--; 3298965ea79SLois Curfman McInnes } 330606d414cSSatish Balay ierr = PetscFree(recv_waits);CHKERRQ(ierr); 3318965ea79SLois Curfman McInnes 3328965ea79SLois Curfman McInnes /* move the data into the send scatter */ 3330452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows); 3348965ea79SLois Curfman McInnes count = 0; 3358965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 3368965ea79SLois Curfman McInnes values = rvalues + i*nmax; 3378965ea79SLois Curfman McInnes for ( j=0; j<lens[i]; j++ ) { 3388965ea79SLois Curfman McInnes lrows[count++] = values[j] - base; 3398965ea79SLois Curfman McInnes } 3408965ea79SLois Curfman McInnes } 341606d414cSSatish Balay ierr = PetscFree(rvalues);CHKERRQ(ierr); 342606d414cSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 343606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 344606d414cSSatish Balay ierr = PetscFree(nprocs);CHKERRQ(ierr); 3458965ea79SLois Curfman McInnes 3468965ea79SLois Curfman McInnes /* actually zap the local rows */ 347029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 3488965ea79SLois Curfman McInnes PLogObjectParent(A,istmp); 349606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 3508965ea79SLois Curfman McInnes ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 3518965ea79SLois Curfman McInnes ierr = ISDestroy(istmp);CHKERRQ(ierr); 3528965ea79SLois Curfman McInnes 3538965ea79SLois Curfman McInnes /* wait on sends */ 3548965ea79SLois Curfman McInnes if (nsends) { 3557056b6fcSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 356ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 357606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 3588965ea79SLois Curfman McInnes } 359606d414cSSatish Balay ierr = PetscFree(send_waits);CHKERRQ(ierr); 360606d414cSSatish Balay ierr = PetscFree(svalues);CHKERRQ(ierr); 3618965ea79SLois Curfman McInnes 3623a40ed3dSBarry Smith PetscFunctionReturn(0); 3638965ea79SLois Curfman McInnes } 3648965ea79SLois Curfman McInnes 3655615d1e5SSatish Balay #undef __FUNC__ 3665615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIDense" 3678f6be9afSLois Curfman McInnes int MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 3688965ea79SLois Curfman McInnes { 36939ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 3708965ea79SLois Curfman McInnes int ierr; 371c456f294SBarry Smith 3723a40ed3dSBarry Smith PetscFunctionBegin; 37343a90d84SBarry Smith ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 37443a90d84SBarry Smith ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 37544cd7ae7SLois Curfman McInnes ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 3763a40ed3dSBarry Smith PetscFunctionReturn(0); 3778965ea79SLois Curfman McInnes } 3788965ea79SLois Curfman McInnes 3795615d1e5SSatish Balay #undef __FUNC__ 3805615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIDense" 3818f6be9afSLois Curfman McInnes int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 3828965ea79SLois Curfman McInnes { 38339ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 3848965ea79SLois Curfman McInnes int ierr; 385c456f294SBarry Smith 3863a40ed3dSBarry Smith PetscFunctionBegin; 38743a90d84SBarry Smith ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 38843a90d84SBarry Smith ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 38944cd7ae7SLois Curfman McInnes ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 3903a40ed3dSBarry Smith PetscFunctionReturn(0); 3918965ea79SLois Curfman McInnes } 3928965ea79SLois Curfman McInnes 3935615d1e5SSatish Balay #undef __FUNC__ 3945615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIDense" 3958f6be9afSLois Curfman McInnes int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy) 396096963f5SLois Curfman McInnes { 397096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 398096963f5SLois Curfman McInnes int ierr; 3993501a2bdSLois Curfman McInnes Scalar zero = 0.0; 400096963f5SLois Curfman McInnes 4013a40ed3dSBarry Smith PetscFunctionBegin; 4023501a2bdSLois Curfman McInnes ierr = VecSet(&zero,yy);CHKERRQ(ierr); 40344cd7ae7SLois Curfman McInnes ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 404537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 405537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 4063a40ed3dSBarry Smith PetscFunctionReturn(0); 407096963f5SLois Curfman McInnes } 408096963f5SLois Curfman McInnes 4095615d1e5SSatish Balay #undef __FUNC__ 4105615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIDense" 4118f6be9afSLois Curfman McInnes int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 412096963f5SLois Curfman McInnes { 413096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 414096963f5SLois Curfman McInnes int ierr; 415096963f5SLois Curfman McInnes 4163a40ed3dSBarry Smith PetscFunctionBegin; 4173501a2bdSLois Curfman McInnes ierr = VecCopy(yy,zz);CHKERRQ(ierr); 41844cd7ae7SLois Curfman McInnes ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 419537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 420537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 4213a40ed3dSBarry Smith PetscFunctionReturn(0); 422096963f5SLois Curfman McInnes } 423096963f5SLois Curfman McInnes 4245615d1e5SSatish Balay #undef __FUNC__ 4255615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIDense" 4268f6be9afSLois Curfman McInnes int MatGetDiagonal_MPIDense(Mat A,Vec v) 4278965ea79SLois Curfman McInnes { 42839ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 429096963f5SLois Curfman McInnes Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data; 43044cd7ae7SLois Curfman McInnes int ierr, len, i, n, m = a->m, radd; 43144cd7ae7SLois Curfman McInnes Scalar *x, zero = 0.0; 432ed3cc1f0SBarry Smith 4333a40ed3dSBarry Smith PetscFunctionBegin; 43444cd7ae7SLois Curfman McInnes VecSet(&zero,v); 435096963f5SLois Curfman McInnes ierr = VecGetArray(v,&x);CHKERRQ(ierr); 436096963f5SLois Curfman McInnes ierr = VecGetSize(v,&n);CHKERRQ(ierr); 437a8c6a408SBarry Smith if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec"); 43844cd7ae7SLois Curfman McInnes len = PetscMin(aloc->m,aloc->n); 4397ddc982cSLois Curfman McInnes radd = a->rstart*m; 44044cd7ae7SLois Curfman McInnes for ( i=0; i<len; i++ ) { 441096963f5SLois Curfman McInnes x[i] = aloc->v[radd + i*m + i]; 442096963f5SLois Curfman McInnes } 4439a8c540fSSatish Balay ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 4443a40ed3dSBarry Smith PetscFunctionReturn(0); 4458965ea79SLois Curfman McInnes } 4468965ea79SLois Curfman McInnes 4475615d1e5SSatish Balay #undef __FUNC__ 4485615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIDense" 449e1311b90SBarry Smith int MatDestroy_MPIDense(Mat mat) 4508965ea79SLois Curfman McInnes { 4513501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4528965ea79SLois Curfman McInnes int ierr; 453ed3cc1f0SBarry Smith 4543a40ed3dSBarry Smith PetscFunctionBegin; 45594d884c6SBarry Smith 45694d884c6SBarry Smith if (mat->mapping) { 45794d884c6SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 45894d884c6SBarry Smith } 45994d884c6SBarry Smith if (mat->bmapping) { 46094d884c6SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 46194d884c6SBarry Smith } 462aa482453SBarry Smith #if defined(PETSC_USE_LOG) 463e1311b90SBarry Smith PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N); 4648965ea79SLois Curfman McInnes #endif 4658798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 466606d414cSSatish Balay ierr = PetscFree(mdn->rowners);CHKERRQ(ierr); 4673501a2bdSLois Curfman McInnes ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 4683501a2bdSLois Curfman McInnes if (mdn->lvec) VecDestroy(mdn->lvec); 4693501a2bdSLois Curfman McInnes if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 470622d7880SLois Curfman McInnes if (mdn->factor) { 471606d414cSSatish Balay if (mdn->factor->temp) {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);} 472606d414cSSatish Balay if (mdn->factor->tag) {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);} 473606d414cSSatish Balay if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);} 474606d414cSSatish Balay ierr = PetscFree(mdn->factor);CHKERRQ(ierr); 475622d7880SLois Curfman McInnes } 476606d414cSSatish Balay ierr = PetscFree(mdn);CHKERRQ(ierr); 47761b13de0SBarry Smith if (mat->rmap) { 47861b13de0SBarry Smith ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 47961b13de0SBarry Smith } 48061b13de0SBarry Smith if (mat->cmap) { 48161b13de0SBarry Smith ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 48290f02eecSBarry Smith } 4838965ea79SLois Curfman McInnes PLogObjectDestroy(mat); 4840452661fSBarry Smith PetscHeaderDestroy(mat); 4853a40ed3dSBarry Smith PetscFunctionReturn(0); 4868965ea79SLois Curfman McInnes } 48739ddd567SLois Curfman McInnes 4885615d1e5SSatish Balay #undef __FUNC__ 4895615d1e5SSatish Balay #define __FUNC__ "MatView_MPIDense_Binary" 49039ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer) 4918965ea79SLois Curfman McInnes { 49239ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4938965ea79SLois Curfman McInnes int ierr; 4947056b6fcSBarry Smith 4953a40ed3dSBarry Smith PetscFunctionBegin; 49639ddd567SLois Curfman McInnes if (mdn->size == 1) { 49739ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 4988965ea79SLois Curfman McInnes } 499a8c6a408SBarry Smith else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 5003a40ed3dSBarry Smith PetscFunctionReturn(0); 5018965ea79SLois Curfman McInnes } 5028965ea79SLois Curfman McInnes 5035615d1e5SSatish Balay #undef __FUNC__ 5045615d1e5SSatish Balay #define __FUNC__ "MatView_MPIDense_ASCII" 50539ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer) 5068965ea79SLois Curfman McInnes { 50739ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 50877ed5343SBarry Smith int ierr, format, size = mdn->size, rank = mdn->rank; 5098965ea79SLois Curfman McInnes FILE *fd; 51019bcc07fSBarry Smith ViewerType vtype; 5118965ea79SLois Curfman McInnes 5123a40ed3dSBarry Smith PetscFunctionBegin; 5133a40ed3dSBarry Smith ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 51490ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr); 515888f2ed8SSatish Balay ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 516639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 5174e220ebcSLois Curfman McInnes MatInfo info; 518888f2ed8SSatish Balay ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 51977c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 5204e220ebcSLois Curfman McInnes fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m, 5214e220ebcSLois Curfman McInnes (int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 522096963f5SLois Curfman McInnes fflush(fd); 52377c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 5243501a2bdSLois Curfman McInnes ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 5253a40ed3dSBarry Smith PetscFunctionReturn(0); 52696f6c058SBarry Smith } else if (format == VIEWER_FORMAT_ASCII_INFO) { 5273a40ed3dSBarry Smith PetscFunctionReturn(0); 5288965ea79SLois Curfman McInnes } 52977ed5343SBarry Smith 5308965ea79SLois Curfman McInnes if (size == 1) { 53139ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 5323a40ed3dSBarry Smith } else { 5338965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 5348965ea79SLois Curfman McInnes Mat A; 53539ddd567SLois Curfman McInnes int M = mdn->M, N = mdn->N,m,row,i, nz, *cols; 53639ddd567SLois Curfman McInnes Scalar *vals; 53739ddd567SLois Curfman McInnes Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data; 5388965ea79SLois Curfman McInnes 5398965ea79SLois Curfman McInnes if (!rank) { 5400513a670SBarry Smith ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr); 5413a40ed3dSBarry Smith } else { 5420513a670SBarry Smith ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr); 5438965ea79SLois Curfman McInnes } 5448965ea79SLois Curfman McInnes PLogObjectParent(mat,A); 5458965ea79SLois Curfman McInnes 54639ddd567SLois Curfman McInnes /* Copy the matrix ... This isn't the most efficient means, 54739ddd567SLois Curfman McInnes but it's quick for now */ 54839ddd567SLois Curfman McInnes row = mdn->rstart; m = Amdn->m; 5498965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 55039ddd567SLois Curfman McInnes ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 55139ddd567SLois Curfman McInnes ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 55239ddd567SLois Curfman McInnes ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 55339ddd567SLois Curfman McInnes row++; 5548965ea79SLois Curfman McInnes } 5558965ea79SLois Curfman McInnes 5566d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5576d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5588965ea79SLois Curfman McInnes if (!rank) { 55939ddd567SLois Curfman McInnes ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr); 5608965ea79SLois Curfman McInnes } 5618965ea79SLois Curfman McInnes ierr = MatDestroy(A);CHKERRQ(ierr); 5628965ea79SLois Curfman McInnes } 5633a40ed3dSBarry Smith PetscFunctionReturn(0); 5648965ea79SLois Curfman McInnes } 5658965ea79SLois Curfman McInnes 5665615d1e5SSatish Balay #undef __FUNC__ 5675615d1e5SSatish Balay #define __FUNC__ "MatView_MPIDense" 568e1311b90SBarry Smith int MatView_MPIDense(Mat mat,Viewer viewer) 5698965ea79SLois Curfman McInnes { 57039ddd567SLois Curfman McInnes int ierr; 571bcd2baecSBarry Smith ViewerType vtype; 5728965ea79SLois Curfman McInnes 573bcd2baecSBarry Smith ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 5743f1db9ecSBarry Smith if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 57539ddd567SLois Curfman McInnes ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr); 5763f1db9ecSBarry Smith } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 5773a40ed3dSBarry Smith ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 5785cd90555SBarry Smith } else { 5795cd90555SBarry Smith SETERRQ(1,1,"Viewer type not supported by PETSc object"); 5808965ea79SLois Curfman McInnes } 5813a40ed3dSBarry Smith PetscFunctionReturn(0); 5828965ea79SLois Curfman McInnes } 5838965ea79SLois Curfman McInnes 5845615d1e5SSatish Balay #undef __FUNC__ 5855615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIDense" 5868f6be9afSLois Curfman McInnes int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 5878965ea79SLois Curfman McInnes { 5883501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 5893501a2bdSLois Curfman McInnes Mat mdn = mat->A; 5904e220ebcSLois Curfman McInnes int ierr; 5914e220ebcSLois Curfman McInnes double isend[5], irecv[5]; 5928965ea79SLois Curfman McInnes 5933a40ed3dSBarry Smith PetscFunctionBegin; 5944e220ebcSLois Curfman McInnes info->rows_global = (double)mat->M; 5954e220ebcSLois Curfman McInnes info->columns_global = (double)mat->N; 5964e220ebcSLois Curfman McInnes info->rows_local = (double)mat->m; 5974e220ebcSLois Curfman McInnes info->columns_local = (double)mat->N; 5984e220ebcSLois Curfman McInnes info->block_size = 1.0; 5994e220ebcSLois Curfman McInnes ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 6004e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 6014e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 6028965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 6034e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 6044e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 6054e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 6064e220ebcSLois Curfman McInnes info->memory = isend[3]; 6074e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 6088965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 609f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr); 6104e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 6114e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 6124e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 6134e220ebcSLois Curfman McInnes info->memory = irecv[3]; 6144e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 6158965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 616f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr); 6174e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 6184e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 6194e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 6204e220ebcSLois Curfman McInnes info->memory = irecv[3]; 6214e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 6228965ea79SLois Curfman McInnes } 6234e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 6244e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 6254e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 6263a40ed3dSBarry Smith PetscFunctionReturn(0); 6278965ea79SLois Curfman McInnes } 6288965ea79SLois Curfman McInnes 6298c469469SLois Curfman McInnes /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*); 6308aaee692SLois Curfman McInnes extern int MatLUFactorNumeric_MPIDense(Mat,Mat*); 6318aaee692SLois Curfman McInnes extern int MatLUFactor_MPIDense(Mat,IS,IS,double); 6328aaee692SLois Curfman McInnes extern int MatSolve_MPIDense(Mat,Vec,Vec); 6338c469469SLois Curfman McInnes extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec); 6348aaee692SLois Curfman McInnes extern int MatSolveTrans_MPIDense(Mat,Vec,Vec); 6358aaee692SLois Curfman McInnes extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */ 6368aaee692SLois Curfman McInnes 6375615d1e5SSatish Balay #undef __FUNC__ 6385615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIDense" 6398f6be9afSLois Curfman McInnes int MatSetOption_MPIDense(Mat A,MatOption op) 6408965ea79SLois Curfman McInnes { 64139ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 6428965ea79SLois Curfman McInnes 6433a40ed3dSBarry Smith PetscFunctionBegin; 6446d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 6456d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 6464787f768SSatish Balay op == MAT_NEW_NONZERO_LOCATION_ERR || 6474787f768SSatish Balay op == MAT_NEW_NONZERO_ALLOCATION_ERR || 648219d9a1aSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 649219d9a1aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED) { 650b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 651b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 652aeafbbfcSLois Curfman McInnes a->roworiented = 1; 6538965ea79SLois Curfman McInnes MatSetOption(a->A,op); 654b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 655219d9a1aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 6566d4a8577SBarry Smith op == MAT_SYMMETRIC || 6576d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 658b51ba29fSSatish Balay op == MAT_YES_NEW_DIAGONALS || 659b51ba29fSSatish Balay op == MAT_USE_HASH_TABLE) { 660981c4779SBarry Smith PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n"); 6613a40ed3dSBarry Smith } else if (op == MAT_COLUMN_ORIENTED) { 6623a40ed3dSBarry Smith a->roworiented = 0; MatSetOption(a->A,op); 6633782ba37SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 6643782ba37SSatish Balay a->donotstash = 1; 6653a40ed3dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) { 6663a40ed3dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 6673a40ed3dSBarry Smith } else { 6683a40ed3dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 6693a40ed3dSBarry Smith } 6703a40ed3dSBarry Smith PetscFunctionReturn(0); 6718965ea79SLois Curfman McInnes } 6728965ea79SLois Curfman McInnes 6735615d1e5SSatish Balay #undef __FUNC__ 6745615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIDense" 6758f6be9afSLois Curfman McInnes int MatGetSize_MPIDense(Mat A,int *m,int *n) 6768965ea79SLois Curfman McInnes { 6773501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6783a40ed3dSBarry Smith 6793a40ed3dSBarry Smith PetscFunctionBegin; 6808965ea79SLois Curfman McInnes *m = mat->M; *n = mat->N; 6813a40ed3dSBarry Smith PetscFunctionReturn(0); 6828965ea79SLois Curfman McInnes } 6838965ea79SLois Curfman McInnes 6845615d1e5SSatish Balay #undef __FUNC__ 6855615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIDense" 6868f6be9afSLois Curfman McInnes int MatGetLocalSize_MPIDense(Mat A,int *m,int *n) 6878965ea79SLois Curfman McInnes { 6883501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6893a40ed3dSBarry Smith 6903a40ed3dSBarry Smith PetscFunctionBegin; 6918965ea79SLois Curfman McInnes *m = mat->m; *n = mat->N; 6923a40ed3dSBarry Smith PetscFunctionReturn(0); 6938965ea79SLois Curfman McInnes } 6948965ea79SLois Curfman McInnes 6955615d1e5SSatish Balay #undef __FUNC__ 6965615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIDense" 6978f6be9afSLois Curfman McInnes int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n) 6988965ea79SLois Curfman McInnes { 6993501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 7003a40ed3dSBarry Smith 7013a40ed3dSBarry Smith PetscFunctionBegin; 7028965ea79SLois Curfman McInnes *m = mat->rstart; *n = mat->rend; 7033a40ed3dSBarry Smith PetscFunctionReturn(0); 7048965ea79SLois Curfman McInnes } 7058965ea79SLois Curfman McInnes 7065615d1e5SSatish Balay #undef __FUNC__ 7075615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIDense" 7088f6be9afSLois Curfman McInnes int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v) 7098965ea79SLois Curfman McInnes { 7103501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 7113a40ed3dSBarry Smith int lrow, rstart = mat->rstart, rend = mat->rend,ierr; 7128965ea79SLois Curfman McInnes 7133a40ed3dSBarry Smith PetscFunctionBegin; 714a8c6a408SBarry Smith if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows") 7158965ea79SLois Curfman McInnes lrow = row - rstart; 7163a40ed3dSBarry Smith ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr); 7173a40ed3dSBarry Smith PetscFunctionReturn(0); 7188965ea79SLois Curfman McInnes } 7198965ea79SLois Curfman McInnes 7205615d1e5SSatish Balay #undef __FUNC__ 7215615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIDense" 7228f6be9afSLois Curfman McInnes int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v) 7238965ea79SLois Curfman McInnes { 724606d414cSSatish Balay int ierr; 725606d414cSSatish Balay 7263a40ed3dSBarry Smith PetscFunctionBegin; 727606d414cSSatish Balay if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 728606d414cSSatish Balay if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 7293a40ed3dSBarry Smith PetscFunctionReturn(0); 7308965ea79SLois Curfman McInnes } 7318965ea79SLois Curfman McInnes 7325615d1e5SSatish Balay #undef __FUNC__ 7335b2fa520SLois Curfman McInnes #define __FUNC__ "MatDiagonalScale_MPIDense" 7345b2fa520SLois Curfman McInnes int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 7355b2fa520SLois Curfman McInnes { 7365b2fa520SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) A->data; 7375b2fa520SLois Curfman McInnes Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data; 7385b2fa520SLois Curfman McInnes Scalar *l,*r,x,*v; 739*72d926a5SLois Curfman McInnes int ierr,i,j,s2a,s3a,s2,s3,m=mat->m,n=mat->n; 7405b2fa520SLois Curfman McInnes 7415b2fa520SLois Curfman McInnes PetscFunctionBegin; 742*72d926a5SLois Curfman McInnes ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 7435b2fa520SLois Curfman McInnes if (ll) { 744*72d926a5SLois Curfman McInnes ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 745*72d926a5SLois Curfman McInnes if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size"); 7465b2fa520SLois Curfman McInnes ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 7475b2fa520SLois Curfman McInnes for ( i=0; i<m; i++ ) { 7485b2fa520SLois Curfman McInnes x = l[i]; 7495b2fa520SLois Curfman McInnes v = mat->v + i; 7505b2fa520SLois Curfman McInnes for ( j=0; j<n; j++ ) { (*v) *= x; v+= m;} 7515b2fa520SLois Curfman McInnes } 7525b2fa520SLois Curfman McInnes ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 7535b2fa520SLois Curfman McInnes PLogFlops(n*m); 7545b2fa520SLois Curfman McInnes } 7555b2fa520SLois Curfman McInnes if (rr) { 756*72d926a5SLois Curfman McInnes ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr); 757*72d926a5SLois Curfman McInnes if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec non-conforming local size"); 7585b2fa520SLois Curfman McInnes ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 7595b2fa520SLois Curfman McInnes ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 7605b2fa520SLois Curfman McInnes ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 7615b2fa520SLois Curfman McInnes for ( i=0; i<n; i++ ) { 7625b2fa520SLois Curfman McInnes x = r[i]; 7635b2fa520SLois Curfman McInnes v = mat->v + i*m; 7645b2fa520SLois Curfman McInnes for ( j=0; j<m; j++ ) { (*v++) *= x;} 7655b2fa520SLois Curfman McInnes } 766*72d926a5SLois Curfman McInnes ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 7675b2fa520SLois Curfman McInnes PLogFlops(n*m); 7685b2fa520SLois Curfman McInnes } 7695b2fa520SLois Curfman McInnes PetscFunctionReturn(0); 7705b2fa520SLois Curfman McInnes } 7715b2fa520SLois Curfman McInnes 7725b2fa520SLois Curfman McInnes #undef __FUNC__ 7735615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIDense" 7748f6be9afSLois Curfman McInnes int MatNorm_MPIDense(Mat A,NormType type,double *norm) 775096963f5SLois Curfman McInnes { 7763501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) A->data; 7773501a2bdSLois Curfman McInnes Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data; 7783501a2bdSLois Curfman McInnes int ierr, i, j; 7793501a2bdSLois Curfman McInnes double sum = 0.0; 7803501a2bdSLois Curfman McInnes Scalar *v = mat->v; 7813501a2bdSLois Curfman McInnes 7823a40ed3dSBarry Smith PetscFunctionBegin; 7833501a2bdSLois Curfman McInnes if (mdn->size == 1) { 7843501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr); 7853501a2bdSLois Curfman McInnes } else { 7863501a2bdSLois Curfman McInnes if (type == NORM_FROBENIUS) { 7873501a2bdSLois Curfman McInnes for (i=0; i<mat->n*mat->m; i++ ) { 788aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 789e20fef11SSatish Balay sum += PetscReal(PetscConj(*v)*(*v)); v++; 7903501a2bdSLois Curfman McInnes #else 7913501a2bdSLois Curfman McInnes sum += (*v)*(*v); v++; 7923501a2bdSLois Curfman McInnes #endif 7933501a2bdSLois Curfman McInnes } 794ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr); 7953501a2bdSLois Curfman McInnes *norm = sqrt(*norm); 7963501a2bdSLois Curfman McInnes PLogFlops(2*mat->n*mat->m); 7973a40ed3dSBarry Smith } else if (type == NORM_1) { 7983501a2bdSLois Curfman McInnes double *tmp, *tmp2; 7990452661fSBarry Smith tmp = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp); 8003501a2bdSLois Curfman McInnes tmp2 = tmp + mdn->N; 801549d3d68SSatish Balay ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr); 802096963f5SLois Curfman McInnes *norm = 0.0; 8033501a2bdSLois Curfman McInnes v = mat->v; 8043501a2bdSLois Curfman McInnes for ( j=0; j<mat->n; j++ ) { 8053501a2bdSLois Curfman McInnes for ( i=0; i<mat->m; i++ ) { 80667e560aaSBarry Smith tmp[j] += PetscAbsScalar(*v); v++; 8073501a2bdSLois Curfman McInnes } 8083501a2bdSLois Curfman McInnes } 809ca161407SBarry Smith ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr); 8103501a2bdSLois Curfman McInnes for ( j=0; j<mdn->N; j++ ) { 8113501a2bdSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 8123501a2bdSLois Curfman McInnes } 813606d414cSSatish Balay ierr = PetscFree(tmp);CHKERRQ(ierr); 8143501a2bdSLois Curfman McInnes PLogFlops(mat->n*mat->m); 8153a40ed3dSBarry Smith } else if (type == NORM_INFINITY) { /* max row norm */ 8163501a2bdSLois Curfman McInnes double ntemp; 8173501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 818ca161407SBarry Smith ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr); 8193a40ed3dSBarry Smith } else { 820a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 8213501a2bdSLois Curfman McInnes } 8223501a2bdSLois Curfman McInnes } 8233a40ed3dSBarry Smith PetscFunctionReturn(0); 8243501a2bdSLois Curfman McInnes } 8253501a2bdSLois Curfman McInnes 8265615d1e5SSatish Balay #undef __FUNC__ 8275615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIDense" 8288f6be9afSLois Curfman McInnes int MatTranspose_MPIDense(Mat A,Mat *matout) 8293501a2bdSLois Curfman McInnes { 8303501a2bdSLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 8313501a2bdSLois Curfman McInnes Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data; 8323501a2bdSLois Curfman McInnes Mat B; 8333501a2bdSLois Curfman McInnes int M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart; 8343501a2bdSLois Curfman McInnes int j, i, ierr; 8353501a2bdSLois Curfman McInnes Scalar *v; 8363501a2bdSLois Curfman McInnes 8373a40ed3dSBarry Smith PetscFunctionBegin; 8387056b6fcSBarry Smith if (matout == PETSC_NULL && M != N) { 839a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place"); 8407056b6fcSBarry Smith } 8417056b6fcSBarry Smith ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr); 8423501a2bdSLois Curfman McInnes 8433501a2bdSLois Curfman McInnes m = Aloc->m; n = Aloc->n; v = Aloc->v; 8440452661fSBarry Smith rwork = (int *) PetscMalloc(n*sizeof(int));CHKPTRQ(rwork); 8453501a2bdSLois Curfman McInnes for ( j=0; j<n; j++ ) { 8463501a2bdSLois Curfman McInnes for (i=0; i<m; i++) rwork[i] = rstart + i; 8473501a2bdSLois Curfman McInnes ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 8483501a2bdSLois Curfman McInnes v += m; 8493501a2bdSLois Curfman McInnes } 850606d414cSSatish Balay ierr = PetscFree(rwork);CHKERRQ(ierr); 8516d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8526d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8533638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 8543501a2bdSLois Curfman McInnes *matout = B; 8553501a2bdSLois Curfman McInnes } else { 856f830108cSBarry Smith PetscOps *Abops; 85709dc0095SBarry Smith MatOps Aops; 858f830108cSBarry Smith 8593501a2bdSLois Curfman McInnes /* This isn't really an in-place transpose, but free data struct from a */ 860606d414cSSatish Balay ierr = PetscFree(a->rowners);CHKERRQ(ierr); 8613501a2bdSLois Curfman McInnes ierr = MatDestroy(a->A);CHKERRQ(ierr); 8623501a2bdSLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 8633501a2bdSLois Curfman McInnes if (a->Mvctx) VecScatterDestroy(a->Mvctx); 864606d414cSSatish Balay ierr = PetscFree(a);CHKERRQ(ierr); 865f830108cSBarry Smith 866f830108cSBarry Smith /* 867f830108cSBarry Smith This is horrible, horrible code. We need to keep the 868f830108cSBarry Smith A pointers for the bops and ops but copy everything 869f830108cSBarry Smith else from C. 870f830108cSBarry Smith */ 871f830108cSBarry Smith Abops = A->bops; 872f830108cSBarry Smith Aops = A->ops; 873549d3d68SSatish Balay ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 874f830108cSBarry Smith A->bops = Abops; 875f830108cSBarry Smith A->ops = Aops; 876f830108cSBarry Smith 8770452661fSBarry Smith PetscHeaderDestroy(B); 8783501a2bdSLois Curfman McInnes } 8793a40ed3dSBarry Smith PetscFunctionReturn(0); 880096963f5SLois Curfman McInnes } 881096963f5SLois Curfman McInnes 882eadb2fb4SBarry Smith #include "pinclude/blaslapack.h" 8835615d1e5SSatish Balay #undef __FUNC__ 8845615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIDense" 8858f6be9afSLois Curfman McInnes int MatScale_MPIDense(Scalar *alpha,Mat inA) 88644cd7ae7SLois Curfman McInnes { 88744cd7ae7SLois Curfman McInnes Mat_MPIDense *A = (Mat_MPIDense *) inA->data; 88844cd7ae7SLois Curfman McInnes Mat_SeqDense *a = (Mat_SeqDense *) A->A->data; 88944cd7ae7SLois Curfman McInnes int one = 1, nz; 89044cd7ae7SLois Curfman McInnes 8913a40ed3dSBarry Smith PetscFunctionBegin; 89244cd7ae7SLois Curfman McInnes nz = a->m*a->n; 89344cd7ae7SLois Curfman McInnes BLscal_( &nz, alpha, a->v, &one ); 89444cd7ae7SLois Curfman McInnes PLogFlops(nz); 8953a40ed3dSBarry Smith PetscFunctionReturn(0); 89644cd7ae7SLois Curfman McInnes } 89744cd7ae7SLois Curfman McInnes 8985609ef8eSBarry Smith static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 8997b2a1423SBarry Smith extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **); 9008965ea79SLois Curfman McInnes 9018965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/ 90209dc0095SBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 90309dc0095SBarry Smith MatGetRow_MPIDense, 90409dc0095SBarry Smith MatRestoreRow_MPIDense, 90509dc0095SBarry Smith MatMult_MPIDense, 90609dc0095SBarry Smith MatMultAdd_MPIDense, 90709dc0095SBarry Smith MatMultTrans_MPIDense, 90809dc0095SBarry Smith MatMultTransAdd_MPIDense, 9098965ea79SLois Curfman McInnes 0, 91009dc0095SBarry Smith 0, 91109dc0095SBarry Smith 0, 91209dc0095SBarry Smith 0, 91309dc0095SBarry Smith 0, 91409dc0095SBarry Smith 0, 91509dc0095SBarry Smith 0, 91609dc0095SBarry Smith MatTranspose_MPIDense, 91709dc0095SBarry Smith MatGetInfo_MPIDense,0, 91809dc0095SBarry Smith MatGetDiagonal_MPIDense, 9195b2fa520SLois Curfman McInnes MatDiagonalScale_MPIDense, 92009dc0095SBarry Smith MatNorm_MPIDense, 92109dc0095SBarry Smith MatAssemblyBegin_MPIDense, 92209dc0095SBarry Smith MatAssemblyEnd_MPIDense, 92309dc0095SBarry Smith 0, 92409dc0095SBarry Smith MatSetOption_MPIDense, 92509dc0095SBarry Smith MatZeroEntries_MPIDense, 92609dc0095SBarry Smith MatZeroRows_MPIDense, 92709dc0095SBarry Smith 0, 92809dc0095SBarry Smith 0, 92909dc0095SBarry Smith 0, 93009dc0095SBarry Smith 0, 93109dc0095SBarry Smith MatGetSize_MPIDense, 93209dc0095SBarry Smith MatGetLocalSize_MPIDense, 93339ddd567SLois Curfman McInnes MatGetOwnershipRange_MPIDense, 93409dc0095SBarry Smith 0, 93509dc0095SBarry Smith 0, 93609dc0095SBarry Smith MatGetArray_MPIDense, 93709dc0095SBarry Smith MatRestoreArray_MPIDense, 9385609ef8eSBarry Smith MatDuplicate_MPIDense, 93909dc0095SBarry Smith 0, 94009dc0095SBarry Smith 0, 94109dc0095SBarry Smith 0, 94209dc0095SBarry Smith 0, 94309dc0095SBarry Smith 0, 9442ce60cd0SSatish Balay MatGetSubMatrices_MPIDense, 94509dc0095SBarry Smith 0, 94609dc0095SBarry Smith MatGetValues_MPIDense, 94709dc0095SBarry Smith 0, 94809dc0095SBarry Smith 0, 94909dc0095SBarry Smith MatScale_MPIDense, 95009dc0095SBarry Smith 0, 95109dc0095SBarry Smith 0, 95209dc0095SBarry Smith 0, 95309dc0095SBarry Smith MatGetBlockSize_MPIDense, 95409dc0095SBarry Smith 0, 95509dc0095SBarry Smith 0, 95609dc0095SBarry Smith 0, 95709dc0095SBarry Smith 0, 95809dc0095SBarry Smith 0, 95909dc0095SBarry Smith 0, 96009dc0095SBarry Smith 0, 96109dc0095SBarry Smith 0, 96209dc0095SBarry Smith 0, 963ca3fa75bSLois Curfman McInnes MatGetSubMatrix_MPIDense, 96409dc0095SBarry Smith 0, 96509dc0095SBarry Smith 0, 96609dc0095SBarry Smith MatGetMaps_Petsc}; 9678965ea79SLois Curfman McInnes 9685615d1e5SSatish Balay #undef __FUNC__ 9695615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIDense" 9708965ea79SLois Curfman McInnes /*@C 97139ddd567SLois Curfman McInnes MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 9728965ea79SLois Curfman McInnes 973db81eaa0SLois Curfman McInnes Collective on MPI_Comm 974db81eaa0SLois Curfman McInnes 9758965ea79SLois Curfman McInnes Input Parameters: 976db81eaa0SLois Curfman McInnes + comm - MPI communicator 9778965ea79SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 978db81eaa0SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 9798965ea79SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 980db81eaa0SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 981db81eaa0SLois Curfman McInnes - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 982dfc5480cSLois Curfman McInnes to control all matrix memory allocation. 9838965ea79SLois Curfman McInnes 9848965ea79SLois Curfman McInnes Output Parameter: 985477f1c0bSLois Curfman McInnes . A - the matrix 9868965ea79SLois Curfman McInnes 987b259b22eSLois Curfman McInnes Notes: 98839ddd567SLois Curfman McInnes The dense format is fully compatible with standard Fortran 77 98939ddd567SLois Curfman McInnes storage by columns. 9908965ea79SLois Curfman McInnes 99118f449edSLois Curfman McInnes The data input variable is intended primarily for Fortran programmers 99218f449edSLois Curfman McInnes who wish to allocate their own matrix memory space. Most users should 993b4fd4287SBarry Smith set data=PETSC_NULL. 99418f449edSLois Curfman McInnes 9958965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 9968965ea79SLois Curfman McInnes (possibly both). 9978965ea79SLois Curfman McInnes 9983501a2bdSLois Curfman McInnes Currently, the only parallel dense matrix decomposition is by rows, 9993501a2bdSLois Curfman McInnes so that n=N and each submatrix owns all of the global columns. 10003501a2bdSLois Curfman McInnes 1001027ccd11SLois Curfman McInnes Level: intermediate 1002027ccd11SLois Curfman McInnes 100339ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel 10048965ea79SLois Curfman McInnes 100539ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 10068965ea79SLois Curfman McInnes @*/ 1007477f1c0bSLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A) 10088965ea79SLois Curfman McInnes { 10098965ea79SLois Curfman McInnes Mat mat; 101039ddd567SLois Curfman McInnes Mat_MPIDense *a; 101125cdf11fSBarry Smith int ierr, i,flg; 10128965ea79SLois Curfman McInnes 10133a40ed3dSBarry Smith PetscFunctionBegin; 1014ed2daf61SLois Curfman McInnes /* Note: For now, when data is specified above, this assumes the user correctly 1015ed2daf61SLois Curfman McInnes allocates the local dense storage space. We should add error checking. */ 101618f449edSLois Curfman McInnes 1017477f1c0bSLois Curfman McInnes *A = 0; 10183f1db9ecSBarry Smith PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView); 10198965ea79SLois Curfman McInnes PLogObjectCreate(mat); 10200452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a); 1021549d3d68SSatish Balay ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1022e1311b90SBarry Smith mat->ops->destroy = MatDestroy_MPIDense; 1023e1311b90SBarry Smith mat->ops->view = MatView_MPIDense; 10248965ea79SLois Curfman McInnes mat->factor = 0; 102590f02eecSBarry Smith mat->mapping = 0; 10268965ea79SLois Curfman McInnes 1027622d7880SLois Curfman McInnes a->factor = 0; 1028e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 1029d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr); 1030d132466eSBarry Smith ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr); 10318965ea79SLois Curfman McInnes 103296f6c058SBarry Smith ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr); 103339ddd567SLois Curfman McInnes 1034c7fcc2eaSBarry Smith /* 1035c7fcc2eaSBarry Smith The computation of n is wrong below, n should represent the number of local 1036c7fcc2eaSBarry Smith rows in the right (column vector) 1037c7fcc2eaSBarry Smith */ 1038c7fcc2eaSBarry Smith 103939ddd567SLois Curfman McInnes /* each row stores all columns */ 104039ddd567SLois Curfman McInnes if (N == PETSC_DECIDE) N = n; 1041d7e8b826SBarry Smith if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1042a8c6a408SBarry Smith /* if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */ 1043aca0ad90SLois Curfman McInnes a->N = mat->N = N; 1044aca0ad90SLois Curfman McInnes a->M = mat->M = M; 1045aca0ad90SLois Curfman McInnes a->m = mat->m = m; 1046aca0ad90SLois Curfman McInnes a->n = mat->n = n; 10478965ea79SLois Curfman McInnes 1048c7fcc2eaSBarry Smith /* the information in the maps duplicates the information computed below, eventually 1049c7fcc2eaSBarry Smith we should remove the duplicate information that is not contained in the maps */ 1050488ecbafSBarry Smith ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr); 105196f6c058SBarry Smith ierr = MapCreateMPI(comm,PETSC_DECIDE,N,&mat->cmap);CHKERRQ(ierr); 1052c7fcc2eaSBarry Smith 10538965ea79SLois Curfman McInnes /* build local table of row and column ownerships */ 1054d7e8b826SBarry Smith a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 1055d7e8b826SBarry Smith a->cowners = a->rowners + a->size + 1; 1056f09e8eb9SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense)); 1057ca161407SBarry Smith ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 10588965ea79SLois Curfman McInnes a->rowners[0] = 0; 10598965ea79SLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 10608965ea79SLois Curfman McInnes a->rowners[i] += a->rowners[i-1]; 10618965ea79SLois Curfman McInnes } 10628965ea79SLois Curfman McInnes a->rstart = a->rowners[a->rank]; 10638965ea79SLois Curfman McInnes a->rend = a->rowners[a->rank+1]; 1064ca161407SBarry Smith ierr = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1065d7e8b826SBarry Smith a->cowners[0] = 0; 1066d7e8b826SBarry Smith for ( i=2; i<=a->size; i++ ) { 1067d7e8b826SBarry Smith a->cowners[i] += a->cowners[i-1]; 1068d7e8b826SBarry Smith } 10698965ea79SLois Curfman McInnes 1070029af93fSBarry Smith ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr); 10718965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 10728965ea79SLois Curfman McInnes 10738965ea79SLois Curfman McInnes /* build cache for off array entries formed */ 10743782ba37SSatish Balay a->donotstash = 0; 10758798bf22SSatish Balay ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr); 10768965ea79SLois Curfman McInnes 10778965ea79SLois Curfman McInnes /* stuff used for matrix vector multiply */ 10788965ea79SLois Curfman McInnes a->lvec = 0; 10798965ea79SLois Curfman McInnes a->Mvctx = 0; 108039b7565bSBarry Smith a->roworiented = 1; 10818965ea79SLois Curfman McInnes 1082477f1c0bSLois Curfman McInnes *A = mat; 108325cdf11fSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 108425cdf11fSBarry Smith if (flg) { 10858c469469SLois Curfman McInnes ierr = MatPrintHelp(mat);CHKERRQ(ierr); 10868c469469SLois Curfman McInnes } 10873a40ed3dSBarry Smith PetscFunctionReturn(0); 10888965ea79SLois Curfman McInnes } 10898965ea79SLois Curfman McInnes 10905615d1e5SSatish Balay #undef __FUNC__ 10915609ef8eSBarry Smith #define __FUNC__ "MatDuplicate_MPIDense" 10925609ef8eSBarry Smith static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 10938965ea79SLois Curfman McInnes { 10948965ea79SLois Curfman McInnes Mat mat; 10953501a2bdSLois Curfman McInnes Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data; 109639ddd567SLois Curfman McInnes int ierr; 10972ba99913SLois Curfman McInnes FactorCtx *factor; 10988965ea79SLois Curfman McInnes 10993a40ed3dSBarry Smith PetscFunctionBegin; 11008965ea79SLois Curfman McInnes *newmat = 0; 11013f1db9ecSBarry Smith PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView); 11028965ea79SLois Curfman McInnes PLogObjectCreate(mat); 11030452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a); 1104549d3d68SSatish Balay ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1105e1311b90SBarry Smith mat->ops->destroy = MatDestroy_MPIDense; 1106e1311b90SBarry Smith mat->ops->view = MatView_MPIDense; 11073501a2bdSLois Curfman McInnes mat->factor = A->factor; 1108c456f294SBarry Smith mat->assembled = PETSC_TRUE; 11098965ea79SLois Curfman McInnes 111044cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 111144cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 111244cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 111344cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 11142ba99913SLois Curfman McInnes if (oldmat->factor) { 11152ba99913SLois Curfman McInnes a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor); 11162ba99913SLois Curfman McInnes /* copy factor contents ... add this code! */ 11172ba99913SLois Curfman McInnes } else a->factor = 0; 11188965ea79SLois Curfman McInnes 11198965ea79SLois Curfman McInnes a->rstart = oldmat->rstart; 11208965ea79SLois Curfman McInnes a->rend = oldmat->rend; 11218965ea79SLois Curfman McInnes a->size = oldmat->size; 11228965ea79SLois Curfman McInnes a->rank = oldmat->rank; 1123e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 11243782ba37SSatish Balay a->donotstash = oldmat->donotstash; 11250452661fSBarry Smith a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners); 1126f09e8eb9SSatish Balay PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense)); 1127549d3d68SSatish Balay ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr); 11288798bf22SSatish Balay ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr); 11298965ea79SLois Curfman McInnes 11308965ea79SLois Curfman McInnes ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 11318965ea79SLois Curfman McInnes PLogObjectParent(mat,a->lvec); 113255659b69SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 11338965ea79SLois Curfman McInnes PLogObjectParent(mat,a->Mvctx); 11345609ef8eSBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 11358965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 11368965ea79SLois Curfman McInnes *newmat = mat; 11373a40ed3dSBarry Smith PetscFunctionReturn(0); 11388965ea79SLois Curfman McInnes } 11398965ea79SLois Curfman McInnes 114077c4ece6SBarry Smith #include "sys.h" 11418965ea79SLois Curfman McInnes 11425615d1e5SSatish Balay #undef __FUNC__ 11435615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIDense_DenseInFile" 114490ace30eSBarry Smith int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat) 114590ace30eSBarry Smith { 114640011551SBarry Smith int *rowners, i,size,rank,m,ierr,nz,j; 114790ace30eSBarry Smith Scalar *array,*vals,*vals_ptr; 114890ace30eSBarry Smith MPI_Status status; 114990ace30eSBarry Smith 11503a40ed3dSBarry Smith PetscFunctionBegin; 1151d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1152d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 115390ace30eSBarry Smith 115490ace30eSBarry Smith /* determine ownership of all rows */ 115590ace30eSBarry Smith m = M/size + ((M % size) > rank); 115690ace30eSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners); 1157ca161407SBarry Smith ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 115890ace30eSBarry Smith rowners[0] = 0; 115990ace30eSBarry Smith for ( i=2; i<=size; i++ ) { 116090ace30eSBarry Smith rowners[i] += rowners[i-1]; 116190ace30eSBarry Smith } 116290ace30eSBarry Smith 116390ace30eSBarry Smith ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 116490ace30eSBarry Smith ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 116590ace30eSBarry Smith 116690ace30eSBarry Smith if (!rank) { 116790ace30eSBarry Smith vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals); 116890ace30eSBarry Smith 116990ace30eSBarry Smith /* read in my part of the matrix numerical values */ 11700752156aSBarry Smith ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 117190ace30eSBarry Smith 117290ace30eSBarry Smith /* insert into matrix-by row (this is why cannot directly read into array */ 117390ace30eSBarry Smith vals_ptr = vals; 117490ace30eSBarry Smith for ( i=0; i<m; i++ ) { 117590ace30eSBarry Smith for ( j=0; j<N; j++ ) { 117690ace30eSBarry Smith array[i + j*m] = *vals_ptr++; 117790ace30eSBarry Smith } 117890ace30eSBarry Smith } 117990ace30eSBarry Smith 118090ace30eSBarry Smith /* read in other processors and ship out */ 118190ace30eSBarry Smith for ( i=1; i<size; i++ ) { 118290ace30eSBarry Smith nz = (rowners[i+1] - rowners[i])*N; 11830752156aSBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1184ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr); 118590ace30eSBarry Smith } 11863a40ed3dSBarry Smith } else { 118790ace30eSBarry Smith /* receive numeric values */ 118890ace30eSBarry Smith vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals); 118990ace30eSBarry Smith 119090ace30eSBarry Smith /* receive message of values*/ 1191ca161407SBarry Smith ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr); 119290ace30eSBarry Smith 119390ace30eSBarry Smith /* insert into matrix-by row (this is why cannot directly read into array */ 119490ace30eSBarry Smith vals_ptr = vals; 119590ace30eSBarry Smith for ( i=0; i<m; i++ ) { 119690ace30eSBarry Smith for ( j=0; j<N; j++ ) { 119790ace30eSBarry Smith array[i + j*m] = *vals_ptr++; 119890ace30eSBarry Smith } 119990ace30eSBarry Smith } 120090ace30eSBarry Smith } 1201606d414cSSatish Balay ierr = PetscFree(rowners);CHKERRQ(ierr); 1202606d414cSSatish Balay ierr = PetscFree(vals);CHKERRQ(ierr); 12036d4a8577SBarry Smith ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 12046d4a8577SBarry Smith ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 12053a40ed3dSBarry Smith PetscFunctionReturn(0); 120690ace30eSBarry Smith } 120790ace30eSBarry Smith 120890ace30eSBarry Smith 12095615d1e5SSatish Balay #undef __FUNC__ 12105615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIDense" 121119bcc07fSBarry Smith int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat) 12128965ea79SLois Curfman McInnes { 12138965ea79SLois Curfman McInnes Mat A; 12148965ea79SLois Curfman McInnes Scalar *vals,*svals; 121519bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 12168965ea79SLois Curfman McInnes MPI_Status status; 12178965ea79SLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 12188965ea79SLois Curfman McInnes int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 121919bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 12203a40ed3dSBarry Smith int i, nz, ierr, j,rstart, rend, fd; 12218965ea79SLois Curfman McInnes 12223a40ed3dSBarry Smith PetscFunctionBegin; 1223d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1224d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 12258965ea79SLois Curfman McInnes if (!rank) { 122690ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 12270752156aSBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1228a8c6a408SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 12298965ea79SLois Curfman McInnes } 12308965ea79SLois Curfman McInnes 1231ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 123290ace30eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 123390ace30eSBarry Smith 123490ace30eSBarry Smith /* 123590ace30eSBarry Smith Handle case where matrix is stored on disk as a dense matrix 123690ace30eSBarry Smith */ 123790ace30eSBarry Smith if (nz == MATRIX_BINARY_FORMAT_DENSE) { 12383a40ed3dSBarry Smith ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 12393a40ed3dSBarry Smith PetscFunctionReturn(0); 124090ace30eSBarry Smith } 124190ace30eSBarry Smith 12428965ea79SLois Curfman McInnes /* determine ownership of all rows */ 12438965ea79SLois Curfman McInnes m = M/size + ((M % size) > rank); 12440452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners); 1245ca161407SBarry Smith ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 12468965ea79SLois Curfman McInnes rowners[0] = 0; 12478965ea79SLois Curfman McInnes for ( i=2; i<=size; i++ ) { 12488965ea79SLois Curfman McInnes rowners[i] += rowners[i-1]; 12498965ea79SLois Curfman McInnes } 12508965ea79SLois Curfman McInnes rstart = rowners[rank]; 12518965ea79SLois Curfman McInnes rend = rowners[rank+1]; 12528965ea79SLois Curfman McInnes 12538965ea79SLois Curfman McInnes /* distribute row lengths to all processors */ 12540452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens); 12558965ea79SLois Curfman McInnes offlens = ourlens + (rend-rstart); 12568965ea79SLois Curfman McInnes if (!rank) { 12570452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths); 12580752156aSBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 12590452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts); 12608965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1261ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1262606d414cSSatish Balay ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1263ca161407SBarry Smith } else { 1264ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 12658965ea79SLois Curfman McInnes } 12668965ea79SLois Curfman McInnes 12678965ea79SLois Curfman McInnes if (!rank) { 12688965ea79SLois Curfman McInnes /* calculate the number of nonzeros on each processor */ 12690452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz); 1270549d3d68SSatish Balay ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 12718965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 12728965ea79SLois Curfman McInnes for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 12738965ea79SLois Curfman McInnes procsnz[i] += rowlengths[j]; 12748965ea79SLois Curfman McInnes } 12758965ea79SLois Curfman McInnes } 1276606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 12778965ea79SLois Curfman McInnes 12788965ea79SLois Curfman McInnes /* determine max buffer needed and allocate it */ 12798965ea79SLois Curfman McInnes maxnz = 0; 12808965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 12810452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 12828965ea79SLois Curfman McInnes } 12830452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols); 12848965ea79SLois Curfman McInnes 12858965ea79SLois Curfman McInnes /* read in my part of the matrix column indices */ 12868965ea79SLois Curfman McInnes nz = procsnz[0]; 12870452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols); 12880752156aSBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 12898965ea79SLois Curfman McInnes 12908965ea79SLois Curfman McInnes /* read in every one elses and ship off */ 12918965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 12928965ea79SLois Curfman McInnes nz = procsnz[i]; 12930752156aSBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1294ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 12958965ea79SLois Curfman McInnes } 1296606d414cSSatish Balay ierr = PetscFree(cols);CHKERRQ(ierr); 12973a40ed3dSBarry Smith } else { 12988965ea79SLois Curfman McInnes /* determine buffer space needed for message */ 12998965ea79SLois Curfman McInnes nz = 0; 13008965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 13018965ea79SLois Curfman McInnes nz += ourlens[i]; 13028965ea79SLois Curfman McInnes } 13030452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols); 13048965ea79SLois Curfman McInnes 13058965ea79SLois Curfman McInnes /* receive message of column indices*/ 1306ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1307ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1308a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 13098965ea79SLois Curfman McInnes } 13108965ea79SLois Curfman McInnes 13118965ea79SLois Curfman McInnes /* loop over local rows, determining number of off diagonal entries */ 1312549d3d68SSatish Balay ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 13138965ea79SLois Curfman McInnes jj = 0; 13148965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 13158965ea79SLois Curfman McInnes for ( j=0; j<ourlens[i]; j++ ) { 13168965ea79SLois Curfman McInnes if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 13178965ea79SLois Curfman McInnes jj++; 13188965ea79SLois Curfman McInnes } 13198965ea79SLois Curfman McInnes } 13208965ea79SLois Curfman McInnes 13218965ea79SLois Curfman McInnes /* create our matrix */ 13228965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 13238965ea79SLois Curfman McInnes ourlens[i] -= offlens[i]; 13248965ea79SLois Curfman McInnes } 1325b4fd4287SBarry Smith ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 13268965ea79SLois Curfman McInnes A = *newmat; 13278965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 13288965ea79SLois Curfman McInnes ourlens[i] += offlens[i]; 13298965ea79SLois Curfman McInnes } 13308965ea79SLois Curfman McInnes 13318965ea79SLois Curfman McInnes if (!rank) { 13320452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals); 13338965ea79SLois Curfman McInnes 13348965ea79SLois Curfman McInnes /* read in my part of the matrix numerical values */ 13358965ea79SLois Curfman McInnes nz = procsnz[0]; 13360752156aSBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 13378965ea79SLois Curfman McInnes 13388965ea79SLois Curfman McInnes /* insert into matrix */ 13398965ea79SLois Curfman McInnes jj = rstart; 13408965ea79SLois Curfman McInnes smycols = mycols; 13418965ea79SLois Curfman McInnes svals = vals; 13428965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 13438965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 13448965ea79SLois Curfman McInnes smycols += ourlens[i]; 13458965ea79SLois Curfman McInnes svals += ourlens[i]; 13468965ea79SLois Curfman McInnes jj++; 13478965ea79SLois Curfman McInnes } 13488965ea79SLois Curfman McInnes 13498965ea79SLois Curfman McInnes /* read in other processors and ship out */ 13508965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 13518965ea79SLois Curfman McInnes nz = procsnz[i]; 13520752156aSBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1353ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 13548965ea79SLois Curfman McInnes } 1355606d414cSSatish Balay ierr = PetscFree(procsnz);CHKERRQ(ierr); 13563a40ed3dSBarry Smith } else { 13578965ea79SLois Curfman McInnes /* receive numeric values */ 13580452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals); 13598965ea79SLois Curfman McInnes 13608965ea79SLois Curfman McInnes /* receive message of values*/ 1361ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1362ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1363a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 13648965ea79SLois Curfman McInnes 13658965ea79SLois Curfman McInnes /* insert into matrix */ 13668965ea79SLois Curfman McInnes jj = rstart; 13678965ea79SLois Curfman McInnes smycols = mycols; 13688965ea79SLois Curfman McInnes svals = vals; 13698965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 13708965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 13718965ea79SLois Curfman McInnes smycols += ourlens[i]; 13728965ea79SLois Curfman McInnes svals += ourlens[i]; 13738965ea79SLois Curfman McInnes jj++; 13748965ea79SLois Curfman McInnes } 13758965ea79SLois Curfman McInnes } 1376606d414cSSatish Balay ierr = PetscFree(ourlens);CHKERRQ(ierr); 1377606d414cSSatish Balay ierr = PetscFree(vals);CHKERRQ(ierr); 1378606d414cSSatish Balay ierr = PetscFree(mycols);CHKERRQ(ierr); 1379606d414cSSatish Balay ierr = PetscFree(rowners);CHKERRQ(ierr); 13808965ea79SLois Curfman McInnes 13816d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 13826d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 13833a40ed3dSBarry Smith PetscFunctionReturn(0); 13848965ea79SLois Curfman McInnes } 138590ace30eSBarry Smith 138690ace30eSBarry Smith 138790ace30eSBarry Smith 138890ace30eSBarry Smith 138990ace30eSBarry Smith 1390