1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*6831982aSBarry Smith static char vcid[] = "$Id: mpidense.c,v 1.128 1999/10/13 20:37:17 bsmith Exp bsmith $"; 38965ea79SLois Curfman McInnes #endif 48965ea79SLois Curfman McInnes 5ed3cc1f0SBarry Smith /* 6ed3cc1f0SBarry Smith Basic functions for basic parallel dense matrices. 7ed3cc1f0SBarry Smith */ 8ed3cc1f0SBarry Smith 970f55243SBarry Smith #include "src/mat/impls/dense/mpi/mpidense.h" 10f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 118965ea79SLois Curfman McInnes 120de54da6SSatish Balay EXTERN_C_BEGIN 130de54da6SSatish Balay #undef __FUNC__ 140de54da6SSatish Balay #define __FUNC__ "MatGetDiagonalBlock_MPIDense" 150de54da6SSatish Balay int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 160de54da6SSatish Balay { 170de54da6SSatish Balay Mat_MPIDense *mdn = (Mat_MPIDense *) A->data; 180de54da6SSatish Balay int m = mdn->m,rstart = mdn->rstart,rank,ierr; 190de54da6SSatish Balay Scalar *array; 200de54da6SSatish Balay MPI_Comm comm; 210de54da6SSatish Balay 220de54da6SSatish Balay PetscFunctionBegin; 230de54da6SSatish Balay if (mdn->M != mdn->N) SETERRQ(PETSC_ERR_SUP,0,"Only square matrices supported."); 240de54da6SSatish Balay 250de54da6SSatish Balay /* The reuse aspect is not implemented efficiently */ 260de54da6SSatish Balay if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 270de54da6SSatish Balay 280de54da6SSatish Balay ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 290de54da6SSatish Balay ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 300de54da6SSatish Balay ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 310de54da6SSatish Balay ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr); 320de54da6SSatish Balay ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 330de54da6SSatish Balay ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 340de54da6SSatish Balay ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 350de54da6SSatish Balay 360de54da6SSatish Balay *iscopy = PETSC_TRUE; 370de54da6SSatish Balay PetscFunctionReturn(0); 380de54da6SSatish Balay } 390de54da6SSatish Balay EXTERN_C_END 400de54da6SSatish Balay 417ef1d9bdSSatish Balay extern int MatSetUpMultiply_MPIDense(Mat); 427ef1d9bdSSatish Balay 435615d1e5SSatish Balay #undef __FUNC__ 445615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIDense" 458f6be9afSLois Curfman McInnes int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv) 468965ea79SLois Curfman McInnes { 4739b7565bSBarry Smith Mat_MPIDense *A = (Mat_MPIDense *) mat->data; 4839b7565bSBarry Smith int ierr, i, j, rstart = A->rstart, rend = A->rend, row; 4939b7565bSBarry Smith int roworiented = A->roworiented; 508965ea79SLois Curfman McInnes 513a40ed3dSBarry Smith PetscFunctionBegin; 528965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 535ef9f2a5SBarry Smith if (idxm[i] < 0) continue; 54a8c6a408SBarry Smith if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 558965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 568965ea79SLois Curfman McInnes row = idxm[i] - rstart; 5739b7565bSBarry Smith if (roworiented) { 5839b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 593a40ed3dSBarry Smith } else { 608965ea79SLois Curfman McInnes for ( j=0; j<n; j++ ) { 615ef9f2a5SBarry Smith if (idxn[j] < 0) continue; 62a8c6a408SBarry Smith if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 6339b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 6439b7565bSBarry Smith } 658965ea79SLois Curfman McInnes } 663a40ed3dSBarry Smith } else { 673782ba37SSatish Balay if ( !A->donotstash) { 6839b7565bSBarry Smith if (roworiented) { 698798bf22SSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr); 70d36fbae8SSatish Balay } else { 718798bf22SSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr); 7239b7565bSBarry Smith } 73b49de8d1SLois Curfman McInnes } 74b49de8d1SLois Curfman McInnes } 753782ba37SSatish Balay } 763a40ed3dSBarry Smith PetscFunctionReturn(0); 77b49de8d1SLois Curfman McInnes } 78b49de8d1SLois Curfman McInnes 795615d1e5SSatish Balay #undef __FUNC__ 805615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIDense" 818f6be9afSLois Curfman McInnes int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 82b49de8d1SLois Curfman McInnes { 83b49de8d1SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 84b49de8d1SLois Curfman McInnes int ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row; 85b49de8d1SLois Curfman McInnes 863a40ed3dSBarry Smith PetscFunctionBegin; 87b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 88a8c6a408SBarry Smith if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 89a8c6a408SBarry Smith if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 90b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 91b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 92b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 93a8c6a408SBarry Smith if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 94a8c6a408SBarry Smith if (idxn[j] >= mdn->N) { 95a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 96a8c6a408SBarry Smith } 97b49de8d1SLois Curfman McInnes ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 98b49de8d1SLois Curfman McInnes } 99a8c6a408SBarry Smith } else { 100a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 1018965ea79SLois Curfman McInnes } 1028965ea79SLois Curfman McInnes } 1033a40ed3dSBarry Smith PetscFunctionReturn(0); 1048965ea79SLois Curfman McInnes } 1058965ea79SLois Curfman McInnes 1065615d1e5SSatish Balay #undef __FUNC__ 1075615d1e5SSatish Balay #define __FUNC__ "MatGetArray_MPIDense" 1088f6be9afSLois Curfman McInnes int MatGetArray_MPIDense(Mat A,Scalar **array) 109ff14e315SSatish Balay { 110ff14e315SSatish Balay Mat_MPIDense *a = (Mat_MPIDense *) A->data; 111ff14e315SSatish Balay int ierr; 112ff14e315SSatish Balay 1133a40ed3dSBarry Smith PetscFunctionBegin; 114ff14e315SSatish Balay ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 1153a40ed3dSBarry Smith PetscFunctionReturn(0); 116ff14e315SSatish Balay } 117ff14e315SSatish Balay 1185615d1e5SSatish Balay #undef __FUNC__ 119ca3fa75bSLois Curfman McInnes #define __FUNC__ "MatGetSubMatrix_MPIDense" 120ca3fa75bSLois Curfman McInnes static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 121ca3fa75bSLois Curfman McInnes { 122ca3fa75bSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd; 123ca3fa75bSLois Curfman McInnes Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data; 12472d926a5SLois Curfman McInnes int i, j, ierr, *irow, *icol, rstart, rend, nrows, ncols, nlrows, nlcols, rank; 125ca3fa75bSLois Curfman McInnes Scalar *av, *bv, *v = lmat->v; 126ca3fa75bSLois Curfman McInnes Mat newmat; 127ca3fa75bSLois Curfman McInnes 128ca3fa75bSLois Curfman McInnes PetscFunctionBegin; 1297eba5e9cSLois Curfman McInnes ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 130ca3fa75bSLois Curfman McInnes ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 131ca3fa75bSLois Curfman McInnes ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 132ca3fa75bSLois Curfman McInnes ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 133ca3fa75bSLois Curfman McInnes ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr); 134ca3fa75bSLois Curfman McInnes 135ca3fa75bSLois Curfman McInnes /* No parallel redistribution currently supported! Should really check each index set 1367eba5e9cSLois Curfman McInnes to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 1377eba5e9cSLois Curfman McInnes original matrix! */ 138ca3fa75bSLois Curfman McInnes 139ca3fa75bSLois Curfman McInnes ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 1407eba5e9cSLois Curfman McInnes ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 141ca3fa75bSLois Curfman McInnes 142ca3fa75bSLois Curfman McInnes /* Check submatrix call */ 143ca3fa75bSLois Curfman McInnes if (scall == MAT_REUSE_MATRIX) { 1447eba5e9cSLois Curfman McInnes /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */ 1457eba5e9cSLois Curfman McInnes /* Really need to test rows and column sizes! */ 146ca3fa75bSLois Curfman McInnes newmat = *B; 147ca3fa75bSLois Curfman McInnes } else { 148ca3fa75bSLois Curfman McInnes /* Create and fill new matrix */ 149ca3fa75bSLois Curfman McInnes ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr); 150ca3fa75bSLois Curfman McInnes } 151ca3fa75bSLois Curfman McInnes 152ca3fa75bSLois Curfman McInnes /* Now extract the data pointers and do the copy, column at a time */ 153ca3fa75bSLois Curfman McInnes newmatd = (Mat_MPIDense *) newmat->data; 154ca3fa75bSLois Curfman McInnes bv = ((Mat_SeqDense *)newmatd->A->data)->v; 155ca3fa75bSLois Curfman McInnes 156ca3fa75bSLois Curfman McInnes for ( i=0; i<ncols; i++ ) { 157ca3fa75bSLois Curfman McInnes av = v + nlrows*icol[i]; 158ca3fa75bSLois Curfman McInnes for (j=0; j<nrows; j++ ) { 1597eba5e9cSLois Curfman McInnes *bv++ = av[irow[j] - rstart]; 160ca3fa75bSLois Curfman McInnes } 161ca3fa75bSLois Curfman McInnes } 162ca3fa75bSLois Curfman McInnes 163ca3fa75bSLois Curfman McInnes /* Assemble the matrices so that the correct flags are set */ 164ca3fa75bSLois Curfman McInnes ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 165ca3fa75bSLois Curfman McInnes ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 166ca3fa75bSLois Curfman McInnes 167ca3fa75bSLois Curfman McInnes /* Free work space */ 168ca3fa75bSLois Curfman McInnes ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 169ca3fa75bSLois Curfman McInnes ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 170ca3fa75bSLois Curfman McInnes *B = newmat; 171ca3fa75bSLois Curfman McInnes PetscFunctionReturn(0); 172ca3fa75bSLois Curfman McInnes } 173ca3fa75bSLois Curfman McInnes 174ca3fa75bSLois Curfman McInnes #undef __FUNC__ 1755615d1e5SSatish Balay #define __FUNC__ "MatRestoreArray_MPIDense" 1768f6be9afSLois Curfman McInnes int MatRestoreArray_MPIDense(Mat A,Scalar **array) 177ff14e315SSatish Balay { 1783a40ed3dSBarry Smith PetscFunctionBegin; 1793a40ed3dSBarry Smith PetscFunctionReturn(0); 180ff14e315SSatish Balay } 181ff14e315SSatish Balay 1825615d1e5SSatish Balay #undef __FUNC__ 1835615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIDense" 1848f6be9afSLois Curfman McInnes int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 1858965ea79SLois Curfman McInnes { 18639ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 1878965ea79SLois Curfman McInnes MPI_Comm comm = mat->comm; 188d36fbae8SSatish Balay int ierr,nstash,reallocs; 1898965ea79SLois Curfman McInnes InsertMode addv; 1908965ea79SLois Curfman McInnes 1913a40ed3dSBarry Smith PetscFunctionBegin; 1928965ea79SLois Curfman McInnes /* make sure all processors are either in INSERTMODE or ADDMODE */ 193ca161407SBarry Smith ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 1947056b6fcSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 195a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs"); 1968965ea79SLois Curfman McInnes } 197e0fa3b82SLois Curfman McInnes mat->insertmode = addv; /* in case this processor had no cache */ 1988965ea79SLois Curfman McInnes 1998798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr); 2008798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 201d36fbae8SSatish Balay PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n", 202d36fbae8SSatish Balay nstash,reallocs); 2033a40ed3dSBarry Smith PetscFunctionReturn(0); 2048965ea79SLois Curfman McInnes } 2058965ea79SLois Curfman McInnes 2065615d1e5SSatish Balay #undef __FUNC__ 2075615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIDense" 2088f6be9afSLois Curfman McInnes int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 2098965ea79SLois Curfman McInnes { 21039ddd567SLois Curfman McInnes Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 2117ef1d9bdSSatish Balay int i,n,ierr,*row,*col,flg,j,rstart,ncols; 2127ef1d9bdSSatish Balay Scalar *val; 213e0fa3b82SLois Curfman McInnes InsertMode addv=mat->insertmode; 2148965ea79SLois Curfman McInnes 2153a40ed3dSBarry Smith PetscFunctionBegin; 2168965ea79SLois Curfman McInnes /* wait on receives */ 2177ef1d9bdSSatish Balay while (1) { 2188798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 2197ef1d9bdSSatish Balay if (!flg) break; 2208965ea79SLois Curfman McInnes 2217ef1d9bdSSatish Balay for ( i=0; i<n; ) { 2227ef1d9bdSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 2237ef1d9bdSSatish Balay for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 2247ef1d9bdSSatish Balay if (j < n) ncols = j-i; 2257ef1d9bdSSatish Balay else ncols = n-i; 2267ef1d9bdSSatish Balay /* Now assemble all these values with a single function call */ 2277ef1d9bdSSatish Balay ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 2287ef1d9bdSSatish Balay i = j; 2298965ea79SLois Curfman McInnes } 2307ef1d9bdSSatish Balay } 2318798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 2328965ea79SLois Curfman McInnes 23339ddd567SLois Curfman McInnes ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 23439ddd567SLois Curfman McInnes ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 2358965ea79SLois Curfman McInnes 2366d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 23739ddd567SLois Curfman McInnes ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 2388965ea79SLois Curfman McInnes } 2393a40ed3dSBarry Smith PetscFunctionReturn(0); 2408965ea79SLois Curfman McInnes } 2418965ea79SLois Curfman McInnes 2425615d1e5SSatish Balay #undef __FUNC__ 2435615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIDense" 2448f6be9afSLois Curfman McInnes int MatZeroEntries_MPIDense(Mat A) 2458965ea79SLois Curfman McInnes { 2463a40ed3dSBarry Smith int ierr; 24739ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 2483a40ed3dSBarry Smith 2493a40ed3dSBarry Smith PetscFunctionBegin; 2503a40ed3dSBarry Smith ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 2513a40ed3dSBarry Smith PetscFunctionReturn(0); 2528965ea79SLois Curfman McInnes } 2538965ea79SLois Curfman McInnes 2545615d1e5SSatish Balay #undef __FUNC__ 2555615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIDense" 2568f6be9afSLois Curfman McInnes int MatGetBlockSize_MPIDense(Mat A,int *bs) 2574e220ebcSLois Curfman McInnes { 2583a40ed3dSBarry Smith PetscFunctionBegin; 2594e220ebcSLois Curfman McInnes *bs = 1; 2603a40ed3dSBarry Smith PetscFunctionReturn(0); 2614e220ebcSLois Curfman McInnes } 2624e220ebcSLois Curfman McInnes 2638965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the 2648965ea79SLois Curfman McInnes matrix is square and the column and row owerships are identical. 2658965ea79SLois Curfman McInnes This is a BUG. The only way to fix it seems to be to access 2663501a2bdSLois Curfman McInnes mdn->A and mdn->B directly and not through the MatZeroRows() 2678965ea79SLois Curfman McInnes routine. 2688965ea79SLois Curfman McInnes */ 2695615d1e5SSatish Balay #undef __FUNC__ 2705615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIDense" 2718f6be9afSLois Curfman McInnes int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag) 2728965ea79SLois Curfman McInnes { 27339ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 2748965ea79SLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 2758965ea79SLois Curfman McInnes int *procs,*nprocs,j,found,idx,nsends,*work; 2768965ea79SLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 2778965ea79SLois Curfman McInnes int *rvalues,tag = A->tag,count,base,slen,n,*source; 2788965ea79SLois Curfman McInnes int *lens,imdex,*lrows,*values; 2798965ea79SLois Curfman McInnes MPI_Comm comm = A->comm; 2808965ea79SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 2818965ea79SLois Curfman McInnes MPI_Status recv_status,*send_status; 2828965ea79SLois Curfman McInnes IS istmp; 2838965ea79SLois Curfman McInnes 2843a40ed3dSBarry Smith PetscFunctionBegin; 28577c4ece6SBarry Smith ierr = ISGetSize(is,&N);CHKERRQ(ierr); 2868965ea79SLois Curfman McInnes ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 2878965ea79SLois Curfman McInnes 2888965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 2890452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs); 290549d3d68SSatish Balay ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 291549d3d68SSatish Balay procs = nprocs + size; 2920452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 2938965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 2948965ea79SLois Curfman McInnes idx = rows[i]; 2958965ea79SLois Curfman McInnes found = 0; 2968965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 2978965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 2988965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 2998965ea79SLois Curfman McInnes } 3008965ea79SLois Curfman McInnes } 301a8c6a408SBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 3028965ea79SLois Curfman McInnes } 3038965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3048965ea79SLois Curfman McInnes 3058965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 306*6831982aSBarry Smith work = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work); 307*6831982aSBarry Smith ierr = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 3088965ea79SLois Curfman McInnes nmax = work[rank]; 309*6831982aSBarry Smith nrecvs = work[size+rank]; 310606d414cSSatish Balay ierr = PetscFree(work);CHKERRQ(ierr); 3118965ea79SLois Curfman McInnes 3128965ea79SLois Curfman McInnes /* post receives: */ 3133a40ed3dSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 3143a40ed3dSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 3158965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 316ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 3178965ea79SLois Curfman McInnes } 3188965ea79SLois Curfman McInnes 3198965ea79SLois Curfman McInnes /* do sends: 3208965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 3218965ea79SLois Curfman McInnes the ith processor 3228965ea79SLois Curfman McInnes */ 3230452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues); 3247056b6fcSBarry Smith send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 3250452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts); 3268965ea79SLois Curfman McInnes starts[0] = 0; 3278965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3288965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 3298965ea79SLois Curfman McInnes svalues[starts[owner[i]]++] = rows[i]; 3308965ea79SLois Curfman McInnes } 3318965ea79SLois Curfman McInnes ISRestoreIndices(is,&rows); 3328965ea79SLois Curfman McInnes 3338965ea79SLois Curfman McInnes starts[0] = 0; 3348965ea79SLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3358965ea79SLois Curfman McInnes count = 0; 3368965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 3378965ea79SLois Curfman McInnes if (procs[i]) { 338ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 3398965ea79SLois Curfman McInnes } 3408965ea79SLois Curfman McInnes } 341606d414cSSatish Balay ierr = PetscFree(starts);CHKERRQ(ierr); 3428965ea79SLois Curfman McInnes 3438965ea79SLois Curfman McInnes base = owners[rank]; 3448965ea79SLois Curfman McInnes 3458965ea79SLois Curfman McInnes /* wait on receives */ 3460452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens); 3478965ea79SLois Curfman McInnes source = lens + nrecvs; 3488965ea79SLois Curfman McInnes count = nrecvs; slen = 0; 3498965ea79SLois Curfman McInnes while (count) { 350ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 3518965ea79SLois Curfman McInnes /* unpack receives into our local space */ 352ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 3538965ea79SLois Curfman McInnes source[imdex] = recv_status.MPI_SOURCE; 3548965ea79SLois Curfman McInnes lens[imdex] = n; 3558965ea79SLois Curfman McInnes slen += n; 3568965ea79SLois Curfman McInnes count--; 3578965ea79SLois Curfman McInnes } 358606d414cSSatish Balay ierr = PetscFree(recv_waits);CHKERRQ(ierr); 3598965ea79SLois Curfman McInnes 3608965ea79SLois Curfman McInnes /* move the data into the send scatter */ 3610452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows); 3628965ea79SLois Curfman McInnes count = 0; 3638965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 3648965ea79SLois Curfman McInnes values = rvalues + i*nmax; 3658965ea79SLois Curfman McInnes for ( j=0; j<lens[i]; j++ ) { 3668965ea79SLois Curfman McInnes lrows[count++] = values[j] - base; 3678965ea79SLois Curfman McInnes } 3688965ea79SLois Curfman McInnes } 369606d414cSSatish Balay ierr = PetscFree(rvalues);CHKERRQ(ierr); 370606d414cSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 371606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 372606d414cSSatish Balay ierr = PetscFree(nprocs);CHKERRQ(ierr); 3738965ea79SLois Curfman McInnes 3748965ea79SLois Curfman McInnes /* actually zap the local rows */ 375029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 3768965ea79SLois Curfman McInnes PLogObjectParent(A,istmp); 377606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 3788965ea79SLois Curfman McInnes ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 3798965ea79SLois Curfman McInnes ierr = ISDestroy(istmp);CHKERRQ(ierr); 3808965ea79SLois Curfman McInnes 3818965ea79SLois Curfman McInnes /* wait on sends */ 3828965ea79SLois Curfman McInnes if (nsends) { 3837056b6fcSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 384ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 385606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 3868965ea79SLois Curfman McInnes } 387606d414cSSatish Balay ierr = PetscFree(send_waits);CHKERRQ(ierr); 388606d414cSSatish Balay ierr = PetscFree(svalues);CHKERRQ(ierr); 3898965ea79SLois Curfman McInnes 3903a40ed3dSBarry Smith PetscFunctionReturn(0); 3918965ea79SLois Curfman McInnes } 3928965ea79SLois Curfman McInnes 3935615d1e5SSatish Balay #undef __FUNC__ 3945615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIDense" 3958f6be9afSLois Curfman McInnes int MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 3968965ea79SLois Curfman McInnes { 39739ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 3988965ea79SLois Curfman McInnes int ierr; 399c456f294SBarry Smith 4003a40ed3dSBarry Smith PetscFunctionBegin; 40143a90d84SBarry Smith ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 40243a90d84SBarry Smith ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 40344cd7ae7SLois Curfman McInnes ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 4043a40ed3dSBarry Smith PetscFunctionReturn(0); 4058965ea79SLois Curfman McInnes } 4068965ea79SLois Curfman McInnes 4075615d1e5SSatish Balay #undef __FUNC__ 4085615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIDense" 4098f6be9afSLois Curfman McInnes int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 4108965ea79SLois Curfman McInnes { 41139ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4128965ea79SLois Curfman McInnes int ierr; 413c456f294SBarry Smith 4143a40ed3dSBarry Smith PetscFunctionBegin; 41543a90d84SBarry Smith ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 41643a90d84SBarry Smith ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 41744cd7ae7SLois Curfman McInnes ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 4183a40ed3dSBarry Smith PetscFunctionReturn(0); 4198965ea79SLois Curfman McInnes } 4208965ea79SLois Curfman McInnes 4215615d1e5SSatish Balay #undef __FUNC__ 4225615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIDense" 4238f6be9afSLois Curfman McInnes int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy) 424096963f5SLois Curfman McInnes { 425096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 426096963f5SLois Curfman McInnes int ierr; 4273501a2bdSLois Curfman McInnes Scalar zero = 0.0; 428096963f5SLois Curfman McInnes 4293a40ed3dSBarry Smith PetscFunctionBegin; 4303501a2bdSLois Curfman McInnes ierr = VecSet(&zero,yy);CHKERRQ(ierr); 43144cd7ae7SLois Curfman McInnes ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 432537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 433537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 4343a40ed3dSBarry Smith PetscFunctionReturn(0); 435096963f5SLois Curfman McInnes } 436096963f5SLois Curfman McInnes 4375615d1e5SSatish Balay #undef __FUNC__ 4385615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIDense" 4398f6be9afSLois Curfman McInnes int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 440096963f5SLois Curfman McInnes { 441096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 442096963f5SLois Curfman McInnes int ierr; 443096963f5SLois Curfman McInnes 4443a40ed3dSBarry Smith PetscFunctionBegin; 4453501a2bdSLois Curfman McInnes ierr = VecCopy(yy,zz);CHKERRQ(ierr); 44644cd7ae7SLois Curfman McInnes ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 447537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 448537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 4493a40ed3dSBarry Smith PetscFunctionReturn(0); 450096963f5SLois Curfman McInnes } 451096963f5SLois Curfman McInnes 4525615d1e5SSatish Balay #undef __FUNC__ 4535615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIDense" 4548f6be9afSLois Curfman McInnes int MatGetDiagonal_MPIDense(Mat A,Vec v) 4558965ea79SLois Curfman McInnes { 45639ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 457096963f5SLois Curfman McInnes Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data; 45844cd7ae7SLois Curfman McInnes int ierr, len, i, n, m = a->m, radd; 45944cd7ae7SLois Curfman McInnes Scalar *x, zero = 0.0; 460ed3cc1f0SBarry Smith 4613a40ed3dSBarry Smith PetscFunctionBegin; 46244cd7ae7SLois Curfman McInnes VecSet(&zero,v); 463096963f5SLois Curfman McInnes ierr = VecGetArray(v,&x);CHKERRQ(ierr); 464096963f5SLois Curfman McInnes ierr = VecGetSize(v,&n);CHKERRQ(ierr); 465a8c6a408SBarry Smith if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec"); 46644cd7ae7SLois Curfman McInnes len = PetscMin(aloc->m,aloc->n); 4677ddc982cSLois Curfman McInnes radd = a->rstart*m; 46844cd7ae7SLois Curfman McInnes for ( i=0; i<len; i++ ) { 469096963f5SLois Curfman McInnes x[i] = aloc->v[radd + i*m + i]; 470096963f5SLois Curfman McInnes } 4719a8c540fSSatish Balay ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 4723a40ed3dSBarry Smith PetscFunctionReturn(0); 4738965ea79SLois Curfman McInnes } 4748965ea79SLois Curfman McInnes 4755615d1e5SSatish Balay #undef __FUNC__ 4765615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIDense" 477e1311b90SBarry Smith int MatDestroy_MPIDense(Mat mat) 4788965ea79SLois Curfman McInnes { 4793501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4808965ea79SLois Curfman McInnes int ierr; 481ed3cc1f0SBarry Smith 4823a40ed3dSBarry Smith PetscFunctionBegin; 48394d884c6SBarry Smith 48494d884c6SBarry Smith if (mat->mapping) { 48594d884c6SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 48694d884c6SBarry Smith } 48794d884c6SBarry Smith if (mat->bmapping) { 48894d884c6SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 48994d884c6SBarry Smith } 490aa482453SBarry Smith #if defined(PETSC_USE_LOG) 491e1311b90SBarry Smith PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N); 4928965ea79SLois Curfman McInnes #endif 4938798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 494606d414cSSatish Balay ierr = PetscFree(mdn->rowners);CHKERRQ(ierr); 4953501a2bdSLois Curfman McInnes ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 4963501a2bdSLois Curfman McInnes if (mdn->lvec) VecDestroy(mdn->lvec); 4973501a2bdSLois Curfman McInnes if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 498622d7880SLois Curfman McInnes if (mdn->factor) { 499606d414cSSatish Balay if (mdn->factor->temp) {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);} 500606d414cSSatish Balay if (mdn->factor->tag) {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);} 501606d414cSSatish Balay if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);} 502606d414cSSatish Balay ierr = PetscFree(mdn->factor);CHKERRQ(ierr); 503622d7880SLois Curfman McInnes } 504606d414cSSatish Balay ierr = PetscFree(mdn);CHKERRQ(ierr); 50561b13de0SBarry Smith if (mat->rmap) { 50661b13de0SBarry Smith ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 50761b13de0SBarry Smith } 50861b13de0SBarry Smith if (mat->cmap) { 50961b13de0SBarry Smith ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 51090f02eecSBarry Smith } 5118965ea79SLois Curfman McInnes PLogObjectDestroy(mat); 5120452661fSBarry Smith PetscHeaderDestroy(mat); 5133a40ed3dSBarry Smith PetscFunctionReturn(0); 5148965ea79SLois Curfman McInnes } 51539ddd567SLois Curfman McInnes 5165615d1e5SSatish Balay #undef __FUNC__ 5175615d1e5SSatish Balay #define __FUNC__ "MatView_MPIDense_Binary" 51839ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer) 5198965ea79SLois Curfman McInnes { 52039ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 5218965ea79SLois Curfman McInnes int ierr; 5227056b6fcSBarry Smith 5233a40ed3dSBarry Smith PetscFunctionBegin; 52439ddd567SLois Curfman McInnes if (mdn->size == 1) { 52539ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 5268965ea79SLois Curfman McInnes } 527a8c6a408SBarry Smith else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 5283a40ed3dSBarry Smith PetscFunctionReturn(0); 5298965ea79SLois Curfman McInnes } 5308965ea79SLois Curfman McInnes 5315615d1e5SSatish Balay #undef __FUNC__ 5325615d1e5SSatish Balay #define __FUNC__ "MatView_MPIDense_ASCII" 53339ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer) 5348965ea79SLois Curfman McInnes { 53539ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 53677ed5343SBarry Smith int ierr, format, size = mdn->size, rank = mdn->rank; 5378965ea79SLois Curfman McInnes FILE *fd; 53819bcc07fSBarry Smith ViewerType vtype; 5398965ea79SLois Curfman McInnes 5403a40ed3dSBarry Smith PetscFunctionBegin; 5413a40ed3dSBarry Smith ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 54290ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr); 543888f2ed8SSatish Balay ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 544639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 5454e220ebcSLois Curfman McInnes MatInfo info; 546888f2ed8SSatish Balay ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 547*6831982aSBarry Smith ierr = ViewerASCIISynchronizedPrintf(viewer," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m, 548*6831982aSBarry Smith (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 549*6831982aSBarry Smith ierr = ViewerFlush(viewer);CHKERRQ(ierr); 5503501a2bdSLois Curfman McInnes ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 5513a40ed3dSBarry Smith PetscFunctionReturn(0); 55296f6c058SBarry Smith } else if (format == VIEWER_FORMAT_ASCII_INFO) { 5533a40ed3dSBarry Smith PetscFunctionReturn(0); 5548965ea79SLois Curfman McInnes } 55577ed5343SBarry Smith 5568965ea79SLois Curfman McInnes if (size == 1) { 55739ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 5583a40ed3dSBarry Smith } else { 5598965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 5608965ea79SLois Curfman McInnes Mat A; 56139ddd567SLois Curfman McInnes int M = mdn->M, N = mdn->N,m,row,i, nz, *cols; 56239ddd567SLois Curfman McInnes Scalar *vals; 56339ddd567SLois Curfman McInnes Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data; 5648965ea79SLois Curfman McInnes 5658965ea79SLois Curfman McInnes if (!rank) { 566be0abb6dSBarry Smith ierr = MatCreateMPIDense(mat->comm,M,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr); 5673a40ed3dSBarry Smith } else { 568be0abb6dSBarry Smith ierr = MatCreateMPIDense(mat->comm,0,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr); 5698965ea79SLois Curfman McInnes } 5708965ea79SLois Curfman McInnes PLogObjectParent(mat,A); 5718965ea79SLois Curfman McInnes 57239ddd567SLois Curfman McInnes /* Copy the matrix ... This isn't the most efficient means, 57339ddd567SLois Curfman McInnes but it's quick for now */ 57439ddd567SLois Curfman McInnes row = mdn->rstart; m = Amdn->m; 5758965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 57639ddd567SLois Curfman McInnes ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 57739ddd567SLois Curfman McInnes ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 57839ddd567SLois Curfman McInnes ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 57939ddd567SLois Curfman McInnes row++; 5808965ea79SLois Curfman McInnes } 5818965ea79SLois Curfman McInnes 5826d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5836d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5848965ea79SLois Curfman McInnes if (!rank) { 585*6831982aSBarry Smith Viewer sviewer; 586*6831982aSBarry Smith ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 587*6831982aSBarry Smith ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 588*6831982aSBarry Smith ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 5898965ea79SLois Curfman McInnes } 590*6831982aSBarry Smith ierr = ViewerFlush(viewer);CHKERRQ(ierr); 5918965ea79SLois Curfman McInnes ierr = MatDestroy(A);CHKERRQ(ierr); 5928965ea79SLois Curfman McInnes } 5933a40ed3dSBarry Smith PetscFunctionReturn(0); 5948965ea79SLois Curfman McInnes } 5958965ea79SLois Curfman McInnes 5965615d1e5SSatish Balay #undef __FUNC__ 5975615d1e5SSatish Balay #define __FUNC__ "MatView_MPIDense" 598e1311b90SBarry Smith int MatView_MPIDense(Mat mat,Viewer viewer) 5998965ea79SLois Curfman McInnes { 60039ddd567SLois Curfman McInnes int ierr; 601*6831982aSBarry Smith PetscTruth isascii,isbinary; 6028965ea79SLois Curfman McInnes 603433994e6SBarry Smith PetscFunctionBegin; 6040f5bd95cSBarry Smith 605*6831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 606*6831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 6070f5bd95cSBarry Smith 6080f5bd95cSBarry Smith if (isascii) { 60939ddd567SLois Curfman McInnes ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr); 6100f5bd95cSBarry Smith } else if (isbinary) { 6113a40ed3dSBarry Smith ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 6125cd90555SBarry Smith } else { 6130f5bd95cSBarry Smith SETERRQ1(1,1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 6148965ea79SLois Curfman McInnes } 6153a40ed3dSBarry Smith PetscFunctionReturn(0); 6168965ea79SLois Curfman McInnes } 6178965ea79SLois Curfman McInnes 6185615d1e5SSatish Balay #undef __FUNC__ 6195615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIDense" 6208f6be9afSLois Curfman McInnes int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 6218965ea79SLois Curfman McInnes { 6223501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6233501a2bdSLois Curfman McInnes Mat mdn = mat->A; 6244e220ebcSLois Curfman McInnes int ierr; 6254e220ebcSLois Curfman McInnes double isend[5], irecv[5]; 6268965ea79SLois Curfman McInnes 6273a40ed3dSBarry Smith PetscFunctionBegin; 6284e220ebcSLois Curfman McInnes info->rows_global = (double)mat->M; 6294e220ebcSLois Curfman McInnes info->columns_global = (double)mat->N; 6304e220ebcSLois Curfman McInnes info->rows_local = (double)mat->m; 6314e220ebcSLois Curfman McInnes info->columns_local = (double)mat->N; 6324e220ebcSLois Curfman McInnes info->block_size = 1.0; 6334e220ebcSLois Curfman McInnes ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 6344e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 6354e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 6368965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 6374e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 6384e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 6394e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 6404e220ebcSLois Curfman McInnes info->memory = isend[3]; 6414e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 6428965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 643f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr); 6444e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 6454e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 6464e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 6474e220ebcSLois Curfman McInnes info->memory = irecv[3]; 6484e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 6498965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 650f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr); 6514e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 6524e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 6534e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 6544e220ebcSLois Curfman McInnes info->memory = irecv[3]; 6554e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 6568965ea79SLois Curfman McInnes } 6574e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 6584e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 6594e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 6603a40ed3dSBarry Smith PetscFunctionReturn(0); 6618965ea79SLois Curfman McInnes } 6628965ea79SLois Curfman McInnes 6638c469469SLois Curfman McInnes /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*); 6648aaee692SLois Curfman McInnes extern int MatLUFactorNumeric_MPIDense(Mat,Mat*); 6658aaee692SLois Curfman McInnes extern int MatLUFactor_MPIDense(Mat,IS,IS,double); 6668aaee692SLois Curfman McInnes extern int MatSolve_MPIDense(Mat,Vec,Vec); 6678c469469SLois Curfman McInnes extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec); 6688aaee692SLois Curfman McInnes extern int MatSolveTrans_MPIDense(Mat,Vec,Vec); 6698aaee692SLois Curfman McInnes extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */ 6708aaee692SLois Curfman McInnes 6715615d1e5SSatish Balay #undef __FUNC__ 6725615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIDense" 6738f6be9afSLois Curfman McInnes int MatSetOption_MPIDense(Mat A,MatOption op) 6748965ea79SLois Curfman McInnes { 67539ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 6768965ea79SLois Curfman McInnes 6773a40ed3dSBarry Smith PetscFunctionBegin; 6786d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 6796d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 6804787f768SSatish Balay op == MAT_NEW_NONZERO_LOCATION_ERR || 6814787f768SSatish Balay op == MAT_NEW_NONZERO_ALLOCATION_ERR || 682219d9a1aSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 683219d9a1aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED) { 684b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 685b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 686aeafbbfcSLois Curfman McInnes a->roworiented = 1; 6878965ea79SLois Curfman McInnes MatSetOption(a->A,op); 688b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 689219d9a1aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 6906d4a8577SBarry Smith op == MAT_SYMMETRIC || 6916d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 692b51ba29fSSatish Balay op == MAT_YES_NEW_DIAGONALS || 693b51ba29fSSatish Balay op == MAT_USE_HASH_TABLE) { 694981c4779SBarry Smith PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n"); 6953a40ed3dSBarry Smith } else if (op == MAT_COLUMN_ORIENTED) { 6963a40ed3dSBarry Smith a->roworiented = 0; MatSetOption(a->A,op); 6973782ba37SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 6983782ba37SSatish Balay a->donotstash = 1; 6993a40ed3dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) { 7003a40ed3dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 7013a40ed3dSBarry Smith } else { 7023a40ed3dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 7033a40ed3dSBarry Smith } 7043a40ed3dSBarry Smith PetscFunctionReturn(0); 7058965ea79SLois Curfman McInnes } 7068965ea79SLois Curfman McInnes 7075615d1e5SSatish Balay #undef __FUNC__ 7085615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIDense" 7098f6be9afSLois Curfman McInnes int MatGetSize_MPIDense(Mat A,int *m,int *n) 7108965ea79SLois Curfman McInnes { 7113501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 7123a40ed3dSBarry Smith 7133a40ed3dSBarry Smith PetscFunctionBegin; 7148965ea79SLois Curfman McInnes *m = mat->M; *n = mat->N; 7153a40ed3dSBarry Smith PetscFunctionReturn(0); 7168965ea79SLois Curfman McInnes } 7178965ea79SLois Curfman McInnes 7185615d1e5SSatish Balay #undef __FUNC__ 7195615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIDense" 7208f6be9afSLois Curfman McInnes int MatGetLocalSize_MPIDense(Mat A,int *m,int *n) 7218965ea79SLois Curfman McInnes { 7223501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 7233a40ed3dSBarry Smith 7243a40ed3dSBarry Smith PetscFunctionBegin; 7258965ea79SLois Curfman McInnes *m = mat->m; *n = mat->N; 7263a40ed3dSBarry Smith PetscFunctionReturn(0); 7278965ea79SLois Curfman McInnes } 7288965ea79SLois Curfman McInnes 7295615d1e5SSatish Balay #undef __FUNC__ 7305615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIDense" 7318f6be9afSLois Curfman McInnes int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n) 7328965ea79SLois Curfman McInnes { 7333501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 7343a40ed3dSBarry Smith 7353a40ed3dSBarry Smith PetscFunctionBegin; 7368965ea79SLois Curfman McInnes *m = mat->rstart; *n = mat->rend; 7373a40ed3dSBarry Smith PetscFunctionReturn(0); 7388965ea79SLois Curfman McInnes } 7398965ea79SLois Curfman McInnes 7405615d1e5SSatish Balay #undef __FUNC__ 7415615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIDense" 7428f6be9afSLois Curfman McInnes int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v) 7438965ea79SLois Curfman McInnes { 7443501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 7453a40ed3dSBarry Smith int lrow, rstart = mat->rstart, rend = mat->rend,ierr; 7468965ea79SLois Curfman McInnes 7473a40ed3dSBarry Smith PetscFunctionBegin; 748a8c6a408SBarry Smith if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows") 7498965ea79SLois Curfman McInnes lrow = row - rstart; 7503a40ed3dSBarry Smith ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr); 7513a40ed3dSBarry Smith PetscFunctionReturn(0); 7528965ea79SLois Curfman McInnes } 7538965ea79SLois Curfman McInnes 7545615d1e5SSatish Balay #undef __FUNC__ 7555615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIDense" 7568f6be9afSLois Curfman McInnes int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v) 7578965ea79SLois Curfman McInnes { 758606d414cSSatish Balay int ierr; 759606d414cSSatish Balay 7603a40ed3dSBarry Smith PetscFunctionBegin; 761606d414cSSatish Balay if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 762606d414cSSatish Balay if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 7633a40ed3dSBarry Smith PetscFunctionReturn(0); 7648965ea79SLois Curfman McInnes } 7658965ea79SLois Curfman McInnes 7665615d1e5SSatish Balay #undef __FUNC__ 7675b2fa520SLois Curfman McInnes #define __FUNC__ "MatDiagonalScale_MPIDense" 7685b2fa520SLois Curfman McInnes int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 7695b2fa520SLois Curfman McInnes { 7705b2fa520SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) A->data; 7715b2fa520SLois Curfman McInnes Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data; 7725b2fa520SLois Curfman McInnes Scalar *l,*r,x,*v; 77372d926a5SLois Curfman McInnes int ierr,i,j,s2a,s3a,s2,s3,m=mat->m,n=mat->n; 7745b2fa520SLois Curfman McInnes 7755b2fa520SLois Curfman McInnes PetscFunctionBegin; 77672d926a5SLois Curfman McInnes ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 7775b2fa520SLois Curfman McInnes if (ll) { 77872d926a5SLois Curfman McInnes ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 77972d926a5SLois Curfman McInnes if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size"); 7805b2fa520SLois Curfman McInnes ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 7815b2fa520SLois Curfman McInnes for ( i=0; i<m; i++ ) { 7825b2fa520SLois Curfman McInnes x = l[i]; 7835b2fa520SLois Curfman McInnes v = mat->v + i; 7845b2fa520SLois Curfman McInnes for ( j=0; j<n; j++ ) { (*v) *= x; v+= m;} 7855b2fa520SLois Curfman McInnes } 7865b2fa520SLois Curfman McInnes ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 7875b2fa520SLois Curfman McInnes PLogFlops(n*m); 7885b2fa520SLois Curfman McInnes } 7895b2fa520SLois Curfman McInnes if (rr) { 79072d926a5SLois Curfman McInnes ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr); 79172d926a5SLois Curfman McInnes if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec non-conforming local size"); 7925b2fa520SLois Curfman McInnes ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 7935b2fa520SLois Curfman McInnes ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 7945b2fa520SLois Curfman McInnes ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 7955b2fa520SLois Curfman McInnes for ( i=0; i<n; i++ ) { 7965b2fa520SLois Curfman McInnes x = r[i]; 7975b2fa520SLois Curfman McInnes v = mat->v + i*m; 7985b2fa520SLois Curfman McInnes for ( j=0; j<m; j++ ) { (*v++) *= x;} 7995b2fa520SLois Curfman McInnes } 80072d926a5SLois Curfman McInnes ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 8015b2fa520SLois Curfman McInnes PLogFlops(n*m); 8025b2fa520SLois Curfman McInnes } 8035b2fa520SLois Curfman McInnes PetscFunctionReturn(0); 8045b2fa520SLois Curfman McInnes } 8055b2fa520SLois Curfman McInnes 8065b2fa520SLois Curfman McInnes #undef __FUNC__ 8075615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIDense" 8088f6be9afSLois Curfman McInnes int MatNorm_MPIDense(Mat A,NormType type,double *norm) 809096963f5SLois Curfman McInnes { 8103501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) A->data; 8113501a2bdSLois Curfman McInnes Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data; 8123501a2bdSLois Curfman McInnes int ierr, i, j; 8133501a2bdSLois Curfman McInnes double sum = 0.0; 8143501a2bdSLois Curfman McInnes Scalar *v = mat->v; 8153501a2bdSLois Curfman McInnes 8163a40ed3dSBarry Smith PetscFunctionBegin; 8173501a2bdSLois Curfman McInnes if (mdn->size == 1) { 8183501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr); 8193501a2bdSLois Curfman McInnes } else { 8203501a2bdSLois Curfman McInnes if (type == NORM_FROBENIUS) { 8213501a2bdSLois Curfman McInnes for (i=0; i<mat->n*mat->m; i++ ) { 822aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 823e20fef11SSatish Balay sum += PetscReal(PetscConj(*v)*(*v)); v++; 8243501a2bdSLois Curfman McInnes #else 8253501a2bdSLois Curfman McInnes sum += (*v)*(*v); v++; 8263501a2bdSLois Curfman McInnes #endif 8273501a2bdSLois Curfman McInnes } 828ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr); 8293501a2bdSLois Curfman McInnes *norm = sqrt(*norm); 8303501a2bdSLois Curfman McInnes PLogFlops(2*mat->n*mat->m); 8313a40ed3dSBarry Smith } else if (type == NORM_1) { 8323501a2bdSLois Curfman McInnes double *tmp, *tmp2; 8330452661fSBarry Smith tmp = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp); 8343501a2bdSLois Curfman McInnes tmp2 = tmp + mdn->N; 835549d3d68SSatish Balay ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr); 836096963f5SLois Curfman McInnes *norm = 0.0; 8373501a2bdSLois Curfman McInnes v = mat->v; 8383501a2bdSLois Curfman McInnes for ( j=0; j<mat->n; j++ ) { 8393501a2bdSLois Curfman McInnes for ( i=0; i<mat->m; i++ ) { 84067e560aaSBarry Smith tmp[j] += PetscAbsScalar(*v); v++; 8413501a2bdSLois Curfman McInnes } 8423501a2bdSLois Curfman McInnes } 843ca161407SBarry Smith ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr); 8443501a2bdSLois Curfman McInnes for ( j=0; j<mdn->N; j++ ) { 8453501a2bdSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 8463501a2bdSLois Curfman McInnes } 847606d414cSSatish Balay ierr = PetscFree(tmp);CHKERRQ(ierr); 8483501a2bdSLois Curfman McInnes PLogFlops(mat->n*mat->m); 8493a40ed3dSBarry Smith } else if (type == NORM_INFINITY) { /* max row norm */ 8503501a2bdSLois Curfman McInnes double ntemp; 8513501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 852ca161407SBarry Smith ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr); 8533a40ed3dSBarry Smith } else { 854a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 8553501a2bdSLois Curfman McInnes } 8563501a2bdSLois Curfman McInnes } 8573a40ed3dSBarry Smith PetscFunctionReturn(0); 8583501a2bdSLois Curfman McInnes } 8593501a2bdSLois Curfman McInnes 8605615d1e5SSatish Balay #undef __FUNC__ 8615615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIDense" 8628f6be9afSLois Curfman McInnes int MatTranspose_MPIDense(Mat A,Mat *matout) 8633501a2bdSLois Curfman McInnes { 8643501a2bdSLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 8653501a2bdSLois Curfman McInnes Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data; 8663501a2bdSLois Curfman McInnes Mat B; 8673501a2bdSLois Curfman McInnes int M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart; 8683501a2bdSLois Curfman McInnes int j, i, ierr; 8693501a2bdSLois Curfman McInnes Scalar *v; 8703501a2bdSLois Curfman McInnes 8713a40ed3dSBarry Smith PetscFunctionBegin; 8727056b6fcSBarry Smith if (matout == PETSC_NULL && M != N) { 873a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place"); 8747056b6fcSBarry Smith } 8757056b6fcSBarry Smith ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr); 8763501a2bdSLois Curfman McInnes 8773501a2bdSLois Curfman McInnes m = Aloc->m; n = Aloc->n; v = Aloc->v; 8780452661fSBarry Smith rwork = (int *) PetscMalloc(n*sizeof(int));CHKPTRQ(rwork); 8793501a2bdSLois Curfman McInnes for ( j=0; j<n; j++ ) { 8803501a2bdSLois Curfman McInnes for (i=0; i<m; i++) rwork[i] = rstart + i; 8813501a2bdSLois Curfman McInnes ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 8823501a2bdSLois Curfman McInnes v += m; 8833501a2bdSLois Curfman McInnes } 884606d414cSSatish Balay ierr = PetscFree(rwork);CHKERRQ(ierr); 8856d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8866d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8873638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 8883501a2bdSLois Curfman McInnes *matout = B; 8893501a2bdSLois Curfman McInnes } else { 890f830108cSBarry Smith PetscOps *Abops; 89109dc0095SBarry Smith MatOps Aops; 892f830108cSBarry Smith 8933501a2bdSLois Curfman McInnes /* This isn't really an in-place transpose, but free data struct from a */ 894606d414cSSatish Balay ierr = PetscFree(a->rowners);CHKERRQ(ierr); 8953501a2bdSLois Curfman McInnes ierr = MatDestroy(a->A);CHKERRQ(ierr); 8963501a2bdSLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 8973501a2bdSLois Curfman McInnes if (a->Mvctx) VecScatterDestroy(a->Mvctx); 898606d414cSSatish Balay ierr = PetscFree(a);CHKERRQ(ierr); 899f830108cSBarry Smith 900f830108cSBarry Smith /* 901f830108cSBarry Smith This is horrible, horrible code. We need to keep the 902f830108cSBarry Smith A pointers for the bops and ops but copy everything 903f830108cSBarry Smith else from C. 904f830108cSBarry Smith */ 905f830108cSBarry Smith Abops = A->bops; 906f830108cSBarry Smith Aops = A->ops; 907549d3d68SSatish Balay ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 908f830108cSBarry Smith A->bops = Abops; 909f830108cSBarry Smith A->ops = Aops; 910f830108cSBarry Smith 9110452661fSBarry Smith PetscHeaderDestroy(B); 9123501a2bdSLois Curfman McInnes } 9133a40ed3dSBarry Smith PetscFunctionReturn(0); 914096963f5SLois Curfman McInnes } 915096963f5SLois Curfman McInnes 916eadb2fb4SBarry Smith #include "pinclude/blaslapack.h" 9175615d1e5SSatish Balay #undef __FUNC__ 9185615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIDense" 9198f6be9afSLois Curfman McInnes int MatScale_MPIDense(Scalar *alpha,Mat inA) 92044cd7ae7SLois Curfman McInnes { 92144cd7ae7SLois Curfman McInnes Mat_MPIDense *A = (Mat_MPIDense *) inA->data; 92244cd7ae7SLois Curfman McInnes Mat_SeqDense *a = (Mat_SeqDense *) A->A->data; 92344cd7ae7SLois Curfman McInnes int one = 1, nz; 92444cd7ae7SLois Curfman McInnes 9253a40ed3dSBarry Smith PetscFunctionBegin; 92644cd7ae7SLois Curfman McInnes nz = a->m*a->n; 92744cd7ae7SLois Curfman McInnes BLscal_( &nz, alpha, a->v, &one ); 92844cd7ae7SLois Curfman McInnes PLogFlops(nz); 9293a40ed3dSBarry Smith PetscFunctionReturn(0); 93044cd7ae7SLois Curfman McInnes } 93144cd7ae7SLois Curfman McInnes 9325609ef8eSBarry Smith static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 9337b2a1423SBarry Smith extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **); 9348965ea79SLois Curfman McInnes 9358965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/ 93609dc0095SBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 93709dc0095SBarry Smith MatGetRow_MPIDense, 93809dc0095SBarry Smith MatRestoreRow_MPIDense, 93909dc0095SBarry Smith MatMult_MPIDense, 94009dc0095SBarry Smith MatMultAdd_MPIDense, 94109dc0095SBarry Smith MatMultTrans_MPIDense, 94209dc0095SBarry Smith MatMultTransAdd_MPIDense, 9438965ea79SLois Curfman McInnes 0, 94409dc0095SBarry Smith 0, 94509dc0095SBarry Smith 0, 94609dc0095SBarry Smith 0, 94709dc0095SBarry Smith 0, 94809dc0095SBarry Smith 0, 94909dc0095SBarry Smith 0, 95009dc0095SBarry Smith MatTranspose_MPIDense, 95109dc0095SBarry Smith MatGetInfo_MPIDense,0, 95209dc0095SBarry Smith MatGetDiagonal_MPIDense, 9535b2fa520SLois Curfman McInnes MatDiagonalScale_MPIDense, 95409dc0095SBarry Smith MatNorm_MPIDense, 95509dc0095SBarry Smith MatAssemblyBegin_MPIDense, 95609dc0095SBarry Smith MatAssemblyEnd_MPIDense, 95709dc0095SBarry Smith 0, 95809dc0095SBarry Smith MatSetOption_MPIDense, 95909dc0095SBarry Smith MatZeroEntries_MPIDense, 96009dc0095SBarry Smith MatZeroRows_MPIDense, 96109dc0095SBarry Smith 0, 96209dc0095SBarry Smith 0, 96309dc0095SBarry Smith 0, 96409dc0095SBarry Smith 0, 96509dc0095SBarry Smith MatGetSize_MPIDense, 96609dc0095SBarry Smith MatGetLocalSize_MPIDense, 96739ddd567SLois Curfman McInnes MatGetOwnershipRange_MPIDense, 96809dc0095SBarry Smith 0, 96909dc0095SBarry Smith 0, 97009dc0095SBarry Smith MatGetArray_MPIDense, 97109dc0095SBarry Smith MatRestoreArray_MPIDense, 9725609ef8eSBarry Smith MatDuplicate_MPIDense, 97309dc0095SBarry Smith 0, 97409dc0095SBarry Smith 0, 97509dc0095SBarry Smith 0, 97609dc0095SBarry Smith 0, 97709dc0095SBarry Smith 0, 9782ce60cd0SSatish Balay MatGetSubMatrices_MPIDense, 97909dc0095SBarry Smith 0, 98009dc0095SBarry Smith MatGetValues_MPIDense, 98109dc0095SBarry Smith 0, 98209dc0095SBarry Smith 0, 98309dc0095SBarry Smith MatScale_MPIDense, 98409dc0095SBarry Smith 0, 98509dc0095SBarry Smith 0, 98609dc0095SBarry Smith 0, 98709dc0095SBarry Smith MatGetBlockSize_MPIDense, 98809dc0095SBarry Smith 0, 98909dc0095SBarry Smith 0, 99009dc0095SBarry Smith 0, 99109dc0095SBarry Smith 0, 99209dc0095SBarry Smith 0, 99309dc0095SBarry Smith 0, 99409dc0095SBarry Smith 0, 99509dc0095SBarry Smith 0, 99609dc0095SBarry Smith 0, 997ca3fa75bSLois Curfman McInnes MatGetSubMatrix_MPIDense, 99809dc0095SBarry Smith 0, 99909dc0095SBarry Smith 0, 100009dc0095SBarry Smith MatGetMaps_Petsc}; 10018965ea79SLois Curfman McInnes 10025615d1e5SSatish Balay #undef __FUNC__ 10035615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIDense" 10048965ea79SLois Curfman McInnes /*@C 100539ddd567SLois Curfman McInnes MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 10068965ea79SLois Curfman McInnes 1007db81eaa0SLois Curfman McInnes Collective on MPI_Comm 1008db81eaa0SLois Curfman McInnes 10098965ea79SLois Curfman McInnes Input Parameters: 1010db81eaa0SLois Curfman McInnes + comm - MPI communicator 10118965ea79SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1012db81eaa0SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 10138965ea79SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1014db81eaa0SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1015db81eaa0SLois Curfman McInnes - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1016dfc5480cSLois Curfman McInnes to control all matrix memory allocation. 10178965ea79SLois Curfman McInnes 10188965ea79SLois Curfman McInnes Output Parameter: 1019477f1c0bSLois Curfman McInnes . A - the matrix 10208965ea79SLois Curfman McInnes 1021b259b22eSLois Curfman McInnes Notes: 102239ddd567SLois Curfman McInnes The dense format is fully compatible with standard Fortran 77 102339ddd567SLois Curfman McInnes storage by columns. 10248965ea79SLois Curfman McInnes 102518f449edSLois Curfman McInnes The data input variable is intended primarily for Fortran programmers 102618f449edSLois Curfman McInnes who wish to allocate their own matrix memory space. Most users should 1027b4fd4287SBarry Smith set data=PETSC_NULL. 102818f449edSLois Curfman McInnes 10298965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 10308965ea79SLois Curfman McInnes (possibly both). 10318965ea79SLois Curfman McInnes 1032027ccd11SLois Curfman McInnes Level: intermediate 1033027ccd11SLois Curfman McInnes 103439ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel 10358965ea79SLois Curfman McInnes 103639ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 10378965ea79SLois Curfman McInnes @*/ 1038477f1c0bSLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A) 10398965ea79SLois Curfman McInnes { 10408965ea79SLois Curfman McInnes Mat mat; 104139ddd567SLois Curfman McInnes Mat_MPIDense *a; 104225cdf11fSBarry Smith int ierr, i,flg; 10438965ea79SLois Curfman McInnes 10443a40ed3dSBarry Smith PetscFunctionBegin; 1045ed2daf61SLois Curfman McInnes /* Note: For now, when data is specified above, this assumes the user correctly 1046ed2daf61SLois Curfman McInnes allocates the local dense storage space. We should add error checking. */ 104718f449edSLois Curfman McInnes 1048477f1c0bSLois Curfman McInnes *A = 0; 10493f1db9ecSBarry Smith PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView); 10508965ea79SLois Curfman McInnes PLogObjectCreate(mat); 10510452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a); 1052549d3d68SSatish Balay ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1053e1311b90SBarry Smith mat->ops->destroy = MatDestroy_MPIDense; 1054e1311b90SBarry Smith mat->ops->view = MatView_MPIDense; 10558965ea79SLois Curfman McInnes mat->factor = 0; 105690f02eecSBarry Smith mat->mapping = 0; 10578965ea79SLois Curfman McInnes 1058622d7880SLois Curfman McInnes a->factor = 0; 1059e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 1060d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr); 1061d132466eSBarry Smith ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr); 10628965ea79SLois Curfman McInnes 106396f6c058SBarry Smith ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr); 106439ddd567SLois Curfman McInnes 1065be0abb6dSBarry Smith ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr); 1066be0abb6dSBarry Smith a->nvec = n; 1067c7fcc2eaSBarry Smith 106839ddd567SLois Curfman McInnes /* each row stores all columns */ 1069aca0ad90SLois Curfman McInnes a->N = mat->N = N; 1070aca0ad90SLois Curfman McInnes a->M = mat->M = M; 1071aca0ad90SLois Curfman McInnes a->m = mat->m = m; 1072be0abb6dSBarry Smith a->n = mat->n = N; /* NOTE: n == N */ 10738965ea79SLois Curfman McInnes 1074c7fcc2eaSBarry Smith /* the information in the maps duplicates the information computed below, eventually 1075c7fcc2eaSBarry Smith we should remove the duplicate information that is not contained in the maps */ 1076488ecbafSBarry Smith ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr); 1077be0abb6dSBarry Smith ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr); 1078c7fcc2eaSBarry Smith 10798965ea79SLois Curfman McInnes /* build local table of row and column ownerships */ 1080d7e8b826SBarry Smith a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 1081d7e8b826SBarry Smith a->cowners = a->rowners + a->size + 1; 1082f09e8eb9SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense)); 1083ca161407SBarry Smith ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 10848965ea79SLois Curfman McInnes a->rowners[0] = 0; 10858965ea79SLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 10868965ea79SLois Curfman McInnes a->rowners[i] += a->rowners[i-1]; 10878965ea79SLois Curfman McInnes } 10888965ea79SLois Curfman McInnes a->rstart = a->rowners[a->rank]; 10898965ea79SLois Curfman McInnes a->rend = a->rowners[a->rank+1]; 1090ca161407SBarry Smith ierr = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1091d7e8b826SBarry Smith a->cowners[0] = 0; 1092d7e8b826SBarry Smith for ( i=2; i<=a->size; i++ ) { 1093d7e8b826SBarry Smith a->cowners[i] += a->cowners[i-1]; 1094d7e8b826SBarry Smith } 10958965ea79SLois Curfman McInnes 1096029af93fSBarry Smith ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr); 10978965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 10988965ea79SLois Curfman McInnes 10998965ea79SLois Curfman McInnes /* build cache for off array entries formed */ 11003782ba37SSatish Balay a->donotstash = 0; 11018798bf22SSatish Balay ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr); 11028965ea79SLois Curfman McInnes 11038965ea79SLois Curfman McInnes /* stuff used for matrix vector multiply */ 11048965ea79SLois Curfman McInnes a->lvec = 0; 11058965ea79SLois Curfman McInnes a->Mvctx = 0; 110639b7565bSBarry Smith a->roworiented = 1; 11078965ea79SLois Curfman McInnes 11080de54da6SSatish Balay ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C", 11090de54da6SSatish Balay "MatGetDiagonalBlock_MPIDense", 11100de54da6SSatish Balay (void*)MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 11110de54da6SSatish Balay 1112477f1c0bSLois Curfman McInnes *A = mat; 111325cdf11fSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 111425cdf11fSBarry Smith if (flg) { 11158c469469SLois Curfman McInnes ierr = MatPrintHelp(mat);CHKERRQ(ierr); 11168c469469SLois Curfman McInnes } 11173a40ed3dSBarry Smith PetscFunctionReturn(0); 11188965ea79SLois Curfman McInnes } 11198965ea79SLois Curfman McInnes 11205615d1e5SSatish Balay #undef __FUNC__ 11215609ef8eSBarry Smith #define __FUNC__ "MatDuplicate_MPIDense" 11225609ef8eSBarry Smith static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 11238965ea79SLois Curfman McInnes { 11248965ea79SLois Curfman McInnes Mat mat; 11253501a2bdSLois Curfman McInnes Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data; 112639ddd567SLois Curfman McInnes int ierr; 11272ba99913SLois Curfman McInnes FactorCtx *factor; 11288965ea79SLois Curfman McInnes 11293a40ed3dSBarry Smith PetscFunctionBegin; 11308965ea79SLois Curfman McInnes *newmat = 0; 11313f1db9ecSBarry Smith PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView); 11328965ea79SLois Curfman McInnes PLogObjectCreate(mat); 11330452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a); 1134549d3d68SSatish Balay ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1135e1311b90SBarry Smith mat->ops->destroy = MatDestroy_MPIDense; 1136e1311b90SBarry Smith mat->ops->view = MatView_MPIDense; 11373501a2bdSLois Curfman McInnes mat->factor = A->factor; 1138c456f294SBarry Smith mat->assembled = PETSC_TRUE; 11398965ea79SLois Curfman McInnes 114044cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 114144cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 114244cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 114344cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 11442ba99913SLois Curfman McInnes if (oldmat->factor) { 11452ba99913SLois Curfman McInnes a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor); 11462ba99913SLois Curfman McInnes /* copy factor contents ... add this code! */ 11472ba99913SLois Curfman McInnes } else a->factor = 0; 11488965ea79SLois Curfman McInnes 11498965ea79SLois Curfman McInnes a->rstart = oldmat->rstart; 11508965ea79SLois Curfman McInnes a->rend = oldmat->rend; 11518965ea79SLois Curfman McInnes a->size = oldmat->size; 11528965ea79SLois Curfman McInnes a->rank = oldmat->rank; 1153e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 11543782ba37SSatish Balay a->donotstash = oldmat->donotstash; 11550452661fSBarry Smith a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners); 1156f09e8eb9SSatish Balay PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense)); 1157549d3d68SSatish Balay ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr); 11588798bf22SSatish Balay ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr); 11598965ea79SLois Curfman McInnes 11608965ea79SLois Curfman McInnes ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 11618965ea79SLois Curfman McInnes PLogObjectParent(mat,a->lvec); 116255659b69SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 11638965ea79SLois Curfman McInnes PLogObjectParent(mat,a->Mvctx); 11645609ef8eSBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 11658965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 11668965ea79SLois Curfman McInnes *newmat = mat; 11673a40ed3dSBarry Smith PetscFunctionReturn(0); 11688965ea79SLois Curfman McInnes } 11698965ea79SLois Curfman McInnes 117077c4ece6SBarry Smith #include "sys.h" 11718965ea79SLois Curfman McInnes 11725615d1e5SSatish Balay #undef __FUNC__ 11735615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIDense_DenseInFile" 117490ace30eSBarry Smith int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat) 117590ace30eSBarry Smith { 117640011551SBarry Smith int *rowners, i,size,rank,m,ierr,nz,j; 117790ace30eSBarry Smith Scalar *array,*vals,*vals_ptr; 117890ace30eSBarry Smith MPI_Status status; 117990ace30eSBarry Smith 11803a40ed3dSBarry Smith PetscFunctionBegin; 1181d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1182d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 118390ace30eSBarry Smith 118490ace30eSBarry Smith /* determine ownership of all rows */ 118590ace30eSBarry Smith m = M/size + ((M % size) > rank); 118690ace30eSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners); 1187ca161407SBarry Smith ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 118890ace30eSBarry Smith rowners[0] = 0; 118990ace30eSBarry Smith for ( i=2; i<=size; i++ ) { 119090ace30eSBarry Smith rowners[i] += rowners[i-1]; 119190ace30eSBarry Smith } 119290ace30eSBarry Smith 119390ace30eSBarry Smith ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 119490ace30eSBarry Smith ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 119590ace30eSBarry Smith 119690ace30eSBarry Smith if (!rank) { 119790ace30eSBarry Smith vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals); 119890ace30eSBarry Smith 119990ace30eSBarry Smith /* read in my part of the matrix numerical values */ 12000752156aSBarry Smith ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 120190ace30eSBarry Smith 120290ace30eSBarry Smith /* insert into matrix-by row (this is why cannot directly read into array */ 120390ace30eSBarry Smith vals_ptr = vals; 120490ace30eSBarry Smith for ( i=0; i<m; i++ ) { 120590ace30eSBarry Smith for ( j=0; j<N; j++ ) { 120690ace30eSBarry Smith array[i + j*m] = *vals_ptr++; 120790ace30eSBarry Smith } 120890ace30eSBarry Smith } 120990ace30eSBarry Smith 121090ace30eSBarry Smith /* read in other processors and ship out */ 121190ace30eSBarry Smith for ( i=1; i<size; i++ ) { 121290ace30eSBarry Smith nz = (rowners[i+1] - rowners[i])*N; 12130752156aSBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1214ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr); 121590ace30eSBarry Smith } 12163a40ed3dSBarry Smith } else { 121790ace30eSBarry Smith /* receive numeric values */ 121890ace30eSBarry Smith vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals); 121990ace30eSBarry Smith 122090ace30eSBarry Smith /* receive message of values*/ 1221ca161407SBarry Smith ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr); 122290ace30eSBarry Smith 122390ace30eSBarry Smith /* insert into matrix-by row (this is why cannot directly read into array */ 122490ace30eSBarry Smith vals_ptr = vals; 122590ace30eSBarry Smith for ( i=0; i<m; i++ ) { 122690ace30eSBarry Smith for ( j=0; j<N; j++ ) { 122790ace30eSBarry Smith array[i + j*m] = *vals_ptr++; 122890ace30eSBarry Smith } 122990ace30eSBarry Smith } 123090ace30eSBarry Smith } 1231606d414cSSatish Balay ierr = PetscFree(rowners);CHKERRQ(ierr); 1232606d414cSSatish Balay ierr = PetscFree(vals);CHKERRQ(ierr); 12336d4a8577SBarry Smith ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 12346d4a8577SBarry Smith ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 12353a40ed3dSBarry Smith PetscFunctionReturn(0); 123690ace30eSBarry Smith } 123790ace30eSBarry Smith 123890ace30eSBarry Smith 12395615d1e5SSatish Balay #undef __FUNC__ 12405615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIDense" 124119bcc07fSBarry Smith int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat) 12428965ea79SLois Curfman McInnes { 12438965ea79SLois Curfman McInnes Mat A; 12448965ea79SLois Curfman McInnes Scalar *vals,*svals; 124519bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 12468965ea79SLois Curfman McInnes MPI_Status status; 12478965ea79SLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 12488965ea79SLois Curfman McInnes int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 124919bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 12503a40ed3dSBarry Smith int i, nz, ierr, j,rstart, rend, fd; 12518965ea79SLois Curfman McInnes 12523a40ed3dSBarry Smith PetscFunctionBegin; 1253d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1254d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 12558965ea79SLois Curfman McInnes if (!rank) { 125690ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 12570752156aSBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1258a8c6a408SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 12598965ea79SLois Curfman McInnes } 12608965ea79SLois Curfman McInnes 1261ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 126290ace30eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 126390ace30eSBarry Smith 126490ace30eSBarry Smith /* 126590ace30eSBarry Smith Handle case where matrix is stored on disk as a dense matrix 126690ace30eSBarry Smith */ 126790ace30eSBarry Smith if (nz == MATRIX_BINARY_FORMAT_DENSE) { 12683a40ed3dSBarry Smith ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 12693a40ed3dSBarry Smith PetscFunctionReturn(0); 127090ace30eSBarry Smith } 127190ace30eSBarry Smith 12728965ea79SLois Curfman McInnes /* determine ownership of all rows */ 12738965ea79SLois Curfman McInnes m = M/size + ((M % size) > rank); 12740452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners); 1275ca161407SBarry Smith ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 12768965ea79SLois Curfman McInnes rowners[0] = 0; 12778965ea79SLois Curfman McInnes for ( i=2; i<=size; i++ ) { 12788965ea79SLois Curfman McInnes rowners[i] += rowners[i-1]; 12798965ea79SLois Curfman McInnes } 12808965ea79SLois Curfman McInnes rstart = rowners[rank]; 12818965ea79SLois Curfman McInnes rend = rowners[rank+1]; 12828965ea79SLois Curfman McInnes 12838965ea79SLois Curfman McInnes /* distribute row lengths to all processors */ 12840452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens); 12858965ea79SLois Curfman McInnes offlens = ourlens + (rend-rstart); 12868965ea79SLois Curfman McInnes if (!rank) { 12870452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths); 12880752156aSBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 12890452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts); 12908965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1291ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1292606d414cSSatish Balay ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1293ca161407SBarry Smith } else { 1294ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 12958965ea79SLois Curfman McInnes } 12968965ea79SLois Curfman McInnes 12978965ea79SLois Curfman McInnes if (!rank) { 12988965ea79SLois Curfman McInnes /* calculate the number of nonzeros on each processor */ 12990452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz); 1300549d3d68SSatish Balay ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 13018965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 13028965ea79SLois Curfman McInnes for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 13038965ea79SLois Curfman McInnes procsnz[i] += rowlengths[j]; 13048965ea79SLois Curfman McInnes } 13058965ea79SLois Curfman McInnes } 1306606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 13078965ea79SLois Curfman McInnes 13088965ea79SLois Curfman McInnes /* determine max buffer needed and allocate it */ 13098965ea79SLois Curfman McInnes maxnz = 0; 13108965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 13110452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 13128965ea79SLois Curfman McInnes } 13130452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols); 13148965ea79SLois Curfman McInnes 13158965ea79SLois Curfman McInnes /* read in my part of the matrix column indices */ 13168965ea79SLois Curfman McInnes nz = procsnz[0]; 13170452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols); 13180752156aSBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 13198965ea79SLois Curfman McInnes 13208965ea79SLois Curfman McInnes /* read in every one elses and ship off */ 13218965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 13228965ea79SLois Curfman McInnes nz = procsnz[i]; 13230752156aSBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1324ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 13258965ea79SLois Curfman McInnes } 1326606d414cSSatish Balay ierr = PetscFree(cols);CHKERRQ(ierr); 13273a40ed3dSBarry Smith } else { 13288965ea79SLois Curfman McInnes /* determine buffer space needed for message */ 13298965ea79SLois Curfman McInnes nz = 0; 13308965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 13318965ea79SLois Curfman McInnes nz += ourlens[i]; 13328965ea79SLois Curfman McInnes } 13330452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols); 13348965ea79SLois Curfman McInnes 13358965ea79SLois Curfman McInnes /* receive message of column indices*/ 1336ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1337ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1338a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 13398965ea79SLois Curfman McInnes } 13408965ea79SLois Curfman McInnes 13418965ea79SLois Curfman McInnes /* loop over local rows, determining number of off diagonal entries */ 1342549d3d68SSatish Balay ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 13438965ea79SLois Curfman McInnes jj = 0; 13448965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 13458965ea79SLois Curfman McInnes for ( j=0; j<ourlens[i]; j++ ) { 13468965ea79SLois Curfman McInnes if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 13478965ea79SLois Curfman McInnes jj++; 13488965ea79SLois Curfman McInnes } 13498965ea79SLois Curfman McInnes } 13508965ea79SLois Curfman McInnes 13518965ea79SLois Curfman McInnes /* create our matrix */ 13528965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 13538965ea79SLois Curfman McInnes ourlens[i] -= offlens[i]; 13548965ea79SLois Curfman McInnes } 1355b4fd4287SBarry Smith ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 13568965ea79SLois Curfman McInnes A = *newmat; 13578965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 13588965ea79SLois Curfman McInnes ourlens[i] += offlens[i]; 13598965ea79SLois Curfman McInnes } 13608965ea79SLois Curfman McInnes 13618965ea79SLois Curfman McInnes if (!rank) { 13620452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals); 13638965ea79SLois Curfman McInnes 13648965ea79SLois Curfman McInnes /* read in my part of the matrix numerical values */ 13658965ea79SLois Curfman McInnes nz = procsnz[0]; 13660752156aSBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 13678965ea79SLois Curfman McInnes 13688965ea79SLois Curfman McInnes /* insert into matrix */ 13698965ea79SLois Curfman McInnes jj = rstart; 13708965ea79SLois Curfman McInnes smycols = mycols; 13718965ea79SLois Curfman McInnes svals = vals; 13728965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 13738965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 13748965ea79SLois Curfman McInnes smycols += ourlens[i]; 13758965ea79SLois Curfman McInnes svals += ourlens[i]; 13768965ea79SLois Curfman McInnes jj++; 13778965ea79SLois Curfman McInnes } 13788965ea79SLois Curfman McInnes 13798965ea79SLois Curfman McInnes /* read in other processors and ship out */ 13808965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 13818965ea79SLois Curfman McInnes nz = procsnz[i]; 13820752156aSBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1383ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 13848965ea79SLois Curfman McInnes } 1385606d414cSSatish Balay ierr = PetscFree(procsnz);CHKERRQ(ierr); 13863a40ed3dSBarry Smith } else { 13878965ea79SLois Curfman McInnes /* receive numeric values */ 13880452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals); 13898965ea79SLois Curfman McInnes 13908965ea79SLois Curfman McInnes /* receive message of values*/ 1391ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1392ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1393a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 13948965ea79SLois Curfman McInnes 13958965ea79SLois Curfman McInnes /* insert into matrix */ 13968965ea79SLois Curfman McInnes jj = rstart; 13978965ea79SLois Curfman McInnes smycols = mycols; 13988965ea79SLois Curfman McInnes svals = vals; 13998965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 14008965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 14018965ea79SLois Curfman McInnes smycols += ourlens[i]; 14028965ea79SLois Curfman McInnes svals += ourlens[i]; 14038965ea79SLois Curfman McInnes jj++; 14048965ea79SLois Curfman McInnes } 14058965ea79SLois Curfman McInnes } 1406606d414cSSatish Balay ierr = PetscFree(ourlens);CHKERRQ(ierr); 1407606d414cSSatish Balay ierr = PetscFree(vals);CHKERRQ(ierr); 1408606d414cSSatish Balay ierr = PetscFree(mycols);CHKERRQ(ierr); 1409606d414cSSatish Balay ierr = PetscFree(rowners);CHKERRQ(ierr); 14108965ea79SLois Curfman McInnes 14116d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 14126d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 14133a40ed3dSBarry Smith PetscFunctionReturn(0); 14148965ea79SLois Curfman McInnes } 141590ace30eSBarry Smith 141690ace30eSBarry Smith 141790ace30eSBarry Smith 141890ace30eSBarry Smith 141990ace30eSBarry Smith 1420