18965ea79SLois Curfman McInnes #ifndef lint 2*c2653b3dSLois Curfman McInnes static char vcid[] = "$Id: mpidense.c,v 1.66 1997/03/09 17:58:12 curfman Exp bsmith $"; 38965ea79SLois Curfman McInnes #endif 48965ea79SLois Curfman McInnes 5ed3cc1f0SBarry Smith /* 6ed3cc1f0SBarry Smith Basic functions for basic parallel dense matrices. 7ed3cc1f0SBarry Smith */ 8ed3cc1f0SBarry Smith 970f55243SBarry Smith #include "src/mat/impls/dense/mpi/mpidense.h" 10f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 118965ea79SLois Curfman McInnes 125615d1e5SSatish Balay #undef __FUNC__ 135615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIDense" 147056b6fcSBarry Smith static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv) 158965ea79SLois Curfman McInnes { 1639b7565bSBarry Smith Mat_MPIDense *A = (Mat_MPIDense *) mat->data; 1739b7565bSBarry Smith int ierr, i, j, rstart = A->rstart, rend = A->rend, row; 1839b7565bSBarry Smith int roworiented = A->roworiented; 198965ea79SLois Curfman McInnes 208965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 21e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 22e3372554SBarry Smith if (idxm[i] >= A->M) SETERRQ(1,0,"Row too large"); 238965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 248965ea79SLois Curfman McInnes row = idxm[i] - rstart; 2539b7565bSBarry Smith if (roworiented) { 2639b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr); 2739b7565bSBarry Smith } 2839b7565bSBarry Smith else { 298965ea79SLois Curfman McInnes for ( j=0; j<n; j++ ) { 30e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 31e3372554SBarry Smith if (idxn[j] >= A->N) SETERRQ(1,0,"Column too large"); 3239b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr); 3339b7565bSBarry Smith } 348965ea79SLois Curfman McInnes } 358965ea79SLois Curfman McInnes } 368965ea79SLois Curfman McInnes else { 3739b7565bSBarry Smith if (roworiented) { 3839b7565bSBarry Smith ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr); 3939b7565bSBarry Smith } 4039b7565bSBarry Smith else { /* must stash each seperately */ 4139b7565bSBarry Smith row = idxm[i]; 4239b7565bSBarry Smith for ( j=0; j<n; j++ ) { 437056b6fcSBarry Smith ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 4439b7565bSBarry Smith } 4539b7565bSBarry Smith } 46b49de8d1SLois Curfman McInnes } 47b49de8d1SLois Curfman McInnes } 48b49de8d1SLois Curfman McInnes return 0; 49b49de8d1SLois Curfman McInnes } 50b49de8d1SLois Curfman McInnes 515615d1e5SSatish Balay #undef __FUNC__ 525615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIDense" 53b49de8d1SLois Curfman McInnes static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 54b49de8d1SLois Curfman McInnes { 55b49de8d1SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 56b49de8d1SLois Curfman McInnes int ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row; 57b49de8d1SLois Curfman McInnes 58b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 59e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 60e3372554SBarry Smith if (idxm[i] >= mdn->M) SETERRQ(1,0,"Row too large"); 61b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 62b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 63b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 64e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 65b49de8d1SLois Curfman McInnes if (idxn[j] >= mdn->N) 66e3372554SBarry Smith SETERRQ(1,0,"Column too large"); 67b49de8d1SLois Curfman McInnes ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr); 68b49de8d1SLois Curfman McInnes } 69b49de8d1SLois Curfman McInnes } 70b49de8d1SLois Curfman McInnes else { 71e3372554SBarry Smith SETERRQ(1,0,"Only local values currently supported"); 728965ea79SLois Curfman McInnes } 738965ea79SLois Curfman McInnes } 748965ea79SLois Curfman McInnes return 0; 758965ea79SLois Curfman McInnes } 768965ea79SLois Curfman McInnes 775615d1e5SSatish Balay #undef __FUNC__ 785615d1e5SSatish Balay #define __FUNC__ "MatGetArray_MPIDense" 79ff14e315SSatish Balay static int MatGetArray_MPIDense(Mat A,Scalar **array) 80ff14e315SSatish Balay { 81ff14e315SSatish Balay Mat_MPIDense *a = (Mat_MPIDense *) A->data; 82ff14e315SSatish Balay int ierr; 83ff14e315SSatish Balay 84ff14e315SSatish Balay ierr = MatGetArray(a->A,array); CHKERRQ(ierr); 85ff14e315SSatish Balay return 0; 86ff14e315SSatish Balay } 87ff14e315SSatish Balay 885615d1e5SSatish Balay #undef __FUNC__ 895615d1e5SSatish Balay #define __FUNC__ "MatRestoreArray_MPIDense" 90ff14e315SSatish Balay static int MatRestoreArray_MPIDense(Mat A,Scalar **array) 91ff14e315SSatish Balay { 92ff14e315SSatish Balay return 0; 93ff14e315SSatish Balay } 94ff14e315SSatish Balay 955615d1e5SSatish Balay #undef __FUNC__ 965615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIDense" 9739ddd567SLois Curfman McInnes static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 988965ea79SLois Curfman McInnes { 9939ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 1008965ea79SLois Curfman McInnes MPI_Comm comm = mat->comm; 10139ddd567SLois Curfman McInnes int size = mdn->size, *owners = mdn->rowners, rank = mdn->rank; 1028965ea79SLois Curfman McInnes int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 10339ddd567SLois Curfman McInnes int tag = mat->tag, *owner,*starts,count,ierr; 1048965ea79SLois Curfman McInnes InsertMode addv; 10539ddd567SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 1068965ea79SLois Curfman McInnes Scalar *rvalues,*svalues; 1078965ea79SLois Curfman McInnes 1088965ea79SLois Curfman McInnes /* make sure all processors are either in INSERTMODE or ADDMODE */ 109e0fa3b82SLois Curfman McInnes MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 1107056b6fcSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 111e3372554SBarry Smith SETERRQ(1,0,"Cannot mix adds/inserts on different procs"); 1128965ea79SLois Curfman McInnes } 113e0fa3b82SLois Curfman McInnes mat->insertmode = addv; /* in case this processor had no cache */ 1148965ea79SLois Curfman McInnes 1158965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 1160452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 117cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1180452661fSBarry Smith owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 11939ddd567SLois Curfman McInnes for ( i=0; i<mdn->stash.n; i++ ) { 12039ddd567SLois Curfman McInnes idx = mdn->stash.idx[i]; 1218965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 1228965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 1238965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1248965ea79SLois Curfman McInnes } 1258965ea79SLois Curfman McInnes } 1268965ea79SLois Curfman McInnes } 1278965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1288965ea79SLois Curfman McInnes 1298965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 1300452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1316d84be18SBarry Smith MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm); 1328965ea79SLois Curfman McInnes nreceives = work[rank]; 133e3372554SBarry Smith if (nreceives > size) SETERRQ(1,0,"Internal PETSc error"); 1346d84be18SBarry Smith MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm); 1358965ea79SLois Curfman McInnes nmax = work[rank]; 1360452661fSBarry Smith PetscFree(work); 1378965ea79SLois Curfman McInnes 1388965ea79SLois Curfman McInnes /* post receives: 1398965ea79SLois Curfman McInnes 1) each message will consist of ordered pairs 1408965ea79SLois Curfman McInnes (global index,value) we store the global index as a double 1418965ea79SLois Curfman McInnes to simplify the message passing. 1428965ea79SLois Curfman McInnes 2) since we don't know how long each individual message is we 1438965ea79SLois Curfman McInnes allocate the largest needed buffer for each receive. Potentially 1448965ea79SLois Curfman McInnes this is a lot of wasted space. 1458965ea79SLois Curfman McInnes 1468965ea79SLois Curfman McInnes This could be done better. 1478965ea79SLois Curfman McInnes */ 1483b2fbd54SBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 1493b2fbd54SBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1508965ea79SLois Curfman McInnes for ( i=0; i<nreceives; i++ ) { 1517056b6fcSBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 1528965ea79SLois Curfman McInnes } 1538965ea79SLois Curfman McInnes 1548965ea79SLois Curfman McInnes /* do sends: 1558965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 1568965ea79SLois Curfman McInnes the ith processor 1578965ea79SLois Curfman McInnes */ 1583b2fbd54SBarry Smith svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 1593b2fbd54SBarry Smith send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1600452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 1618965ea79SLois Curfman McInnes starts[0] = 0; 1628965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 16339ddd567SLois Curfman McInnes for ( i=0; i<mdn->stash.n; i++ ) { 16439ddd567SLois Curfman McInnes svalues[3*starts[owner[i]]] = (Scalar) mdn->stash.idx[i]; 16539ddd567SLois Curfman McInnes svalues[3*starts[owner[i]]+1] = (Scalar) mdn->stash.idy[i]; 16639ddd567SLois Curfman McInnes svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i]; 1678965ea79SLois Curfman McInnes } 1680452661fSBarry Smith PetscFree(owner); 1698965ea79SLois Curfman McInnes starts[0] = 0; 1708965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1718965ea79SLois Curfman McInnes count = 0; 1728965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 1738965ea79SLois Curfman McInnes if (procs[i]) { 1747056b6fcSBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++); 1758965ea79SLois Curfman McInnes } 1768965ea79SLois Curfman McInnes } 1770452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 1788965ea79SLois Curfman McInnes 1798965ea79SLois Curfman McInnes /* Free cache space */ 180d2dc9b81SLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIDense:Number of off-processor values %d\n",mdn->stash.n); 18139ddd567SLois Curfman McInnes ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr); 1828965ea79SLois Curfman McInnes 18339ddd567SLois Curfman McInnes mdn->svalues = svalues; mdn->rvalues = rvalues; 18439ddd567SLois Curfman McInnes mdn->nsends = nsends; mdn->nrecvs = nreceives; 18539ddd567SLois Curfman McInnes mdn->send_waits = send_waits; mdn->recv_waits = recv_waits; 18639ddd567SLois Curfman McInnes mdn->rmax = nmax; 1878965ea79SLois Curfman McInnes 1888965ea79SLois Curfman McInnes return 0; 1898965ea79SLois Curfman McInnes } 19039ddd567SLois Curfman McInnes extern int MatSetUpMultiply_MPIDense(Mat); 1918965ea79SLois Curfman McInnes 1925615d1e5SSatish Balay #undef __FUNC__ 1935615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIDense" 19439ddd567SLois Curfman McInnes static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 1958965ea79SLois Curfman McInnes { 19639ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 1978965ea79SLois Curfman McInnes MPI_Status *send_status,recv_status; 19839ddd567SLois Curfman McInnes int imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col; 1998965ea79SLois Curfman McInnes Scalar *values,val; 200e0fa3b82SLois Curfman McInnes InsertMode addv = mat->insertmode; 2018965ea79SLois Curfman McInnes 2028965ea79SLois Curfman McInnes /* wait on receives */ 2038965ea79SLois Curfman McInnes while (count) { 20439ddd567SLois Curfman McInnes MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status); 2058965ea79SLois Curfman McInnes /* unpack receives into our local space */ 20639ddd567SLois Curfman McInnes values = mdn->rvalues + 3*imdex*mdn->rmax; 2078965ea79SLois Curfman McInnes MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2088965ea79SLois Curfman McInnes n = n/3; 2098965ea79SLois Curfman McInnes for ( i=0; i<n; i++ ) { 210227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - mdn->rstart; 211227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 2128965ea79SLois Curfman McInnes val = values[3*i+2]; 21339ddd567SLois Curfman McInnes if (col >= 0 && col < mdn->N) { 21439ddd567SLois Curfman McInnes MatSetValues(mdn->A,1,&row,1,&col,&val,addv); 2158965ea79SLois Curfman McInnes } 216e3372554SBarry Smith else {SETERRQ(1,0,"Invalid column");} 2178965ea79SLois Curfman McInnes } 2188965ea79SLois Curfman McInnes count--; 2198965ea79SLois Curfman McInnes } 2200452661fSBarry Smith PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues); 2218965ea79SLois Curfman McInnes 2228965ea79SLois Curfman McInnes /* wait on sends */ 22339ddd567SLois Curfman McInnes if (mdn->nsends) { 2247056b6fcSBarry Smith send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 22539ddd567SLois Curfman McInnes MPI_Waitall(mdn->nsends,mdn->send_waits,send_status); 2260452661fSBarry Smith PetscFree(send_status); 2278965ea79SLois Curfman McInnes } 2280452661fSBarry Smith PetscFree(mdn->send_waits); PetscFree(mdn->svalues); 2298965ea79SLois Curfman McInnes 23039ddd567SLois Curfman McInnes ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr); 23139ddd567SLois Curfman McInnes ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr); 2328965ea79SLois Curfman McInnes 2336d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 23439ddd567SLois Curfman McInnes ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr); 2358965ea79SLois Curfman McInnes } 2368965ea79SLois Curfman McInnes return 0; 2378965ea79SLois Curfman McInnes } 2388965ea79SLois Curfman McInnes 2395615d1e5SSatish Balay #undef __FUNC__ 2405615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIDense" 24139ddd567SLois Curfman McInnes static int MatZeroEntries_MPIDense(Mat A) 2428965ea79SLois Curfman McInnes { 24339ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 24439ddd567SLois Curfman McInnes return MatZeroEntries(l->A); 2458965ea79SLois Curfman McInnes } 2468965ea79SLois Curfman McInnes 2475615d1e5SSatish Balay #undef __FUNC__ 2485615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIDense" 2494e220ebcSLois Curfman McInnes static int MatGetBlockSize_MPIDense(Mat A,int *bs) 2504e220ebcSLois Curfman McInnes { 2514e220ebcSLois Curfman McInnes *bs = 1; 2524e220ebcSLois Curfman McInnes return 0; 2534e220ebcSLois Curfman McInnes } 2544e220ebcSLois Curfman McInnes 2558965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the 2568965ea79SLois Curfman McInnes matrix is square and the column and row owerships are identical. 2578965ea79SLois Curfman McInnes This is a BUG. The only way to fix it seems to be to access 2583501a2bdSLois Curfman McInnes mdn->A and mdn->B directly and not through the MatZeroRows() 2598965ea79SLois Curfman McInnes routine. 2608965ea79SLois Curfman McInnes */ 2615615d1e5SSatish Balay #undef __FUNC__ 2625615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIDense" 26339ddd567SLois Curfman McInnes static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag) 2648965ea79SLois Curfman McInnes { 26539ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 2668965ea79SLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 2678965ea79SLois Curfman McInnes int *procs,*nprocs,j,found,idx,nsends,*work; 2688965ea79SLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 2698965ea79SLois Curfman McInnes int *rvalues,tag = A->tag,count,base,slen,n,*source; 2708965ea79SLois Curfman McInnes int *lens,imdex,*lrows,*values; 2718965ea79SLois Curfman McInnes MPI_Comm comm = A->comm; 2728965ea79SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 2738965ea79SLois Curfman McInnes MPI_Status recv_status,*send_status; 2748965ea79SLois Curfman McInnes IS istmp; 2758965ea79SLois Curfman McInnes 27677c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 2778965ea79SLois Curfman McInnes ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 2788965ea79SLois Curfman McInnes 2798965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 2800452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 281cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 2820452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 2838965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 2848965ea79SLois Curfman McInnes idx = rows[i]; 2858965ea79SLois Curfman McInnes found = 0; 2868965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 2878965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 2888965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 2898965ea79SLois Curfman McInnes } 2908965ea79SLois Curfman McInnes } 291e3372554SBarry Smith if (!found) SETERRQ(1,0,"Index out of range"); 2928965ea79SLois Curfman McInnes } 2938965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 2948965ea79SLois Curfman McInnes 2958965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 2960452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 2978965ea79SLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 2988965ea79SLois Curfman McInnes nrecvs = work[rank]; 2998965ea79SLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 3008965ea79SLois Curfman McInnes nmax = work[rank]; 3010452661fSBarry Smith PetscFree(work); 3028965ea79SLois Curfman McInnes 3038965ea79SLois Curfman McInnes /* post receives: */ 3040452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 3058965ea79SLois Curfman McInnes CHKPTRQ(rvalues); 3060452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 3078965ea79SLois Curfman McInnes CHKPTRQ(recv_waits); 3088965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 3098965ea79SLois Curfman McInnes MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 3108965ea79SLois Curfman McInnes } 3118965ea79SLois Curfman McInnes 3128965ea79SLois Curfman McInnes /* do sends: 3138965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 3148965ea79SLois Curfman McInnes the ith processor 3158965ea79SLois Curfman McInnes */ 3160452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 3177056b6fcSBarry Smith send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 3180452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 3198965ea79SLois Curfman McInnes starts[0] = 0; 3208965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3218965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 3228965ea79SLois Curfman McInnes svalues[starts[owner[i]]++] = rows[i]; 3238965ea79SLois Curfman McInnes } 3248965ea79SLois Curfman McInnes ISRestoreIndices(is,&rows); 3258965ea79SLois Curfman McInnes 3268965ea79SLois Curfman McInnes starts[0] = 0; 3278965ea79SLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3288965ea79SLois Curfman McInnes count = 0; 3298965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 3308965ea79SLois Curfman McInnes if (procs[i]) { 3318965ea79SLois Curfman McInnes MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 3328965ea79SLois Curfman McInnes } 3338965ea79SLois Curfman McInnes } 3340452661fSBarry Smith PetscFree(starts); 3358965ea79SLois Curfman McInnes 3368965ea79SLois Curfman McInnes base = owners[rank]; 3378965ea79SLois Curfman McInnes 3388965ea79SLois Curfman McInnes /* wait on receives */ 3390452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3408965ea79SLois Curfman McInnes source = lens + nrecvs; 3418965ea79SLois Curfman McInnes count = nrecvs; slen = 0; 3428965ea79SLois Curfman McInnes while (count) { 3438965ea79SLois Curfman McInnes MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3448965ea79SLois Curfman McInnes /* unpack receives into our local space */ 3458965ea79SLois Curfman McInnes MPI_Get_count(&recv_status,MPI_INT,&n); 3468965ea79SLois Curfman McInnes source[imdex] = recv_status.MPI_SOURCE; 3478965ea79SLois Curfman McInnes lens[imdex] = n; 3488965ea79SLois Curfman McInnes slen += n; 3498965ea79SLois Curfman McInnes count--; 3508965ea79SLois Curfman McInnes } 3510452661fSBarry Smith PetscFree(recv_waits); 3528965ea79SLois Curfman McInnes 3538965ea79SLois Curfman McInnes /* move the data into the send scatter */ 3540452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 3558965ea79SLois Curfman McInnes count = 0; 3568965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 3578965ea79SLois Curfman McInnes values = rvalues + i*nmax; 3588965ea79SLois Curfman McInnes for ( j=0; j<lens[i]; j++ ) { 3598965ea79SLois Curfman McInnes lrows[count++] = values[j] - base; 3608965ea79SLois Curfman McInnes } 3618965ea79SLois Curfman McInnes } 3620452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 3630452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 3648965ea79SLois Curfman McInnes 3658965ea79SLois Curfman McInnes /* actually zap the local rows */ 366537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 3678965ea79SLois Curfman McInnes PLogObjectParent(A,istmp); 3680452661fSBarry Smith PetscFree(lrows); 3698965ea79SLois Curfman McInnes ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 3708965ea79SLois Curfman McInnes ierr = ISDestroy(istmp); CHKERRQ(ierr); 3718965ea79SLois Curfman McInnes 3728965ea79SLois Curfman McInnes /* wait on sends */ 3738965ea79SLois Curfman McInnes if (nsends) { 3747056b6fcSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 3758965ea79SLois Curfman McInnes MPI_Waitall(nsends,send_waits,send_status); 3760452661fSBarry Smith PetscFree(send_status); 3778965ea79SLois Curfman McInnes } 3780452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 3798965ea79SLois Curfman McInnes 3808965ea79SLois Curfman McInnes return 0; 3818965ea79SLois Curfman McInnes } 3828965ea79SLois Curfman McInnes 3835615d1e5SSatish Balay #undef __FUNC__ 3845615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIDense" 38539ddd567SLois Curfman McInnes static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 3868965ea79SLois Curfman McInnes { 38739ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 3888965ea79SLois Curfman McInnes int ierr; 389c456f294SBarry Smith 39043a90d84SBarry Smith ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 39143a90d84SBarry Smith ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 39244cd7ae7SLois Curfman McInnes ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr); 3938965ea79SLois Curfman McInnes return 0; 3948965ea79SLois Curfman McInnes } 3958965ea79SLois Curfman McInnes 3965615d1e5SSatish Balay #undef __FUNC__ 3975615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIDense" 39839ddd567SLois Curfman McInnes static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 3998965ea79SLois Curfman McInnes { 40039ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4018965ea79SLois Curfman McInnes int ierr; 402c456f294SBarry Smith 40343a90d84SBarry Smith ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 40443a90d84SBarry Smith ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 40544cd7ae7SLois Curfman McInnes ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr); 4068965ea79SLois Curfman McInnes return 0; 4078965ea79SLois Curfman McInnes } 4088965ea79SLois Curfman McInnes 4095615d1e5SSatish Balay #undef __FUNC__ 4105615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIDense" 411096963f5SLois Curfman McInnes static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy) 412096963f5SLois Curfman McInnes { 413096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 414096963f5SLois Curfman McInnes int ierr; 4153501a2bdSLois Curfman McInnes Scalar zero = 0.0; 416096963f5SLois Curfman McInnes 4173501a2bdSLois Curfman McInnes ierr = VecSet(&zero,yy); CHKERRQ(ierr); 41844cd7ae7SLois Curfman McInnes ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr); 419537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 420537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 421096963f5SLois Curfman McInnes return 0; 422096963f5SLois Curfman McInnes } 423096963f5SLois Curfman McInnes 4245615d1e5SSatish Balay #undef __FUNC__ 4255615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIDense" 426096963f5SLois Curfman McInnes static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 427096963f5SLois Curfman McInnes { 428096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 429096963f5SLois Curfman McInnes int ierr; 430096963f5SLois Curfman McInnes 4313501a2bdSLois Curfman McInnes ierr = VecCopy(yy,zz); CHKERRQ(ierr); 43244cd7ae7SLois Curfman McInnes ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr); 433537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 434537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 435096963f5SLois Curfman McInnes return 0; 436096963f5SLois Curfman McInnes } 437096963f5SLois Curfman McInnes 4385615d1e5SSatish Balay #undef __FUNC__ 4395615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIDense" 44039ddd567SLois Curfman McInnes static int MatGetDiagonal_MPIDense(Mat A,Vec v) 4418965ea79SLois Curfman McInnes { 44239ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 443096963f5SLois Curfman McInnes Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data; 44444cd7ae7SLois Curfman McInnes int ierr, len, i, n, m = a->m, radd; 44544cd7ae7SLois Curfman McInnes Scalar *x, zero = 0.0; 446ed3cc1f0SBarry Smith 44744cd7ae7SLois Curfman McInnes VecSet(&zero,v); 448096963f5SLois Curfman McInnes ierr = VecGetArray(v,&x); CHKERRQ(ierr); 449096963f5SLois Curfman McInnes ierr = VecGetSize(v,&n); CHKERRQ(ierr); 450e3372554SBarry Smith if (n != a->M) SETERRQ(1,0,"Nonconforming mat and vec"); 45144cd7ae7SLois Curfman McInnes len = PetscMin(aloc->m,aloc->n); 4527ddc982cSLois Curfman McInnes radd = a->rstart*m; 45344cd7ae7SLois Curfman McInnes for ( i=0; i<len; i++ ) { 454096963f5SLois Curfman McInnes x[i] = aloc->v[radd + i*m + i]; 455096963f5SLois Curfman McInnes } 456096963f5SLois Curfman McInnes return 0; 4578965ea79SLois Curfman McInnes } 4588965ea79SLois Curfman McInnes 4595615d1e5SSatish Balay #undef __FUNC__ 4605615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIDense" 46139ddd567SLois Curfman McInnes static int MatDestroy_MPIDense(PetscObject obj) 4628965ea79SLois Curfman McInnes { 4638965ea79SLois Curfman McInnes Mat mat = (Mat) obj; 4643501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4658965ea79SLois Curfman McInnes int ierr; 466ed3cc1f0SBarry Smith 4678965ea79SLois Curfman McInnes #if defined(PETSC_LOG) 4683501a2bdSLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N); 4698965ea79SLois Curfman McInnes #endif 4700452661fSBarry Smith PetscFree(mdn->rowners); 4713501a2bdSLois Curfman McInnes ierr = MatDestroy(mdn->A); CHKERRQ(ierr); 4723501a2bdSLois Curfman McInnes if (mdn->lvec) VecDestroy(mdn->lvec); 4733501a2bdSLois Curfman McInnes if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 474622d7880SLois Curfman McInnes if (mdn->factor) { 475622d7880SLois Curfman McInnes if (mdn->factor->temp) PetscFree(mdn->factor->temp); 476622d7880SLois Curfman McInnes if (mdn->factor->tag) PetscFree(mdn->factor->tag); 477622d7880SLois Curfman McInnes if (mdn->factor->pivots) PetscFree(mdn->factor->pivots); 478622d7880SLois Curfman McInnes PetscFree(mdn->factor); 479622d7880SLois Curfman McInnes } 4800452661fSBarry Smith PetscFree(mdn); 48190f02eecSBarry Smith if (mat->mapping) { 48290f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 48390f02eecSBarry Smith } 4848965ea79SLois Curfman McInnes PLogObjectDestroy(mat); 4850452661fSBarry Smith PetscHeaderDestroy(mat); 4868965ea79SLois Curfman McInnes return 0; 4878965ea79SLois Curfman McInnes } 48839ddd567SLois Curfman McInnes 4898965ea79SLois Curfman McInnes #include "pinclude/pviewer.h" 4908965ea79SLois Curfman McInnes 4915615d1e5SSatish Balay #undef __FUNC__ 4925615d1e5SSatish Balay #define __FUNC__ "MatView_MPIDense_Binary" 49339ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer) 4948965ea79SLois Curfman McInnes { 49539ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4968965ea79SLois Curfman McInnes int ierr; 4977056b6fcSBarry Smith 49839ddd567SLois Curfman McInnes if (mdn->size == 1) { 49939ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 5008965ea79SLois Curfman McInnes } 501e3372554SBarry Smith else SETERRQ(1,0,"Only uniprocessor output supported"); 5028965ea79SLois Curfman McInnes return 0; 5038965ea79SLois Curfman McInnes } 5048965ea79SLois Curfman McInnes 5055615d1e5SSatish Balay #undef __FUNC__ 5065615d1e5SSatish Balay #define __FUNC__ "MatView_MPIDense_ASCII" 50739ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer) 5088965ea79SLois Curfman McInnes { 50939ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 51039ddd567SLois Curfman McInnes int ierr, format; 5118965ea79SLois Curfman McInnes FILE *fd; 51219bcc07fSBarry Smith ViewerType vtype; 5138965ea79SLois Curfman McInnes 51419bcc07fSBarry Smith ViewerGetType(viewer,&vtype); 51590ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 51690ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 517639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 5184e220ebcSLois Curfman McInnes int rank; 5194e220ebcSLois Curfman McInnes MatInfo info; 520096963f5SLois Curfman McInnes MPI_Comm_rank(mat->comm,&rank); 5214e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 52277c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 5234e220ebcSLois Curfman McInnes fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m, 5244e220ebcSLois Curfman McInnes (int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 525096963f5SLois Curfman McInnes fflush(fd); 52677c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 5273501a2bdSLois Curfman McInnes ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr); 5283501a2bdSLois Curfman McInnes return 0; 5293501a2bdSLois Curfman McInnes } 53002cad45dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 531096963f5SLois Curfman McInnes return 0; 5328965ea79SLois Curfman McInnes } 53319bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 53477c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 53539ddd567SLois Curfman McInnes fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n", 53639ddd567SLois Curfman McInnes mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n); 53739ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 5388965ea79SLois Curfman McInnes fflush(fd); 53977c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 5408965ea79SLois Curfman McInnes } 5418965ea79SLois Curfman McInnes else { 54239ddd567SLois Curfman McInnes int size = mdn->size, rank = mdn->rank; 5438965ea79SLois Curfman McInnes if (size == 1) { 54439ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 5458965ea79SLois Curfman McInnes } 5468965ea79SLois Curfman McInnes else { 5478965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 5488965ea79SLois Curfman McInnes Mat A; 54939ddd567SLois Curfman McInnes int M = mdn->M, N = mdn->N,m,row,i, nz, *cols; 55039ddd567SLois Curfman McInnes Scalar *vals; 55139ddd567SLois Curfman McInnes Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data; 5528965ea79SLois Curfman McInnes 5538965ea79SLois Curfman McInnes if (!rank) { 5540513a670SBarry Smith ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr); 5558965ea79SLois Curfman McInnes } 5568965ea79SLois Curfman McInnes else { 5570513a670SBarry Smith ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr); 5588965ea79SLois Curfman McInnes } 5598965ea79SLois Curfman McInnes PLogObjectParent(mat,A); 5608965ea79SLois Curfman McInnes 56139ddd567SLois Curfman McInnes /* Copy the matrix ... This isn't the most efficient means, 56239ddd567SLois Curfman McInnes but it's quick for now */ 56339ddd567SLois Curfman McInnes row = mdn->rstart; m = Amdn->m; 5648965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 56539ddd567SLois Curfman McInnes ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 56639ddd567SLois Curfman McInnes ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr); 56739ddd567SLois Curfman McInnes ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 56839ddd567SLois Curfman McInnes row++; 5698965ea79SLois Curfman McInnes } 5708965ea79SLois Curfman McInnes 5716d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 5726d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 5738965ea79SLois Curfman McInnes if (!rank) { 57439ddd567SLois Curfman McInnes ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr); 5758965ea79SLois Curfman McInnes } 5768965ea79SLois Curfman McInnes ierr = MatDestroy(A); CHKERRQ(ierr); 5778965ea79SLois Curfman McInnes } 5788965ea79SLois Curfman McInnes } 5798965ea79SLois Curfman McInnes return 0; 5808965ea79SLois Curfman McInnes } 5818965ea79SLois Curfman McInnes 5825615d1e5SSatish Balay #undef __FUNC__ 5835615d1e5SSatish Balay #define __FUNC__ "MatView_MPIDense" 58439ddd567SLois Curfman McInnes static int MatView_MPIDense(PetscObject obj,Viewer viewer) 5858965ea79SLois Curfman McInnes { 5868965ea79SLois Curfman McInnes Mat mat = (Mat) obj; 58739ddd567SLois Curfman McInnes int ierr; 588bcd2baecSBarry Smith ViewerType vtype; 5898965ea79SLois Curfman McInnes 590bcd2baecSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 591bcd2baecSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 59239ddd567SLois Curfman McInnes ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 5938965ea79SLois Curfman McInnes } 594bcd2baecSBarry Smith else if (vtype == ASCII_FILES_VIEWER) { 59539ddd567SLois Curfman McInnes ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 5968965ea79SLois Curfman McInnes } 597bcd2baecSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 59839ddd567SLois Curfman McInnes return MatView_MPIDense_Binary(mat,viewer); 5998965ea79SLois Curfman McInnes } 6008965ea79SLois Curfman McInnes return 0; 6018965ea79SLois Curfman McInnes } 6028965ea79SLois Curfman McInnes 6035615d1e5SSatish Balay #undef __FUNC__ 6045615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIDense" 6054e220ebcSLois Curfman McInnes static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 6068965ea79SLois Curfman McInnes { 6073501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6083501a2bdSLois Curfman McInnes Mat mdn = mat->A; 6094e220ebcSLois Curfman McInnes int ierr; 6104e220ebcSLois Curfman McInnes double isend[5], irecv[5]; 6118965ea79SLois Curfman McInnes 6124e220ebcSLois Curfman McInnes info->rows_global = (double)mat->M; 6134e220ebcSLois Curfman McInnes info->columns_global = (double)mat->N; 6144e220ebcSLois Curfman McInnes info->rows_local = (double)mat->m; 6154e220ebcSLois Curfman McInnes info->columns_local = (double)mat->N; 6164e220ebcSLois Curfman McInnes info->block_size = 1.0; 6174e220ebcSLois Curfman McInnes ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr); 6184e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 6194e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 6208965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 6214e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 6224e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 6234e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 6244e220ebcSLois Curfman McInnes info->memory = isend[3]; 6254e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 6268965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 627dd2c0978SLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,A->comm); 6284e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 6294e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 6304e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 6314e220ebcSLois Curfman McInnes info->memory = irecv[3]; 6324e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 6338965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 634dd2c0978SLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,A->comm); 6354e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 6364e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 6374e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 6384e220ebcSLois Curfman McInnes info->memory = irecv[3]; 6394e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 6408965ea79SLois Curfman McInnes } 6414e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 6424e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 6434e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 6448965ea79SLois Curfman McInnes return 0; 6458965ea79SLois Curfman McInnes } 6468965ea79SLois Curfman McInnes 6478c469469SLois Curfman McInnes /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*); 6488aaee692SLois Curfman McInnes extern int MatLUFactorNumeric_MPIDense(Mat,Mat*); 6498aaee692SLois Curfman McInnes extern int MatLUFactor_MPIDense(Mat,IS,IS,double); 6508aaee692SLois Curfman McInnes extern int MatSolve_MPIDense(Mat,Vec,Vec); 6518c469469SLois Curfman McInnes extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec); 6528aaee692SLois Curfman McInnes extern int MatSolveTrans_MPIDense(Mat,Vec,Vec); 6538aaee692SLois Curfman McInnes extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */ 6548aaee692SLois Curfman McInnes 6555615d1e5SSatish Balay #undef __FUNC__ 6565615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIDense" 65739ddd567SLois Curfman McInnes static int MatSetOption_MPIDense(Mat A,MatOption op) 6588965ea79SLois Curfman McInnes { 65939ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 6608965ea79SLois Curfman McInnes 6616d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 6626d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 663*c2653b3dSLois Curfman McInnes op == MAT_NEW_NONZERO_LOCATION_ERROR || 664219d9a1aSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 665219d9a1aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED) { 666b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 667b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 668aeafbbfcSLois Curfman McInnes a->roworiented = 1; 6698965ea79SLois Curfman McInnes MatSetOption(a->A,op); 670b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 671219d9a1aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 6726d4a8577SBarry Smith op == MAT_SYMMETRIC || 6736d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 6746d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 67594a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_MPIDense:Option ignored\n"); 6766d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) 67739b7565bSBarry Smith {a->roworiented = 0; MatSetOption(a->A,op);} 6786d4a8577SBarry Smith else if (op == MAT_NO_NEW_DIAGONALS) 679e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 6808965ea79SLois Curfman McInnes else 681e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 6828965ea79SLois Curfman McInnes return 0; 6838965ea79SLois Curfman McInnes } 6848965ea79SLois Curfman McInnes 6855615d1e5SSatish Balay #undef __FUNC__ 6865615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIDense" 6873501a2bdSLois Curfman McInnes static int MatGetSize_MPIDense(Mat A,int *m,int *n) 6888965ea79SLois Curfman McInnes { 6893501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6908965ea79SLois Curfman McInnes *m = mat->M; *n = mat->N; 6918965ea79SLois Curfman McInnes return 0; 6928965ea79SLois Curfman McInnes } 6938965ea79SLois Curfman McInnes 6945615d1e5SSatish Balay #undef __FUNC__ 6955615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIDense" 6963501a2bdSLois Curfman McInnes static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n) 6978965ea79SLois Curfman McInnes { 6983501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6998965ea79SLois Curfman McInnes *m = mat->m; *n = mat->N; 7008965ea79SLois Curfman McInnes return 0; 7018965ea79SLois Curfman McInnes } 7028965ea79SLois Curfman McInnes 7035615d1e5SSatish Balay #undef __FUNC__ 7045615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIDense" 7053501a2bdSLois Curfman McInnes static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n) 7068965ea79SLois Curfman McInnes { 7073501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 7088965ea79SLois Curfman McInnes *m = mat->rstart; *n = mat->rend; 7098965ea79SLois Curfman McInnes return 0; 7108965ea79SLois Curfman McInnes } 7118965ea79SLois Curfman McInnes 7125615d1e5SSatish Balay #undef __FUNC__ 7135615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIDense" 7143501a2bdSLois Curfman McInnes static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v) 7158965ea79SLois Curfman McInnes { 7163501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 71739ddd567SLois Curfman McInnes int lrow, rstart = mat->rstart, rend = mat->rend; 7188965ea79SLois Curfman McInnes 719e3372554SBarry Smith if (row < rstart || row >= rend) SETERRQ(1,0,"only local rows") 7208965ea79SLois Curfman McInnes lrow = row - rstart; 72139ddd567SLois Curfman McInnes return MatGetRow(mat->A,lrow,nz,idx,v); 7228965ea79SLois Curfman McInnes } 7238965ea79SLois Curfman McInnes 7245615d1e5SSatish Balay #undef __FUNC__ 7255615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIDense" 72639ddd567SLois Curfman McInnes static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v) 7278965ea79SLois Curfman McInnes { 7280452661fSBarry Smith if (idx) PetscFree(*idx); 7290452661fSBarry Smith if (v) PetscFree(*v); 7308965ea79SLois Curfman McInnes return 0; 7318965ea79SLois Curfman McInnes } 7328965ea79SLois Curfman McInnes 7335615d1e5SSatish Balay #undef __FUNC__ 7345615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIDense" 735cddf8d76SBarry Smith static int MatNorm_MPIDense(Mat A,NormType type,double *norm) 736096963f5SLois Curfman McInnes { 7373501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) A->data; 7383501a2bdSLois Curfman McInnes Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data; 7393501a2bdSLois Curfman McInnes int ierr, i, j; 7403501a2bdSLois Curfman McInnes double sum = 0.0; 7413501a2bdSLois Curfman McInnes Scalar *v = mat->v; 7423501a2bdSLois Curfman McInnes 7433501a2bdSLois Curfman McInnes if (mdn->size == 1) { 7443501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr); 7453501a2bdSLois Curfman McInnes } else { 7463501a2bdSLois Curfman McInnes if (type == NORM_FROBENIUS) { 7473501a2bdSLois Curfman McInnes for (i=0; i<mat->n*mat->m; i++ ) { 7483501a2bdSLois Curfman McInnes #if defined(PETSC_COMPLEX) 7493501a2bdSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 7503501a2bdSLois Curfman McInnes #else 7513501a2bdSLois Curfman McInnes sum += (*v)*(*v); v++; 7523501a2bdSLois Curfman McInnes #endif 7533501a2bdSLois Curfman McInnes } 7546d84be18SBarry Smith MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm); 7553501a2bdSLois Curfman McInnes *norm = sqrt(*norm); 7563501a2bdSLois Curfman McInnes PLogFlops(2*mat->n*mat->m); 7573501a2bdSLois Curfman McInnes } 7583501a2bdSLois Curfman McInnes else if (type == NORM_1) { 7593501a2bdSLois Curfman McInnes double *tmp, *tmp2; 7600452661fSBarry Smith tmp = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp); 7613501a2bdSLois Curfman McInnes tmp2 = tmp + mdn->N; 762cddf8d76SBarry Smith PetscMemzero(tmp,2*mdn->N*sizeof(double)); 763096963f5SLois Curfman McInnes *norm = 0.0; 7643501a2bdSLois Curfman McInnes v = mat->v; 7653501a2bdSLois Curfman McInnes for ( j=0; j<mat->n; j++ ) { 7663501a2bdSLois Curfman McInnes for ( i=0; i<mat->m; i++ ) { 76767e560aaSBarry Smith tmp[j] += PetscAbsScalar(*v); v++; 7683501a2bdSLois Curfman McInnes } 7693501a2bdSLois Curfman McInnes } 7706d84be18SBarry Smith MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm); 7713501a2bdSLois Curfman McInnes for ( j=0; j<mdn->N; j++ ) { 7723501a2bdSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 7733501a2bdSLois Curfman McInnes } 7740452661fSBarry Smith PetscFree(tmp); 7753501a2bdSLois Curfman McInnes PLogFlops(mat->n*mat->m); 7763501a2bdSLois Curfman McInnes } 7773501a2bdSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 7783501a2bdSLois Curfman McInnes double ntemp; 7793501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr); 7806d84be18SBarry Smith MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm); 7813501a2bdSLois Curfman McInnes } 7823501a2bdSLois Curfman McInnes else { 783e3372554SBarry Smith SETERRQ(1,0,"No support for two norm"); 7843501a2bdSLois Curfman McInnes } 7853501a2bdSLois Curfman McInnes } 7863501a2bdSLois Curfman McInnes return 0; 7873501a2bdSLois Curfman McInnes } 7883501a2bdSLois Curfman McInnes 7895615d1e5SSatish Balay #undef __FUNC__ 7905615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIDense" 7913501a2bdSLois Curfman McInnes static int MatTranspose_MPIDense(Mat A,Mat *matout) 7923501a2bdSLois Curfman McInnes { 7933501a2bdSLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 7943501a2bdSLois Curfman McInnes Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data; 7953501a2bdSLois Curfman McInnes Mat B; 7963501a2bdSLois Curfman McInnes int M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart; 7973501a2bdSLois Curfman McInnes int j, i, ierr; 7983501a2bdSLois Curfman McInnes Scalar *v; 7993501a2bdSLois Curfman McInnes 8007056b6fcSBarry Smith if (matout == PETSC_NULL && M != N) { 801e3372554SBarry Smith SETERRQ(1,0,"Supports square matrix only in-place"); 8027056b6fcSBarry Smith } 8037056b6fcSBarry Smith ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr); 8043501a2bdSLois Curfman McInnes 8053501a2bdSLois Curfman McInnes m = Aloc->m; n = Aloc->n; v = Aloc->v; 8060452661fSBarry Smith rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork); 8073501a2bdSLois Curfman McInnes for ( j=0; j<n; j++ ) { 8083501a2bdSLois Curfman McInnes for (i=0; i<m; i++) rwork[i] = rstart + i; 8093501a2bdSLois Curfman McInnes ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr); 8103501a2bdSLois Curfman McInnes v += m; 8113501a2bdSLois Curfman McInnes } 8120452661fSBarry Smith PetscFree(rwork); 8136d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 8146d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 8153638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 8163501a2bdSLois Curfman McInnes *matout = B; 8173501a2bdSLois Curfman McInnes } else { 8183501a2bdSLois Curfman McInnes /* This isn't really an in-place transpose, but free data struct from a */ 8190452661fSBarry Smith PetscFree(a->rowners); 8203501a2bdSLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 8213501a2bdSLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 8223501a2bdSLois Curfman McInnes if (a->Mvctx) VecScatterDestroy(a->Mvctx); 8230452661fSBarry Smith PetscFree(a); 8243501a2bdSLois Curfman McInnes PetscMemcpy(A,B,sizeof(struct _Mat)); 8250452661fSBarry Smith PetscHeaderDestroy(B); 8263501a2bdSLois Curfman McInnes } 827096963f5SLois Curfman McInnes return 0; 828096963f5SLois Curfman McInnes } 829096963f5SLois Curfman McInnes 83044cd7ae7SLois Curfman McInnes #include "pinclude/plapack.h" 8315615d1e5SSatish Balay #undef __FUNC__ 8325615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIDense" 83344cd7ae7SLois Curfman McInnes static int MatScale_MPIDense(Scalar *alpha,Mat inA) 83444cd7ae7SLois Curfman McInnes { 83544cd7ae7SLois Curfman McInnes Mat_MPIDense *A = (Mat_MPIDense *) inA->data; 83644cd7ae7SLois Curfman McInnes Mat_SeqDense *a = (Mat_SeqDense *) A->A->data; 83744cd7ae7SLois Curfman McInnes int one = 1, nz; 83844cd7ae7SLois Curfman McInnes 83944cd7ae7SLois Curfman McInnes nz = a->m*a->n; 84044cd7ae7SLois Curfman McInnes BLscal_( &nz, alpha, a->v, &one ); 84144cd7ae7SLois Curfman McInnes PLogFlops(nz); 84244cd7ae7SLois Curfman McInnes return 0; 84344cd7ae7SLois Curfman McInnes } 84444cd7ae7SLois Curfman McInnes 8453d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat,Mat *,int); 8468965ea79SLois Curfman McInnes 8478965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/ 84839ddd567SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIDense, 84939ddd567SLois Curfman McInnes MatGetRow_MPIDense,MatRestoreRow_MPIDense, 85039ddd567SLois Curfman McInnes MatMult_MPIDense,MatMultAdd_MPIDense, 851096963f5SLois Curfman McInnes MatMultTrans_MPIDense,MatMultTransAdd_MPIDense, 852e7ca0642SLois Curfman McInnes /* MatSolve_MPIDense,0, */ 853e7ca0642SLois Curfman McInnes 0,0, 85439ddd567SLois Curfman McInnes 0,0, 8558c469469SLois Curfman McInnes 0,0, 8568c469469SLois Curfman McInnes /* MatLUFactor_MPIDense,0, */ 8578aaee692SLois Curfman McInnes 0,MatTranspose_MPIDense, 85839ddd567SLois Curfman McInnes MatGetInfo_MPIDense,0, 859096963f5SLois Curfman McInnes MatGetDiagonal_MPIDense,0,MatNorm_MPIDense, 86039ddd567SLois Curfman McInnes MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense, 8618965ea79SLois Curfman McInnes 0, 86239ddd567SLois Curfman McInnes MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense, 8633b2fbd54SBarry Smith 0,0, 8648c469469SLois Curfman McInnes /* 0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */ 8658aaee692SLois Curfman McInnes 0,0, 86639ddd567SLois Curfman McInnes MatGetSize_MPIDense,MatGetLocalSize_MPIDense, 86739ddd567SLois Curfman McInnes MatGetOwnershipRange_MPIDense, 868ff14e315SSatish Balay 0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense, 86994a9d846SBarry Smith MatConvertSameType_MPIDense, 870b49de8d1SLois Curfman McInnes 0,0,0,0,0, 8714e220ebcSLois Curfman McInnes 0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense, 8724e220ebcSLois Curfman McInnes 0,0,0,MatGetBlockSize_MPIDense}; 8738965ea79SLois Curfman McInnes 8745615d1e5SSatish Balay #undef __FUNC__ 8755615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIDense" 8768965ea79SLois Curfman McInnes /*@C 87739ddd567SLois Curfman McInnes MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 8788965ea79SLois Curfman McInnes 8798965ea79SLois Curfman McInnes Input Parameters: 8808965ea79SLois Curfman McInnes . comm - MPI communicator 8818965ea79SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 8828965ea79SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 8838965ea79SLois Curfman McInnes if N is given) 8848965ea79SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 8858965ea79SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 8868965ea79SLois Curfman McInnes if n is given) 887b4fd4287SBarry Smith . data - optional location of matrix data. Set data=PETSC_NULL for PETSc 888dfc5480cSLois Curfman McInnes to control all matrix memory allocation. 8898965ea79SLois Curfman McInnes 8908965ea79SLois Curfman McInnes Output Parameter: 891477f1c0bSLois Curfman McInnes . A - the matrix 8928965ea79SLois Curfman McInnes 8938965ea79SLois Curfman McInnes Notes: 89439ddd567SLois Curfman McInnes The dense format is fully compatible with standard Fortran 77 89539ddd567SLois Curfman McInnes storage by columns. 8968965ea79SLois Curfman McInnes 89718f449edSLois Curfman McInnes The data input variable is intended primarily for Fortran programmers 89818f449edSLois Curfman McInnes who wish to allocate their own matrix memory space. Most users should 899b4fd4287SBarry Smith set data=PETSC_NULL. 90018f449edSLois Curfman McInnes 9018965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 9028965ea79SLois Curfman McInnes (possibly both). 9038965ea79SLois Curfman McInnes 9043501a2bdSLois Curfman McInnes Currently, the only parallel dense matrix decomposition is by rows, 9053501a2bdSLois Curfman McInnes so that n=N and each submatrix owns all of the global columns. 9063501a2bdSLois Curfman McInnes 90739ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel 9088965ea79SLois Curfman McInnes 90939ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 9108965ea79SLois Curfman McInnes @*/ 911477f1c0bSLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A) 9128965ea79SLois Curfman McInnes { 9138965ea79SLois Curfman McInnes Mat mat; 91439ddd567SLois Curfman McInnes Mat_MPIDense *a; 91525cdf11fSBarry Smith int ierr, i,flg; 9168965ea79SLois Curfman McInnes 917ed2daf61SLois Curfman McInnes /* Note: For now, when data is specified above, this assumes the user correctly 918ed2daf61SLois Curfman McInnes allocates the local dense storage space. We should add error checking. */ 91918f449edSLois Curfman McInnes 920477f1c0bSLois Curfman McInnes *A = 0; 9210452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm); 9228965ea79SLois Curfman McInnes PLogObjectCreate(mat); 9230452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 9248965ea79SLois Curfman McInnes PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 92539ddd567SLois Curfman McInnes mat->destroy = MatDestroy_MPIDense; 92639ddd567SLois Curfman McInnes mat->view = MatView_MPIDense; 9278965ea79SLois Curfman McInnes mat->factor = 0; 92890f02eecSBarry Smith mat->mapping = 0; 9298965ea79SLois Curfman McInnes 930622d7880SLois Curfman McInnes a->factor = 0; 931e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 9328965ea79SLois Curfman McInnes MPI_Comm_rank(comm,&a->rank); 9338965ea79SLois Curfman McInnes MPI_Comm_size(comm,&a->size); 9348965ea79SLois Curfman McInnes 93539ddd567SLois Curfman McInnes if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm); 9368965ea79SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 93739ddd567SLois Curfman McInnes 93839ddd567SLois Curfman McInnes /* each row stores all columns */ 93939ddd567SLois Curfman McInnes if (N == PETSC_DECIDE) N = n; 940d7e8b826SBarry Smith if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 941e3372554SBarry Smith /* if (n != N) SETERRQ(1,0,"For now, only n=N is supported"); */ 942aca0ad90SLois Curfman McInnes a->N = mat->N = N; 943aca0ad90SLois Curfman McInnes a->M = mat->M = M; 944aca0ad90SLois Curfman McInnes a->m = mat->m = m; 945aca0ad90SLois Curfman McInnes a->n = mat->n = n; 9468965ea79SLois Curfman McInnes 9478965ea79SLois Curfman McInnes /* build local table of row and column ownerships */ 948d7e8b826SBarry Smith a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 949d7e8b826SBarry Smith a->cowners = a->rowners + a->size + 1; 9507056b6fcSBarry Smith PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense)); 9518965ea79SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 9528965ea79SLois Curfman McInnes a->rowners[0] = 0; 9538965ea79SLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 9548965ea79SLois Curfman McInnes a->rowners[i] += a->rowners[i-1]; 9558965ea79SLois Curfman McInnes } 9568965ea79SLois Curfman McInnes a->rstart = a->rowners[a->rank]; 9578965ea79SLois Curfman McInnes a->rend = a->rowners[a->rank+1]; 958d7e8b826SBarry Smith MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 959d7e8b826SBarry Smith a->cowners[0] = 0; 960d7e8b826SBarry Smith for ( i=2; i<=a->size; i++ ) { 961d7e8b826SBarry Smith a->cowners[i] += a->cowners[i-1]; 962d7e8b826SBarry Smith } 9638965ea79SLois Curfman McInnes 96418f449edSLois Curfman McInnes ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr); 9658965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 9668965ea79SLois Curfman McInnes 9678965ea79SLois Curfman McInnes /* build cache for off array entries formed */ 9688965ea79SLois Curfman McInnes ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 9698965ea79SLois Curfman McInnes 9708965ea79SLois Curfman McInnes /* stuff used for matrix vector multiply */ 9718965ea79SLois Curfman McInnes a->lvec = 0; 9728965ea79SLois Curfman McInnes a->Mvctx = 0; 97339b7565bSBarry Smith a->roworiented = 1; 9748965ea79SLois Curfman McInnes 975477f1c0bSLois Curfman McInnes *A = mat; 97625cdf11fSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 97725cdf11fSBarry Smith if (flg) { 9788c469469SLois Curfman McInnes ierr = MatPrintHelp(mat); CHKERRQ(ierr); 9798c469469SLois Curfman McInnes } 9808965ea79SLois Curfman McInnes return 0; 9818965ea79SLois Curfman McInnes } 9828965ea79SLois Curfman McInnes 9835615d1e5SSatish Balay #undef __FUNC__ 9845615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIDense" 9853d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues) 9868965ea79SLois Curfman McInnes { 9878965ea79SLois Curfman McInnes Mat mat; 9883501a2bdSLois Curfman McInnes Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data; 98939ddd567SLois Curfman McInnes int ierr; 9902ba99913SLois Curfman McInnes FactorCtx *factor; 9918965ea79SLois Curfman McInnes 9928965ea79SLois Curfman McInnes *newmat = 0; 9930452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm); 9948965ea79SLois Curfman McInnes PLogObjectCreate(mat); 9950452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 9968965ea79SLois Curfman McInnes PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 99739ddd567SLois Curfman McInnes mat->destroy = MatDestroy_MPIDense; 99839ddd567SLois Curfman McInnes mat->view = MatView_MPIDense; 9993501a2bdSLois Curfman McInnes mat->factor = A->factor; 1000c456f294SBarry Smith mat->assembled = PETSC_TRUE; 10018965ea79SLois Curfman McInnes 100244cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 100344cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 100444cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 100544cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 10062ba99913SLois Curfman McInnes if (oldmat->factor) { 10072ba99913SLois Curfman McInnes a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor); 10082ba99913SLois Curfman McInnes /* copy factor contents ... add this code! */ 10092ba99913SLois Curfman McInnes } else a->factor = 0; 10108965ea79SLois Curfman McInnes 10118965ea79SLois Curfman McInnes a->rstart = oldmat->rstart; 10128965ea79SLois Curfman McInnes a->rend = oldmat->rend; 10138965ea79SLois Curfman McInnes a->size = oldmat->size; 10148965ea79SLois Curfman McInnes a->rank = oldmat->rank; 1015e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 10168965ea79SLois Curfman McInnes 10170452661fSBarry Smith a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 101839ddd567SLois Curfman McInnes PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense)); 10198965ea79SLois Curfman McInnes PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 10208965ea79SLois Curfman McInnes ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 10218965ea79SLois Curfman McInnes 10228965ea79SLois Curfman McInnes ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 10238965ea79SLois Curfman McInnes PLogObjectParent(mat,a->lvec); 102455659b69SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 10258965ea79SLois Curfman McInnes PLogObjectParent(mat,a->Mvctx); 10268965ea79SLois Curfman McInnes ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 10278965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 10288965ea79SLois Curfman McInnes *newmat = mat; 10298965ea79SLois Curfman McInnes return 0; 10308965ea79SLois Curfman McInnes } 10318965ea79SLois Curfman McInnes 103277c4ece6SBarry Smith #include "sys.h" 10338965ea79SLois Curfman McInnes 10345615d1e5SSatish Balay #undef __FUNC__ 10355615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIDense_DenseInFile" 103690ace30eSBarry Smith int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat) 103790ace30eSBarry Smith { 103890ace30eSBarry Smith int *rowners, i,size,rank,m,rstart,rend,ierr,nz,j; 103990ace30eSBarry Smith Scalar *array,*vals,*vals_ptr; 104090ace30eSBarry Smith MPI_Status status; 104190ace30eSBarry Smith 104290ace30eSBarry Smith MPI_Comm_rank(comm,&rank); 104390ace30eSBarry Smith MPI_Comm_size(comm,&size); 104490ace30eSBarry Smith 104590ace30eSBarry Smith /* determine ownership of all rows */ 104690ace30eSBarry Smith m = M/size + ((M % size) > rank); 104790ace30eSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 104890ace30eSBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 104990ace30eSBarry Smith rowners[0] = 0; 105090ace30eSBarry Smith for ( i=2; i<=size; i++ ) { 105190ace30eSBarry Smith rowners[i] += rowners[i-1]; 105290ace30eSBarry Smith } 105390ace30eSBarry Smith rstart = rowners[rank]; 105490ace30eSBarry Smith rend = rowners[rank+1]; 105590ace30eSBarry Smith 105690ace30eSBarry Smith ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 105790ace30eSBarry Smith ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr); 105890ace30eSBarry Smith 105990ace30eSBarry Smith if (!rank) { 106090ace30eSBarry Smith vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals); 106190ace30eSBarry Smith 106290ace30eSBarry Smith /* read in my part of the matrix numerical values */ 106377c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr); 106490ace30eSBarry Smith 106590ace30eSBarry Smith /* insert into matrix-by row (this is why cannot directly read into array */ 106690ace30eSBarry Smith vals_ptr = vals; 106790ace30eSBarry Smith for ( i=0; i<m; i++ ) { 106890ace30eSBarry Smith for ( j=0; j<N; j++ ) { 106990ace30eSBarry Smith array[i + j*m] = *vals_ptr++; 107090ace30eSBarry Smith } 107190ace30eSBarry Smith } 107290ace30eSBarry Smith 107390ace30eSBarry Smith /* read in other processors and ship out */ 107490ace30eSBarry Smith for ( i=1; i<size; i++ ) { 107590ace30eSBarry Smith nz = (rowners[i+1] - rowners[i])*N; 107677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 107790ace30eSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm); 107890ace30eSBarry Smith } 107990ace30eSBarry Smith } 108090ace30eSBarry Smith else { 108190ace30eSBarry Smith /* receive numeric values */ 108290ace30eSBarry Smith vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals); 108390ace30eSBarry Smith 108490ace30eSBarry Smith /* receive message of values*/ 108590ace30eSBarry Smith MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status); 108690ace30eSBarry Smith 108790ace30eSBarry Smith /* insert into matrix-by row (this is why cannot directly read into array */ 108890ace30eSBarry Smith vals_ptr = vals; 108990ace30eSBarry Smith for ( i=0; i<m; i++ ) { 109090ace30eSBarry Smith for ( j=0; j<N; j++ ) { 109190ace30eSBarry Smith array[i + j*m] = *vals_ptr++; 109290ace30eSBarry Smith } 109390ace30eSBarry Smith } 109490ace30eSBarry Smith } 109590ace30eSBarry Smith PetscFree(rowners); 109690ace30eSBarry Smith PetscFree(vals); 10976d4a8577SBarry Smith ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 10986d4a8577SBarry Smith ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 109990ace30eSBarry Smith return 0; 110090ace30eSBarry Smith } 110190ace30eSBarry Smith 110290ace30eSBarry Smith 11035615d1e5SSatish Balay #undef __FUNC__ 11045615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIDense" 110519bcc07fSBarry Smith int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat) 11068965ea79SLois Curfman McInnes { 11078965ea79SLois Curfman McInnes Mat A; 11088965ea79SLois Curfman McInnes int i, nz, ierr, j,rstart, rend, fd; 11098965ea79SLois Curfman McInnes Scalar *vals,*svals; 111019bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 11118965ea79SLois Curfman McInnes MPI_Status status; 11128965ea79SLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 11138965ea79SLois Curfman McInnes int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 111419bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 11158965ea79SLois Curfman McInnes 11168965ea79SLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 11178965ea79SLois Curfman McInnes if (!rank) { 111890ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 111977c4ece6SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1120e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 11218965ea79SLois Curfman McInnes } 11228965ea79SLois Curfman McInnes 11238965ea79SLois Curfman McInnes MPI_Bcast(header+1,3,MPI_INT,0,comm); 112490ace30eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 112590ace30eSBarry Smith 112690ace30eSBarry Smith /* 112790ace30eSBarry Smith Handle case where matrix is stored on disk as a dense matrix 112890ace30eSBarry Smith */ 112990ace30eSBarry Smith if (nz == MATRIX_BINARY_FORMAT_DENSE) { 113090ace30eSBarry Smith return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat); 113190ace30eSBarry Smith } 113290ace30eSBarry Smith 11338965ea79SLois Curfman McInnes /* determine ownership of all rows */ 11348965ea79SLois Curfman McInnes m = M/size + ((M % size) > rank); 11350452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 11368965ea79SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 11378965ea79SLois Curfman McInnes rowners[0] = 0; 11388965ea79SLois Curfman McInnes for ( i=2; i<=size; i++ ) { 11398965ea79SLois Curfman McInnes rowners[i] += rowners[i-1]; 11408965ea79SLois Curfman McInnes } 11418965ea79SLois Curfman McInnes rstart = rowners[rank]; 11428965ea79SLois Curfman McInnes rend = rowners[rank+1]; 11438965ea79SLois Curfman McInnes 11448965ea79SLois Curfman McInnes /* distribute row lengths to all processors */ 11450452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 11468965ea79SLois Curfman McInnes offlens = ourlens + (rend-rstart); 11478965ea79SLois Curfman McInnes if (!rank) { 11480452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 114977c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 11500452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 11518965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 11528965ea79SLois Curfman McInnes MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 11530452661fSBarry Smith PetscFree(sndcounts); 11548965ea79SLois Curfman McInnes } 11558965ea79SLois Curfman McInnes else { 11568965ea79SLois Curfman McInnes MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 11578965ea79SLois Curfman McInnes } 11588965ea79SLois Curfman McInnes 11598965ea79SLois Curfman McInnes if (!rank) { 11608965ea79SLois Curfman McInnes /* calculate the number of nonzeros on each processor */ 11610452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1162cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 11638965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 11648965ea79SLois Curfman McInnes for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 11658965ea79SLois Curfman McInnes procsnz[i] += rowlengths[j]; 11668965ea79SLois Curfman McInnes } 11678965ea79SLois Curfman McInnes } 11680452661fSBarry Smith PetscFree(rowlengths); 11698965ea79SLois Curfman McInnes 11708965ea79SLois Curfman McInnes /* determine max buffer needed and allocate it */ 11718965ea79SLois Curfman McInnes maxnz = 0; 11728965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 11730452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 11748965ea79SLois Curfman McInnes } 11750452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 11768965ea79SLois Curfman McInnes 11778965ea79SLois Curfman McInnes /* read in my part of the matrix column indices */ 11788965ea79SLois Curfman McInnes nz = procsnz[0]; 11790452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 118077c4ece6SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 11818965ea79SLois Curfman McInnes 11828965ea79SLois Curfman McInnes /* read in every one elses and ship off */ 11838965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 11848965ea79SLois Curfman McInnes nz = procsnz[i]; 118577c4ece6SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 11868965ea79SLois Curfman McInnes MPI_Send(cols,nz,MPI_INT,i,tag,comm); 11878965ea79SLois Curfman McInnes } 11880452661fSBarry Smith PetscFree(cols); 11898965ea79SLois Curfman McInnes } 11908965ea79SLois Curfman McInnes else { 11918965ea79SLois Curfman McInnes /* determine buffer space needed for message */ 11928965ea79SLois Curfman McInnes nz = 0; 11938965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 11948965ea79SLois Curfman McInnes nz += ourlens[i]; 11958965ea79SLois Curfman McInnes } 11960452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 11978965ea79SLois Curfman McInnes 11988965ea79SLois Curfman McInnes /* receive message of column indices*/ 11998965ea79SLois Curfman McInnes MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 12008965ea79SLois Curfman McInnes MPI_Get_count(&status,MPI_INT,&maxnz); 1201e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 12028965ea79SLois Curfman McInnes } 12038965ea79SLois Curfman McInnes 12048965ea79SLois Curfman McInnes /* loop over local rows, determining number of off diagonal entries */ 1205cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 12068965ea79SLois Curfman McInnes jj = 0; 12078965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 12088965ea79SLois Curfman McInnes for ( j=0; j<ourlens[i]; j++ ) { 12098965ea79SLois Curfman McInnes if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 12108965ea79SLois Curfman McInnes jj++; 12118965ea79SLois Curfman McInnes } 12128965ea79SLois Curfman McInnes } 12138965ea79SLois Curfman McInnes 12148965ea79SLois Curfman McInnes /* create our matrix */ 12158965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 12168965ea79SLois Curfman McInnes ourlens[i] -= offlens[i]; 12178965ea79SLois Curfman McInnes } 1218b4fd4287SBarry Smith ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 12198965ea79SLois Curfman McInnes A = *newmat; 12208965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 12218965ea79SLois Curfman McInnes ourlens[i] += offlens[i]; 12228965ea79SLois Curfman McInnes } 12238965ea79SLois Curfman McInnes 12248965ea79SLois Curfman McInnes if (!rank) { 12250452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 12268965ea79SLois Curfman McInnes 12278965ea79SLois Curfman McInnes /* read in my part of the matrix numerical values */ 12288965ea79SLois Curfman McInnes nz = procsnz[0]; 122977c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 12308965ea79SLois Curfman McInnes 12318965ea79SLois Curfman McInnes /* insert into matrix */ 12328965ea79SLois Curfman McInnes jj = rstart; 12338965ea79SLois Curfman McInnes smycols = mycols; 12348965ea79SLois Curfman McInnes svals = vals; 12358965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 12368965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 12378965ea79SLois Curfman McInnes smycols += ourlens[i]; 12388965ea79SLois Curfman McInnes svals += ourlens[i]; 12398965ea79SLois Curfman McInnes jj++; 12408965ea79SLois Curfman McInnes } 12418965ea79SLois Curfman McInnes 12428965ea79SLois Curfman McInnes /* read in other processors and ship out */ 12438965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 12448965ea79SLois Curfman McInnes nz = procsnz[i]; 124577c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 12468965ea79SLois Curfman McInnes MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 12478965ea79SLois Curfman McInnes } 12480452661fSBarry Smith PetscFree(procsnz); 12498965ea79SLois Curfman McInnes } 12508965ea79SLois Curfman McInnes else { 12518965ea79SLois Curfman McInnes /* receive numeric values */ 12520452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 12538965ea79SLois Curfman McInnes 12548965ea79SLois Curfman McInnes /* receive message of values*/ 12558965ea79SLois Curfman McInnes MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 12568965ea79SLois Curfman McInnes MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1257e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 12588965ea79SLois Curfman McInnes 12598965ea79SLois Curfman McInnes /* insert into matrix */ 12608965ea79SLois Curfman McInnes jj = rstart; 12618965ea79SLois Curfman McInnes smycols = mycols; 12628965ea79SLois Curfman McInnes svals = vals; 12638965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 12648965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 12658965ea79SLois Curfman McInnes smycols += ourlens[i]; 12668965ea79SLois Curfman McInnes svals += ourlens[i]; 12678965ea79SLois Curfman McInnes jj++; 12688965ea79SLois Curfman McInnes } 12698965ea79SLois Curfman McInnes } 12700452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 12718965ea79SLois Curfman McInnes 12726d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12736d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12748965ea79SLois Curfman McInnes return 0; 12758965ea79SLois Curfman McInnes } 127690ace30eSBarry Smith 127790ace30eSBarry Smith 127890ace30eSBarry Smith 127990ace30eSBarry Smith 128090ace30eSBarry Smith 1281