18965ea79SLois Curfman McInnes #ifndef lint 2*8aaee692SLois Curfman McInnes static char vcid[] = "$Id: mpidense.c,v 1.19 1995/12/29 22:22:01 bsmith Exp curfman $"; 38965ea79SLois Curfman McInnes #endif 48965ea79SLois Curfman McInnes 5ed3cc1f0SBarry Smith /* 6ed3cc1f0SBarry Smith Basic functions for basic parallel dense matrices. 7ed3cc1f0SBarry Smith */ 8ed3cc1f0SBarry Smith 98965ea79SLois Curfman McInnes #include "mpidense.h" 108965ea79SLois Curfman McInnes #include "vec/vecimpl.h" 118965ea79SLois Curfman McInnes #include "inline/spops.h" 128965ea79SLois Curfman McInnes 1339ddd567SLois Curfman McInnes static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n, 148965ea79SLois Curfman McInnes int *idxn,Scalar *v,InsertMode addv) 158965ea79SLois Curfman McInnes { 1639b7565bSBarry Smith Mat_MPIDense *A = (Mat_MPIDense *) mat->data; 1739b7565bSBarry Smith int ierr, i, j, rstart = A->rstart, rend = A->rend, row; 1839b7565bSBarry Smith int roworiented = A->roworiented; 198965ea79SLois Curfman McInnes 2039b7565bSBarry Smith if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) { 2139ddd567SLois Curfman McInnes SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds"); 228965ea79SLois Curfman McInnes } 2339b7565bSBarry Smith A->insertmode = addv; 248965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 2539ddd567SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row"); 2639b7565bSBarry Smith if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large"); 278965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 288965ea79SLois Curfman McInnes row = idxm[i] - rstart; 2939b7565bSBarry Smith if (roworiented) { 3039b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr); 3139b7565bSBarry Smith } 3239b7565bSBarry Smith else { 338965ea79SLois Curfman McInnes for ( j=0; j<n; j++ ) { 3439ddd567SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column"); 3539b7565bSBarry Smith if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large"); 3639b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr); 3739b7565bSBarry Smith } 388965ea79SLois Curfman McInnes } 398965ea79SLois Curfman McInnes } 408965ea79SLois Curfman McInnes else { 4139b7565bSBarry Smith if (roworiented) { 4239b7565bSBarry Smith ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr); 4339b7565bSBarry Smith } 4439b7565bSBarry Smith else { /* must stash each seperately */ 4539b7565bSBarry Smith row = idxm[i]; 4639b7565bSBarry Smith for ( j=0; j<n; j++ ) { 4739b7565bSBarry Smith ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv); 4839b7565bSBarry Smith CHKERRQ(ierr); 4939b7565bSBarry Smith } 5039b7565bSBarry Smith } 51b49de8d1SLois Curfman McInnes } 52b49de8d1SLois Curfman McInnes } 53b49de8d1SLois Curfman McInnes return 0; 54b49de8d1SLois Curfman McInnes } 55b49de8d1SLois Curfman McInnes 56b49de8d1SLois Curfman McInnes static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 57b49de8d1SLois Curfman McInnes { 58b49de8d1SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 59b49de8d1SLois Curfman McInnes int ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row; 60b49de8d1SLois Curfman McInnes 61b49de8d1SLois Curfman McInnes if (!mdn->assembled) SETERRQ(1,"MatGetValues_MPIDense:Not for unassembled matrix"); 62b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 63b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row"); 64b49de8d1SLois Curfman McInnes if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large"); 65b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 66b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 67b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 68b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column"); 69b49de8d1SLois Curfman McInnes if (idxn[j] >= mdn->N) 70b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIDense:Column too large"); 71b49de8d1SLois Curfman McInnes ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr); 72b49de8d1SLois Curfman McInnes } 73b49de8d1SLois Curfman McInnes } 74b49de8d1SLois Curfman McInnes else { 75b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported"); 768965ea79SLois Curfman McInnes } 778965ea79SLois Curfman McInnes } 788965ea79SLois Curfman McInnes return 0; 798965ea79SLois Curfman McInnes } 808965ea79SLois Curfman McInnes 8139ddd567SLois Curfman McInnes static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 828965ea79SLois Curfman McInnes { 8339ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 848965ea79SLois Curfman McInnes MPI_Comm comm = mat->comm; 8539ddd567SLois Curfman McInnes int size = mdn->size, *owners = mdn->rowners, rank = mdn->rank; 868965ea79SLois Curfman McInnes int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 8739ddd567SLois Curfman McInnes int tag = mat->tag, *owner,*starts,count,ierr; 888965ea79SLois Curfman McInnes InsertMode addv; 8939ddd567SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 908965ea79SLois Curfman McInnes Scalar *rvalues,*svalues; 918965ea79SLois Curfman McInnes 928965ea79SLois Curfman McInnes /* make sure all processors are either in INSERTMODE or ADDMODE */ 9339ddd567SLois Curfman McInnes MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT, 9439ddd567SLois Curfman McInnes MPI_BOR,comm); 9539ddd567SLois Curfman McInnes if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1, 9639ddd567SLois Curfman McInnes "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs"); 978965ea79SLois Curfman McInnes } 9839ddd567SLois Curfman McInnes mdn->insertmode = addv; /* in case this processor had no cache */ 998965ea79SLois Curfman McInnes 1008965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 1010452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 102cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1030452661fSBarry Smith owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 10439ddd567SLois Curfman McInnes for ( i=0; i<mdn->stash.n; i++ ) { 10539ddd567SLois Curfman McInnes idx = mdn->stash.idx[i]; 1068965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 1078965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 1088965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1098965ea79SLois Curfman McInnes } 1108965ea79SLois Curfman McInnes } 1118965ea79SLois Curfman McInnes } 1128965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1138965ea79SLois Curfman McInnes 1148965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 1150452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 11639ddd567SLois Curfman McInnes MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm); 1178965ea79SLois Curfman McInnes nreceives = work[rank]; 11839ddd567SLois Curfman McInnes MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm); 1198965ea79SLois Curfman McInnes nmax = work[rank]; 1200452661fSBarry Smith PetscFree(work); 1218965ea79SLois Curfman McInnes 1228965ea79SLois Curfman McInnes /* post receives: 1238965ea79SLois Curfman McInnes 1) each message will consist of ordered pairs 1248965ea79SLois Curfman McInnes (global index,value) we store the global index as a double 1258965ea79SLois Curfman McInnes to simplify the message passing. 1268965ea79SLois Curfman McInnes 2) since we don't know how long each individual message is we 1278965ea79SLois Curfman McInnes allocate the largest needed buffer for each receive. Potentially 1288965ea79SLois Curfman McInnes this is a lot of wasted space. 1298965ea79SLois Curfman McInnes 1308965ea79SLois Curfman McInnes This could be done better. 1318965ea79SLois Curfman McInnes */ 1320452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 1338965ea79SLois Curfman McInnes CHKPTRQ(rvalues); 1340452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 1358965ea79SLois Curfman McInnes CHKPTRQ(recv_waits); 1368965ea79SLois Curfman McInnes for ( i=0; i<nreceives; i++ ) { 13739ddd567SLois Curfman McInnes MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1388965ea79SLois Curfman McInnes comm,recv_waits+i); 1398965ea79SLois Curfman McInnes } 1408965ea79SLois Curfman McInnes 1418965ea79SLois Curfman McInnes /* do sends: 1428965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 1438965ea79SLois Curfman McInnes the ith processor 1448965ea79SLois Curfman McInnes */ 1450452661fSBarry Smith svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) ); 14639ddd567SLois Curfman McInnes CHKPTRQ(svalues); 1470452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 1488965ea79SLois Curfman McInnes CHKPTRQ(send_waits); 1490452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 1508965ea79SLois Curfman McInnes starts[0] = 0; 1518965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 15239ddd567SLois Curfman McInnes for ( i=0; i<mdn->stash.n; i++ ) { 15339ddd567SLois Curfman McInnes svalues[3*starts[owner[i]]] = (Scalar) mdn->stash.idx[i]; 15439ddd567SLois Curfman McInnes svalues[3*starts[owner[i]]+1] = (Scalar) mdn->stash.idy[i]; 15539ddd567SLois Curfman McInnes svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i]; 1568965ea79SLois Curfman McInnes } 1570452661fSBarry Smith PetscFree(owner); 1588965ea79SLois Curfman McInnes starts[0] = 0; 1598965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1608965ea79SLois Curfman McInnes count = 0; 1618965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 1628965ea79SLois Curfman McInnes if (procs[i]) { 16339ddd567SLois Curfman McInnes MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag, 1648965ea79SLois Curfman McInnes comm,send_waits+count++); 1658965ea79SLois Curfman McInnes } 1668965ea79SLois Curfman McInnes } 1670452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 1688965ea79SLois Curfman McInnes 1698965ea79SLois Curfman McInnes /* Free cache space */ 17039ddd567SLois Curfman McInnes ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr); 1718965ea79SLois Curfman McInnes 17239ddd567SLois Curfman McInnes mdn->svalues = svalues; mdn->rvalues = rvalues; 17339ddd567SLois Curfman McInnes mdn->nsends = nsends; mdn->nrecvs = nreceives; 17439ddd567SLois Curfman McInnes mdn->send_waits = send_waits; mdn->recv_waits = recv_waits; 17539ddd567SLois Curfman McInnes mdn->rmax = nmax; 1768965ea79SLois Curfman McInnes 1778965ea79SLois Curfman McInnes return 0; 1788965ea79SLois Curfman McInnes } 17939ddd567SLois Curfman McInnes extern int MatSetUpMultiply_MPIDense(Mat); 1808965ea79SLois Curfman McInnes 18139ddd567SLois Curfman McInnes static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 1828965ea79SLois Curfman McInnes { 18339ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 1848965ea79SLois Curfman McInnes MPI_Status *send_status,recv_status; 18539ddd567SLois Curfman McInnes int imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col; 1868965ea79SLois Curfman McInnes Scalar *values,val; 18739ddd567SLois Curfman McInnes InsertMode addv = mdn->insertmode; 1888965ea79SLois Curfman McInnes 1898965ea79SLois Curfman McInnes /* wait on receives */ 1908965ea79SLois Curfman McInnes while (count) { 19139ddd567SLois Curfman McInnes MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status); 1928965ea79SLois Curfman McInnes /* unpack receives into our local space */ 19339ddd567SLois Curfman McInnes values = mdn->rvalues + 3*imdex*mdn->rmax; 1948965ea79SLois Curfman McInnes MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 1958965ea79SLois Curfman McInnes n = n/3; 1968965ea79SLois Curfman McInnes for ( i=0; i<n; i++ ) { 19739ddd567SLois Curfman McInnes row = (int) PETSCREAL(values[3*i]) - mdn->rstart; 1988965ea79SLois Curfman McInnes col = (int) PETSCREAL(values[3*i+1]); 1998965ea79SLois Curfman McInnes val = values[3*i+2]; 20039ddd567SLois Curfman McInnes if (col >= 0 && col < mdn->N) { 20139ddd567SLois Curfman McInnes MatSetValues(mdn->A,1,&row,1,&col,&val,addv); 2028965ea79SLois Curfman McInnes } 20339ddd567SLois Curfman McInnes else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");} 2048965ea79SLois Curfman McInnes } 2058965ea79SLois Curfman McInnes count--; 2068965ea79SLois Curfman McInnes } 2070452661fSBarry Smith PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues); 2088965ea79SLois Curfman McInnes 2098965ea79SLois Curfman McInnes /* wait on sends */ 21039ddd567SLois Curfman McInnes if (mdn->nsends) { 2110452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) ); 2128965ea79SLois Curfman McInnes CHKPTRQ(send_status); 21339ddd567SLois Curfman McInnes MPI_Waitall(mdn->nsends,mdn->send_waits,send_status); 2140452661fSBarry Smith PetscFree(send_status); 2158965ea79SLois Curfman McInnes } 2160452661fSBarry Smith PetscFree(mdn->send_waits); PetscFree(mdn->svalues); 2178965ea79SLois Curfman McInnes 21839ddd567SLois Curfman McInnes mdn->insertmode = NOT_SET_VALUES; 21939ddd567SLois Curfman McInnes ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr); 22039ddd567SLois Curfman McInnes ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr); 2218965ea79SLois Curfman McInnes 22239ddd567SLois Curfman McInnes if (!mdn->assembled && mode == FINAL_ASSEMBLY) { 22339ddd567SLois Curfman McInnes ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr); 2248965ea79SLois Curfman McInnes } 22539ddd567SLois Curfman McInnes mdn->assembled = 1; 2268965ea79SLois Curfman McInnes return 0; 2278965ea79SLois Curfman McInnes } 2288965ea79SLois Curfman McInnes 22939ddd567SLois Curfman McInnes static int MatZeroEntries_MPIDense(Mat A) 2308965ea79SLois Curfman McInnes { 23139ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 23239ddd567SLois Curfman McInnes return MatZeroEntries(l->A); 2338965ea79SLois Curfman McInnes } 2348965ea79SLois Curfman McInnes 2358965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the 2368965ea79SLois Curfman McInnes matrix is square and the column and row owerships are identical. 2378965ea79SLois Curfman McInnes This is a BUG. The only way to fix it seems to be to access 2383501a2bdSLois Curfman McInnes mdn->A and mdn->B directly and not through the MatZeroRows() 2398965ea79SLois Curfman McInnes routine. 2408965ea79SLois Curfman McInnes */ 24139ddd567SLois Curfman McInnes static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag) 2428965ea79SLois Curfman McInnes { 24339ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 2448965ea79SLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 2458965ea79SLois Curfman McInnes int *procs,*nprocs,j,found,idx,nsends,*work; 2468965ea79SLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 2478965ea79SLois Curfman McInnes int *rvalues,tag = A->tag,count,base,slen,n,*source; 2488965ea79SLois Curfman McInnes int *lens,imdex,*lrows,*values; 2498965ea79SLois Curfman McInnes MPI_Comm comm = A->comm; 2508965ea79SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 2518965ea79SLois Curfman McInnes MPI_Status recv_status,*send_status; 2528965ea79SLois Curfman McInnes IS istmp; 2538965ea79SLois Curfman McInnes 25439ddd567SLois Curfman McInnes if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIDense:Must assemble matrix"); 2558965ea79SLois Curfman McInnes ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 2568965ea79SLois Curfman McInnes ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 2578965ea79SLois Curfman McInnes 2588965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 2590452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 260cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 2610452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 2628965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 2638965ea79SLois Curfman McInnes idx = rows[i]; 2648965ea79SLois Curfman McInnes found = 0; 2658965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 2668965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 2678965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 2688965ea79SLois Curfman McInnes } 2698965ea79SLois Curfman McInnes } 27039ddd567SLois Curfman McInnes if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range"); 2718965ea79SLois Curfman McInnes } 2728965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 2738965ea79SLois Curfman McInnes 2748965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 2750452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 2768965ea79SLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 2778965ea79SLois Curfman McInnes nrecvs = work[rank]; 2788965ea79SLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 2798965ea79SLois Curfman McInnes nmax = work[rank]; 2800452661fSBarry Smith PetscFree(work); 2818965ea79SLois Curfman McInnes 2828965ea79SLois Curfman McInnes /* post receives: */ 2830452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 2848965ea79SLois Curfman McInnes CHKPTRQ(rvalues); 2850452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 2868965ea79SLois Curfman McInnes CHKPTRQ(recv_waits); 2878965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 2888965ea79SLois Curfman McInnes MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 2898965ea79SLois Curfman McInnes } 2908965ea79SLois Curfman McInnes 2918965ea79SLois Curfman McInnes /* do sends: 2928965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 2938965ea79SLois Curfman McInnes the ith processor 2948965ea79SLois Curfman McInnes */ 2950452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 2960452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 2978965ea79SLois Curfman McInnes CHKPTRQ(send_waits); 2980452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 2998965ea79SLois Curfman McInnes starts[0] = 0; 3008965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3018965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 3028965ea79SLois Curfman McInnes svalues[starts[owner[i]]++] = rows[i]; 3038965ea79SLois Curfman McInnes } 3048965ea79SLois Curfman McInnes ISRestoreIndices(is,&rows); 3058965ea79SLois Curfman McInnes 3068965ea79SLois Curfman McInnes starts[0] = 0; 3078965ea79SLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3088965ea79SLois Curfman McInnes count = 0; 3098965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 3108965ea79SLois Curfman McInnes if (procs[i]) { 3118965ea79SLois Curfman McInnes MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 3128965ea79SLois Curfman McInnes } 3138965ea79SLois Curfman McInnes } 3140452661fSBarry Smith PetscFree(starts); 3158965ea79SLois Curfman McInnes 3168965ea79SLois Curfman McInnes base = owners[rank]; 3178965ea79SLois Curfman McInnes 3188965ea79SLois Curfman McInnes /* wait on receives */ 3190452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3208965ea79SLois Curfman McInnes source = lens + nrecvs; 3218965ea79SLois Curfman McInnes count = nrecvs; slen = 0; 3228965ea79SLois Curfman McInnes while (count) { 3238965ea79SLois Curfman McInnes MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3248965ea79SLois Curfman McInnes /* unpack receives into our local space */ 3258965ea79SLois Curfman McInnes MPI_Get_count(&recv_status,MPI_INT,&n); 3268965ea79SLois Curfman McInnes source[imdex] = recv_status.MPI_SOURCE; 3278965ea79SLois Curfman McInnes lens[imdex] = n; 3288965ea79SLois Curfman McInnes slen += n; 3298965ea79SLois Curfman McInnes count--; 3308965ea79SLois Curfman McInnes } 3310452661fSBarry Smith PetscFree(recv_waits); 3328965ea79SLois Curfman McInnes 3338965ea79SLois Curfman McInnes /* move the data into the send scatter */ 3340452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 3358965ea79SLois Curfman McInnes count = 0; 3368965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 3378965ea79SLois Curfman McInnes values = rvalues + i*nmax; 3388965ea79SLois Curfman McInnes for ( j=0; j<lens[i]; j++ ) { 3398965ea79SLois Curfman McInnes lrows[count++] = values[j] - base; 3408965ea79SLois Curfman McInnes } 3418965ea79SLois Curfman McInnes } 3420452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 3430452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 3448965ea79SLois Curfman McInnes 3458965ea79SLois Curfman McInnes /* actually zap the local rows */ 3468965ea79SLois Curfman McInnes ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 3478965ea79SLois Curfman McInnes PLogObjectParent(A,istmp); 3480452661fSBarry Smith PetscFree(lrows); 3498965ea79SLois Curfman McInnes ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 3508965ea79SLois Curfman McInnes ierr = ISDestroy(istmp); CHKERRQ(ierr); 3518965ea79SLois Curfman McInnes 3528965ea79SLois Curfman McInnes /* wait on sends */ 3538965ea79SLois Curfman McInnes if (nsends) { 3540452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 3558965ea79SLois Curfman McInnes CHKPTRQ(send_status); 3568965ea79SLois Curfman McInnes MPI_Waitall(nsends,send_waits,send_status); 3570452661fSBarry Smith PetscFree(send_status); 3588965ea79SLois Curfman McInnes } 3590452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 3608965ea79SLois Curfman McInnes 3618965ea79SLois Curfman McInnes return 0; 3628965ea79SLois Curfman McInnes } 3638965ea79SLois Curfman McInnes 36439ddd567SLois Curfman McInnes static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 3658965ea79SLois Curfman McInnes { 36639ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 3678965ea79SLois Curfman McInnes int ierr; 36839ddd567SLois Curfman McInnes if (!mdn->assembled) 36939ddd567SLois Curfman McInnes SETERRQ(1,"MatMult_MPIDense:Must assemble matrix first"); 37039ddd567SLois Curfman McInnes ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 37139ddd567SLois Curfman McInnes CHKERRQ(ierr); 37239ddd567SLois Curfman McInnes ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 37339ddd567SLois Curfman McInnes CHKERRQ(ierr); 37439ddd567SLois Curfman McInnes ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr); 3758965ea79SLois Curfman McInnes return 0; 3768965ea79SLois Curfman McInnes } 3778965ea79SLois Curfman McInnes 37839ddd567SLois Curfman McInnes static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 3798965ea79SLois Curfman McInnes { 38039ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 3818965ea79SLois Curfman McInnes int ierr; 38239ddd567SLois Curfman McInnes if (!mdn->assembled) 38339ddd567SLois Curfman McInnes SETERRQ(1,"MatMultAdd_MPIDense:Must assemble matrix first"); 3843501a2bdSLois Curfman McInnes ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 38539ddd567SLois Curfman McInnes CHKERRQ(ierr); 3863501a2bdSLois Curfman McInnes ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 38739ddd567SLois Curfman McInnes CHKERRQ(ierr); 38839ddd567SLois Curfman McInnes ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr); 3898965ea79SLois Curfman McInnes return 0; 3908965ea79SLois Curfman McInnes } 3918965ea79SLois Curfman McInnes 392096963f5SLois Curfman McInnes static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy) 393096963f5SLois Curfman McInnes { 394096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 395096963f5SLois Curfman McInnes int ierr; 3963501a2bdSLois Curfman McInnes Scalar zero = 0.0; 397096963f5SLois Curfman McInnes 398096963f5SLois Curfman McInnes if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIDense:must assemble matrix"); 3993501a2bdSLois Curfman McInnes ierr = VecSet(&zero,yy); CHKERRQ(ierr); 400096963f5SLois Curfman McInnes ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr); 401096963f5SLois Curfman McInnes ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 402096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 403096963f5SLois Curfman McInnes ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 404096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 405096963f5SLois Curfman McInnes return 0; 406096963f5SLois Curfman McInnes } 407096963f5SLois Curfman McInnes 408096963f5SLois Curfman McInnes static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 409096963f5SLois Curfman McInnes { 410096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 411096963f5SLois Curfman McInnes int ierr; 412096963f5SLois Curfman McInnes 413096963f5SLois Curfman McInnes if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIDense:must assemble matrix"); 4143501a2bdSLois Curfman McInnes ierr = VecCopy(yy,zz); CHKERRQ(ierr); 4153501a2bdSLois Curfman McInnes ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr); 416096963f5SLois Curfman McInnes ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 417096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 418096963f5SLois Curfman McInnes ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 419096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 420096963f5SLois Curfman McInnes return 0; 421096963f5SLois Curfman McInnes } 422096963f5SLois Curfman McInnes 42339ddd567SLois Curfman McInnes static int MatGetDiagonal_MPIDense(Mat A,Vec v) 4248965ea79SLois Curfman McInnes { 42539ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 426096963f5SLois Curfman McInnes Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data; 427096963f5SLois Curfman McInnes int ierr, i, n, m = a->m, radd; 428096963f5SLois Curfman McInnes Scalar *x; 429ed3cc1f0SBarry Smith 43039ddd567SLois Curfman McInnes if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIDense:must assemble matrix"); 431096963f5SLois Curfman McInnes ierr = VecGetArray(v,&x); CHKERRQ(ierr); 432096963f5SLois Curfman McInnes ierr = VecGetSize(v,&n); CHKERRQ(ierr); 433096963f5SLois Curfman McInnes if (n != a->M) SETERRQ(1,"MatGetDiagonal_SeqDense:Nonconforming mat and vec"); 434096963f5SLois Curfman McInnes radd = a->rstart*m*m; 435096963f5SLois Curfman McInnes for ( i=0; i<m; i++ ) { 436096963f5SLois Curfman McInnes x[i] = aloc->v[radd + i*m + i]; 437096963f5SLois Curfman McInnes } 438096963f5SLois Curfman McInnes return 0; 4398965ea79SLois Curfman McInnes } 4408965ea79SLois Curfman McInnes 44139ddd567SLois Curfman McInnes static int MatDestroy_MPIDense(PetscObject obj) 4428965ea79SLois Curfman McInnes { 4438965ea79SLois Curfman McInnes Mat mat = (Mat) obj; 4443501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4458965ea79SLois Curfman McInnes int ierr; 446ed3cc1f0SBarry Smith 4478965ea79SLois Curfman McInnes #if defined(PETSC_LOG) 4483501a2bdSLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N); 4498965ea79SLois Curfman McInnes #endif 4500452661fSBarry Smith PetscFree(mdn->rowners); 4513501a2bdSLois Curfman McInnes ierr = MatDestroy(mdn->A); CHKERRQ(ierr); 4523501a2bdSLois Curfman McInnes if (mdn->lvec) VecDestroy(mdn->lvec); 4533501a2bdSLois Curfman McInnes if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 454052cd425SLois Curfman McInnes if (mdn->factor) PetscFree(mdn->factor); 4550452661fSBarry Smith PetscFree(mdn); 4568965ea79SLois Curfman McInnes PLogObjectDestroy(mat); 4570452661fSBarry Smith PetscHeaderDestroy(mat); 4588965ea79SLois Curfman McInnes return 0; 4598965ea79SLois Curfman McInnes } 46039ddd567SLois Curfman McInnes 4618965ea79SLois Curfman McInnes #include "pinclude/pviewer.h" 4628965ea79SLois Curfman McInnes 46339ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer) 4648965ea79SLois Curfman McInnes { 46539ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4668965ea79SLois Curfman McInnes int ierr; 46739ddd567SLois Curfman McInnes if (mdn->size == 1) { 46839ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 4698965ea79SLois Curfman McInnes } 47039ddd567SLois Curfman McInnes else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported"); 4718965ea79SLois Curfman McInnes return 0; 4728965ea79SLois Curfman McInnes } 4738965ea79SLois Curfman McInnes 47439ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer) 4758965ea79SLois Curfman McInnes { 47639ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 47739ddd567SLois Curfman McInnes int ierr, format; 4788965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) viewer; 4798965ea79SLois Curfman McInnes FILE *fd; 4808965ea79SLois Curfman McInnes 4813501a2bdSLois Curfman McInnes ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 4828965ea79SLois Curfman McInnes if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 4838965ea79SLois Curfman McInnes ierr = ViewerFileGetFormat_Private(viewer,&format); 4843501a2bdSLois Curfman McInnes if (format == FILE_FORMAT_INFO_DETAILED) { 485096963f5SLois Curfman McInnes int nz, nzalloc, mem, rank; 486096963f5SLois Curfman McInnes MPI_Comm_rank(mat->comm,&rank); 487096963f5SLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 488096963f5SLois Curfman McInnes MPIU_Seq_begin(mat->comm,1); 4893501a2bdSLois Curfman McInnes fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n", 490096963f5SLois Curfman McInnes rank,mdn->m,nz,nzalloc,mem); 491096963f5SLois Curfman McInnes fflush(fd); 492096963f5SLois Curfman McInnes MPIU_Seq_end(mat->comm,1); 4933501a2bdSLois Curfman McInnes ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr); 4943501a2bdSLois Curfman McInnes return 0; 4953501a2bdSLois Curfman McInnes } 4963501a2bdSLois Curfman McInnes else if (format == FILE_FORMAT_INFO) { 497096963f5SLois Curfman McInnes return 0; 4988965ea79SLois Curfman McInnes } 4998965ea79SLois Curfman McInnes } 5008965ea79SLois Curfman McInnes if (vobj->type == ASCII_FILE_VIEWER) { 5018965ea79SLois Curfman McInnes MPIU_Seq_begin(mat->comm,1); 50239ddd567SLois Curfman McInnes fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n", 50339ddd567SLois Curfman McInnes mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n); 50439ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 5058965ea79SLois Curfman McInnes fflush(fd); 5068965ea79SLois Curfman McInnes MPIU_Seq_end(mat->comm,1); 5078965ea79SLois Curfman McInnes } 5088965ea79SLois Curfman McInnes else { 50939ddd567SLois Curfman McInnes int size = mdn->size, rank = mdn->rank; 5108965ea79SLois Curfman McInnes if (size == 1) { 51139ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 5128965ea79SLois Curfman McInnes } 5138965ea79SLois Curfman McInnes else { 5148965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 5158965ea79SLois Curfman McInnes Mat A; 51639ddd567SLois Curfman McInnes int M = mdn->M, N = mdn->N,m,row,i, nz, *cols; 51739ddd567SLois Curfman McInnes Scalar *vals; 51839ddd567SLois Curfman McInnes Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data; 5198965ea79SLois Curfman McInnes 5208965ea79SLois Curfman McInnes if (!rank) { 521b4fd4287SBarry Smith ierr = MatCreateMPIDense(mat->comm,M,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr); 5228965ea79SLois Curfman McInnes } 5238965ea79SLois Curfman McInnes else { 524b4fd4287SBarry Smith ierr = MatCreateMPIDense(mat->comm,0,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr); 5258965ea79SLois Curfman McInnes } 5268965ea79SLois Curfman McInnes PLogObjectParent(mat,A); 5278965ea79SLois Curfman McInnes 52839ddd567SLois Curfman McInnes /* Copy the matrix ... This isn't the most efficient means, 52939ddd567SLois Curfman McInnes but it's quick for now */ 53039ddd567SLois Curfman McInnes row = mdn->rstart; m = Amdn->m; 5318965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 53239ddd567SLois Curfman McInnes ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 53339ddd567SLois Curfman McInnes ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr); 53439ddd567SLois Curfman McInnes ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 53539ddd567SLois Curfman McInnes row++; 5368965ea79SLois Curfman McInnes } 5378965ea79SLois Curfman McInnes 5388965ea79SLois Curfman McInnes ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 5398965ea79SLois Curfman McInnes ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 5408965ea79SLois Curfman McInnes if (!rank) { 54139ddd567SLois Curfman McInnes ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr); 5428965ea79SLois Curfman McInnes } 5438965ea79SLois Curfman McInnes ierr = MatDestroy(A); CHKERRQ(ierr); 5448965ea79SLois Curfman McInnes } 5458965ea79SLois Curfman McInnes } 5468965ea79SLois Curfman McInnes return 0; 5478965ea79SLois Curfman McInnes } 5488965ea79SLois Curfman McInnes 54939ddd567SLois Curfman McInnes static int MatView_MPIDense(PetscObject obj,Viewer viewer) 5508965ea79SLois Curfman McInnes { 5518965ea79SLois Curfman McInnes Mat mat = (Mat) obj; 55239ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 5538965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) viewer; 55439ddd567SLois Curfman McInnes int ierr; 5558965ea79SLois Curfman McInnes 55639ddd567SLois Curfman McInnes if (!mdn->assembled) SETERRQ(1,"MatView_MPIDense:must assemble matrix"); 5578965ea79SLois Curfman McInnes if (!viewer) { 5588965ea79SLois Curfman McInnes viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 5598965ea79SLois Curfman McInnes } 56039ddd567SLois Curfman McInnes if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 56139ddd567SLois Curfman McInnes ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 5628965ea79SLois Curfman McInnes } 5638965ea79SLois Curfman McInnes else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 56439ddd567SLois Curfman McInnes ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 5658965ea79SLois Curfman McInnes } 5668965ea79SLois Curfman McInnes else if (vobj->type == BINARY_FILE_VIEWER) { 56739ddd567SLois Curfman McInnes return MatView_MPIDense_Binary(mat,viewer); 5688965ea79SLois Curfman McInnes } 5698965ea79SLois Curfman McInnes return 0; 5708965ea79SLois Curfman McInnes } 5718965ea79SLois Curfman McInnes 5723501a2bdSLois Curfman McInnes static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz, 5738965ea79SLois Curfman McInnes int *nzalloc,int *mem) 5748965ea79SLois Curfman McInnes { 5753501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 5763501a2bdSLois Curfman McInnes Mat mdn = mat->A; 57739ddd567SLois Curfman McInnes int ierr, isend[3], irecv[3]; 5788965ea79SLois Curfman McInnes 5793501a2bdSLois Curfman McInnes ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 5808965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 5818965ea79SLois Curfman McInnes *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 5828965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 5833501a2bdSLois Curfman McInnes MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm); 5848965ea79SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 5858965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 5863501a2bdSLois Curfman McInnes MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm); 5878965ea79SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 5888965ea79SLois Curfman McInnes } 5898965ea79SLois Curfman McInnes return 0; 5908965ea79SLois Curfman McInnes } 5918965ea79SLois Curfman McInnes 592*8aaee692SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*); 593*8aaee692SLois Curfman McInnes extern int MatLUFactorNumeric_MPIDense(Mat,Mat*); 594*8aaee692SLois Curfman McInnes extern int MatLUFactor_MPIDense(Mat,IS,IS,double); 595*8aaee692SLois Curfman McInnes extern int MatSolve_MPIDense(Mat,Vec,Vec); 596*8aaee692SLois Curfman McInnes /* extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec); 597*8aaee692SLois Curfman McInnes extern int MatSolveTrans_MPIDense(Mat,Vec,Vec); 598*8aaee692SLois Curfman McInnes extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */ 599*8aaee692SLois Curfman McInnes 60039ddd567SLois Curfman McInnes static int MatSetOption_MPIDense(Mat A,MatOption op) 6018965ea79SLois Curfman McInnes { 60239ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 6038965ea79SLois Curfman McInnes 6048965ea79SLois Curfman McInnes if (op == NO_NEW_NONZERO_LOCATIONS || 6058965ea79SLois Curfman McInnes op == YES_NEW_NONZERO_LOCATIONS || 6068965ea79SLois Curfman McInnes op == COLUMNS_SORTED || 6078965ea79SLois Curfman McInnes op == ROW_ORIENTED) { 6088965ea79SLois Curfman McInnes MatSetOption(a->A,op); 6098965ea79SLois Curfman McInnes } 6108965ea79SLois Curfman McInnes else if (op == ROWS_SORTED || 6118965ea79SLois Curfman McInnes op == SYMMETRIC_MATRIX || 6128965ea79SLois Curfman McInnes op == STRUCTURALLY_SYMMETRIC_MATRIX || 6138965ea79SLois Curfman McInnes op == YES_NEW_DIAGONALS) 61439ddd567SLois Curfman McInnes PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n"); 6158965ea79SLois Curfman McInnes else if (op == COLUMN_ORIENTED) 61639b7565bSBarry Smith { a->roworiented = 0; MatSetOption(a->A,op);} 6178965ea79SLois Curfman McInnes else if (op == NO_NEW_DIAGONALS) 61839ddd567SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");} 6198965ea79SLois Curfman McInnes else 62039ddd567SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");} 6218965ea79SLois Curfman McInnes return 0; 6228965ea79SLois Curfman McInnes } 6238965ea79SLois Curfman McInnes 6243501a2bdSLois Curfman McInnes static int MatGetSize_MPIDense(Mat A,int *m,int *n) 6258965ea79SLois Curfman McInnes { 6263501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6278965ea79SLois Curfman McInnes *m = mat->M; *n = mat->N; 6288965ea79SLois Curfman McInnes return 0; 6298965ea79SLois Curfman McInnes } 6308965ea79SLois Curfman McInnes 6313501a2bdSLois Curfman McInnes static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n) 6328965ea79SLois Curfman McInnes { 6333501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6348965ea79SLois Curfman McInnes *m = mat->m; *n = mat->N; 6358965ea79SLois Curfman McInnes return 0; 6368965ea79SLois Curfman McInnes } 6378965ea79SLois Curfman McInnes 6383501a2bdSLois Curfman McInnes static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n) 6398965ea79SLois Curfman McInnes { 6403501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6418965ea79SLois Curfman McInnes *m = mat->rstart; *n = mat->rend; 6428965ea79SLois Curfman McInnes return 0; 6438965ea79SLois Curfman McInnes } 6448965ea79SLois Curfman McInnes 6453501a2bdSLois Curfman McInnes static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v) 6468965ea79SLois Curfman McInnes { 6473501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 64839ddd567SLois Curfman McInnes int lrow, rstart = mat->rstart, rend = mat->rend; 6498965ea79SLois Curfman McInnes 65039ddd567SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows") 6518965ea79SLois Curfman McInnes lrow = row - rstart; 65239ddd567SLois Curfman McInnes return MatGetRow(mat->A,lrow,nz,idx,v); 6538965ea79SLois Curfman McInnes } 6548965ea79SLois Curfman McInnes 65539ddd567SLois Curfman McInnes static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v) 6568965ea79SLois Curfman McInnes { 6570452661fSBarry Smith if (idx) PetscFree(*idx); 6580452661fSBarry Smith if (v) PetscFree(*v); 6598965ea79SLois Curfman McInnes return 0; 6608965ea79SLois Curfman McInnes } 6618965ea79SLois Curfman McInnes 662cddf8d76SBarry Smith static int MatNorm_MPIDense(Mat A,NormType type,double *norm) 663096963f5SLois Curfman McInnes { 6643501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) A->data; 6653501a2bdSLois Curfman McInnes Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data; 6663501a2bdSLois Curfman McInnes int ierr, i, j; 6673501a2bdSLois Curfman McInnes double sum = 0.0; 6683501a2bdSLois Curfman McInnes Scalar *v = mat->v; 6693501a2bdSLois Curfman McInnes 6703501a2bdSLois Curfman McInnes if (!mdn->assembled) SETERRQ(1,"MatNorm_MPIDense:Must assemble matrix"); 6713501a2bdSLois Curfman McInnes if (mdn->size == 1) { 6723501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr); 6733501a2bdSLois Curfman McInnes } else { 6743501a2bdSLois Curfman McInnes if (type == NORM_FROBENIUS) { 6753501a2bdSLois Curfman McInnes for (i=0; i<mat->n*mat->m; i++ ) { 6763501a2bdSLois Curfman McInnes #if defined(PETSC_COMPLEX) 6773501a2bdSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 6783501a2bdSLois Curfman McInnes #else 6793501a2bdSLois Curfman McInnes sum += (*v)*(*v); v++; 6803501a2bdSLois Curfman McInnes #endif 6813501a2bdSLois Curfman McInnes } 6823501a2bdSLois Curfman McInnes MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,A->comm); 6833501a2bdSLois Curfman McInnes *norm = sqrt(*norm); 6843501a2bdSLois Curfman McInnes PLogFlops(2*mat->n*mat->m); 6853501a2bdSLois Curfman McInnes } 6863501a2bdSLois Curfman McInnes else if (type == NORM_1) { 6873501a2bdSLois Curfman McInnes double *tmp, *tmp2; 6880452661fSBarry Smith tmp = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp); 6893501a2bdSLois Curfman McInnes tmp2 = tmp + mdn->N; 690cddf8d76SBarry Smith PetscMemzero(tmp,2*mdn->N*sizeof(double)); 691096963f5SLois Curfman McInnes *norm = 0.0; 6923501a2bdSLois Curfman McInnes v = mat->v; 6933501a2bdSLois Curfman McInnes for ( j=0; j<mat->n; j++ ) { 6943501a2bdSLois Curfman McInnes for ( i=0; i<mat->m; i++ ) { 69567e560aaSBarry Smith tmp[j] += PetscAbsScalar(*v); v++; 6963501a2bdSLois Curfman McInnes } 6973501a2bdSLois Curfman McInnes } 6983501a2bdSLois Curfman McInnes MPI_Allreduce((void*)tmp,(void*)tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm); 6993501a2bdSLois Curfman McInnes for ( j=0; j<mdn->N; j++ ) { 7003501a2bdSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 7013501a2bdSLois Curfman McInnes } 7020452661fSBarry Smith PetscFree(tmp); 7033501a2bdSLois Curfman McInnes PLogFlops(mat->n*mat->m); 7043501a2bdSLois Curfman McInnes } 7053501a2bdSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 7063501a2bdSLois Curfman McInnes double ntemp; 7073501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr); 7083501a2bdSLois Curfman McInnes MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,A->comm); 7093501a2bdSLois Curfman McInnes } 7103501a2bdSLois Curfman McInnes else { 7113501a2bdSLois Curfman McInnes SETERRQ(1,"MatNorm_MPIDense:No support for two norm"); 7123501a2bdSLois Curfman McInnes } 7133501a2bdSLois Curfman McInnes } 7143501a2bdSLois Curfman McInnes return 0; 7153501a2bdSLois Curfman McInnes } 7163501a2bdSLois Curfman McInnes 7173501a2bdSLois Curfman McInnes static int MatTranspose_MPIDense(Mat A,Mat *matout) 7183501a2bdSLois Curfman McInnes { 7193501a2bdSLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 7203501a2bdSLois Curfman McInnes Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data; 7213501a2bdSLois Curfman McInnes Mat B; 7223501a2bdSLois Curfman McInnes int M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart; 7233501a2bdSLois Curfman McInnes int j, i, ierr; 7243501a2bdSLois Curfman McInnes Scalar *v; 7253501a2bdSLois Curfman McInnes 7263501a2bdSLois Curfman McInnes if (!matout && M != N) 7273501a2bdSLois Curfman McInnes SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place"); 728b4fd4287SBarry Smith ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B); 729ed2daf61SLois Curfman McInnes CHKERRQ(ierr); 7303501a2bdSLois Curfman McInnes 7313501a2bdSLois Curfman McInnes m = Aloc->m; n = Aloc->n; v = Aloc->v; 7320452661fSBarry Smith rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork); 7333501a2bdSLois Curfman McInnes for ( j=0; j<n; j++ ) { 7343501a2bdSLois Curfman McInnes for (i=0; i<m; i++) rwork[i] = rstart + i; 7353501a2bdSLois Curfman McInnes ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr); 7363501a2bdSLois Curfman McInnes v += m; 7373501a2bdSLois Curfman McInnes } 7380452661fSBarry Smith PetscFree(rwork); 7393501a2bdSLois Curfman McInnes ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 7403501a2bdSLois Curfman McInnes ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 7413501a2bdSLois Curfman McInnes if (matout) { 7423501a2bdSLois Curfman McInnes *matout = B; 7433501a2bdSLois Curfman McInnes } else { 7443501a2bdSLois Curfman McInnes /* This isn't really an in-place transpose, but free data struct from a */ 7450452661fSBarry Smith PetscFree(a->rowners); 7463501a2bdSLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 7473501a2bdSLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 7483501a2bdSLois Curfman McInnes if (a->Mvctx) VecScatterDestroy(a->Mvctx); 7490452661fSBarry Smith PetscFree(a); 7503501a2bdSLois Curfman McInnes PetscMemcpy(A,B,sizeof(struct _Mat)); 7510452661fSBarry Smith PetscHeaderDestroy(B); 7523501a2bdSLois Curfman McInnes } 753096963f5SLois Curfman McInnes return 0; 754096963f5SLois Curfman McInnes } 755096963f5SLois Curfman McInnes 7563d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat,Mat *,int); 7578965ea79SLois Curfman McInnes 7588965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/ 75939ddd567SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIDense, 76039ddd567SLois Curfman McInnes MatGetRow_MPIDense,MatRestoreRow_MPIDense, 76139ddd567SLois Curfman McInnes MatMult_MPIDense,MatMultAdd_MPIDense, 762096963f5SLois Curfman McInnes MatMultTrans_MPIDense,MatMultTransAdd_MPIDense, 763*8aaee692SLois Curfman McInnes MatSolve_MPIDense,0, 76439ddd567SLois Curfman McInnes 0,0, 765*8aaee692SLois Curfman McInnes MatLUFactor_MPIDense,0, 766*8aaee692SLois Curfman McInnes 0,MatTranspose_MPIDense, 76739ddd567SLois Curfman McInnes MatGetInfo_MPIDense,0, 768096963f5SLois Curfman McInnes MatGetDiagonal_MPIDense,0,MatNorm_MPIDense, 76939ddd567SLois Curfman McInnes MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense, 7708965ea79SLois Curfman McInnes 0, 77139ddd567SLois Curfman McInnes MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense, 772*8aaee692SLois Curfman McInnes 0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, 773*8aaee692SLois Curfman McInnes 0,0, 77439ddd567SLois Curfman McInnes MatGetSize_MPIDense,MatGetLocalSize_MPIDense, 77539ddd567SLois Curfman McInnes MatGetOwnershipRange_MPIDense, 77639ddd567SLois Curfman McInnes 0,0, 7773d1612f7SBarry Smith 0,0,0,0,0,MatConvertSameType_MPIDense, 778b49de8d1SLois Curfman McInnes 0,0,0,0,0, 779b49de8d1SLois Curfman McInnes 0,0,MatGetValues_MPIDense}; 7808965ea79SLois Curfman McInnes 7818965ea79SLois Curfman McInnes /*@C 78239ddd567SLois Curfman McInnes MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 7838965ea79SLois Curfman McInnes 7848965ea79SLois Curfman McInnes Input Parameters: 7858965ea79SLois Curfman McInnes . comm - MPI communicator 7868965ea79SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 7878965ea79SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 7888965ea79SLois Curfman McInnes if N is given) 7898965ea79SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 7908965ea79SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 7918965ea79SLois Curfman McInnes if n is given) 792b4fd4287SBarry Smith . data - optional location of matrix data. Set data=PETSC_NULL for PETSc 793dfc5480cSLois Curfman McInnes to control all matrix memory allocation. 7948965ea79SLois Curfman McInnes 7958965ea79SLois Curfman McInnes Output Parameter: 7968965ea79SLois Curfman McInnes . newmat - the matrix 7978965ea79SLois Curfman McInnes 7988965ea79SLois Curfman McInnes Notes: 79939ddd567SLois Curfman McInnes The dense format is fully compatible with standard Fortran 77 80039ddd567SLois Curfman McInnes storage by columns. 8018965ea79SLois Curfman McInnes 80218f449edSLois Curfman McInnes The data input variable is intended primarily for Fortran programmers 80318f449edSLois Curfman McInnes who wish to allocate their own matrix memory space. Most users should 804b4fd4287SBarry Smith set data=PETSC_NULL. 80518f449edSLois Curfman McInnes 8068965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 8078965ea79SLois Curfman McInnes (possibly both). 8088965ea79SLois Curfman McInnes 8093501a2bdSLois Curfman McInnes Currently, the only parallel dense matrix decomposition is by rows, 8103501a2bdSLois Curfman McInnes so that n=N and each submatrix owns all of the global columns. 8113501a2bdSLois Curfman McInnes 81239ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel 8138965ea79SLois Curfman McInnes 81439ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 8158965ea79SLois Curfman McInnes @*/ 81618f449edSLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *newmat) 8178965ea79SLois Curfman McInnes { 8188965ea79SLois Curfman McInnes Mat mat; 81939ddd567SLois Curfman McInnes Mat_MPIDense *a; 82039ddd567SLois Curfman McInnes int ierr, i; 8218965ea79SLois Curfman McInnes 822ed2daf61SLois Curfman McInnes /* Note: For now, when data is specified above, this assumes the user correctly 823ed2daf61SLois Curfman McInnes allocates the local dense storage space. We should add error checking. */ 82418f449edSLois Curfman McInnes 8258965ea79SLois Curfman McInnes *newmat = 0; 8260452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm); 8278965ea79SLois Curfman McInnes PLogObjectCreate(mat); 8280452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 8298965ea79SLois Curfman McInnes PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 83039ddd567SLois Curfman McInnes mat->destroy = MatDestroy_MPIDense; 83139ddd567SLois Curfman McInnes mat->view = MatView_MPIDense; 8328965ea79SLois Curfman McInnes mat->factor = 0; 8338965ea79SLois Curfman McInnes 8348965ea79SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 8358965ea79SLois Curfman McInnes MPI_Comm_rank(comm,&a->rank); 8368965ea79SLois Curfman McInnes MPI_Comm_size(comm,&a->size); 8378965ea79SLois Curfman McInnes 83839ddd567SLois Curfman McInnes if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm); 8398965ea79SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 84039ddd567SLois Curfman McInnes 84139ddd567SLois Curfman McInnes /* each row stores all columns */ 84239ddd567SLois Curfman McInnes if (N == PETSC_DECIDE) N = n; 843d7e8b826SBarry Smith if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 844d7e8b826SBarry Smith /* if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */ 8458965ea79SLois Curfman McInnes a->N = N; 8468965ea79SLois Curfman McInnes a->M = M; 84739ddd567SLois Curfman McInnes a->m = m; 84839ddd567SLois Curfman McInnes a->n = n; 8498965ea79SLois Curfman McInnes 8508965ea79SLois Curfman McInnes /* build local table of row and column ownerships */ 851d7e8b826SBarry Smith a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 852d7e8b826SBarry Smith a->cowners = a->rowners + a->size + 1; 853d7e8b826SBarry Smith PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+ 85439ddd567SLois Curfman McInnes sizeof(Mat_MPIDense)); 8558965ea79SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 8568965ea79SLois Curfman McInnes a->rowners[0] = 0; 8578965ea79SLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 8588965ea79SLois Curfman McInnes a->rowners[i] += a->rowners[i-1]; 8598965ea79SLois Curfman McInnes } 8608965ea79SLois Curfman McInnes a->rstart = a->rowners[a->rank]; 8618965ea79SLois Curfman McInnes a->rend = a->rowners[a->rank+1]; 862d7e8b826SBarry Smith MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 863d7e8b826SBarry Smith a->cowners[0] = 0; 864d7e8b826SBarry Smith for ( i=2; i<=a->size; i++ ) { 865d7e8b826SBarry Smith a->cowners[i] += a->cowners[i-1]; 866d7e8b826SBarry Smith } 8678965ea79SLois Curfman McInnes 86818f449edSLois Curfman McInnes ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr); 8698965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 8708965ea79SLois Curfman McInnes 8718965ea79SLois Curfman McInnes /* build cache for off array entries formed */ 8728965ea79SLois Curfman McInnes ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 8738965ea79SLois Curfman McInnes 8748965ea79SLois Curfman McInnes /* stuff used for matrix vector multiply */ 8758965ea79SLois Curfman McInnes a->lvec = 0; 8768965ea79SLois Curfman McInnes a->Mvctx = 0; 8778965ea79SLois Curfman McInnes a->assembled = 0; 87839b7565bSBarry Smith a->roworiented = 1; 8798965ea79SLois Curfman McInnes 8808965ea79SLois Curfman McInnes *newmat = mat; 8818965ea79SLois Curfman McInnes return 0; 8828965ea79SLois Curfman McInnes } 8838965ea79SLois Curfman McInnes 8843d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues) 8858965ea79SLois Curfman McInnes { 8868965ea79SLois Curfman McInnes Mat mat; 8873501a2bdSLois Curfman McInnes Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data; 88839ddd567SLois Curfman McInnes int ierr; 8898965ea79SLois Curfman McInnes 8903d1612f7SBarry Smith if (!oldmat->assembled) SETERRQ(1,"MatConvertSameType_MPIDense:Must assemble matrix"); 8918965ea79SLois Curfman McInnes *newmat = 0; 8920452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm); 8938965ea79SLois Curfman McInnes PLogObjectCreate(mat); 8940452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 8958965ea79SLois Curfman McInnes PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 89639ddd567SLois Curfman McInnes mat->destroy = MatDestroy_MPIDense; 89739ddd567SLois Curfman McInnes mat->view = MatView_MPIDense; 8983501a2bdSLois Curfman McInnes mat->factor = A->factor; 8998965ea79SLois Curfman McInnes 9008965ea79SLois Curfman McInnes a->m = oldmat->m; 9018965ea79SLois Curfman McInnes a->n = oldmat->n; 9028965ea79SLois Curfman McInnes a->M = oldmat->M; 9038965ea79SLois Curfman McInnes a->N = oldmat->N; 9048965ea79SLois Curfman McInnes 9058965ea79SLois Curfman McInnes a->assembled = 1; 9068965ea79SLois Curfman McInnes a->rstart = oldmat->rstart; 9078965ea79SLois Curfman McInnes a->rend = oldmat->rend; 9088965ea79SLois Curfman McInnes a->size = oldmat->size; 9098965ea79SLois Curfman McInnes a->rank = oldmat->rank; 9108965ea79SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 9118965ea79SLois Curfman McInnes 9120452661fSBarry Smith a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 91339ddd567SLois Curfman McInnes PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense)); 9148965ea79SLois Curfman McInnes PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 9158965ea79SLois Curfman McInnes ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 9168965ea79SLois Curfman McInnes 9178965ea79SLois Curfman McInnes ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 9188965ea79SLois Curfman McInnes PLogObjectParent(mat,a->lvec); 91955659b69SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 9208965ea79SLois Curfman McInnes PLogObjectParent(mat,a->Mvctx); 9218965ea79SLois Curfman McInnes ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 9228965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 9238965ea79SLois Curfman McInnes *newmat = mat; 9248965ea79SLois Curfman McInnes return 0; 9258965ea79SLois Curfman McInnes } 9268965ea79SLois Curfman McInnes 9278965ea79SLois Curfman McInnes #include "sysio.h" 9288965ea79SLois Curfman McInnes 92939ddd567SLois Curfman McInnes int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat) 9308965ea79SLois Curfman McInnes { 9318965ea79SLois Curfman McInnes Mat A; 9328965ea79SLois Curfman McInnes int i, nz, ierr, j,rstart, rend, fd; 9338965ea79SLois Curfman McInnes Scalar *vals,*svals; 9348965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) bview; 9358965ea79SLois Curfman McInnes MPI_Comm comm = vobj->comm; 9368965ea79SLois Curfman McInnes MPI_Status status; 9378965ea79SLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 9388965ea79SLois Curfman McInnes int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 9398965ea79SLois Curfman McInnes int tag = ((PetscObject)bview)->tag; 9408965ea79SLois Curfman McInnes 9418965ea79SLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 9428965ea79SLois Curfman McInnes if (!rank) { 9438965ea79SLois Curfman McInnes ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 9448965ea79SLois Curfman McInnes ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 94539ddd567SLois Curfman McInnes if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object"); 9468965ea79SLois Curfman McInnes } 9478965ea79SLois Curfman McInnes 9488965ea79SLois Curfman McInnes MPI_Bcast(header+1,3,MPI_INT,0,comm); 9498965ea79SLois Curfman McInnes M = header[1]; N = header[2]; 9508965ea79SLois Curfman McInnes /* determine ownership of all rows */ 9518965ea79SLois Curfman McInnes m = M/size + ((M % size) > rank); 9520452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 9538965ea79SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 9548965ea79SLois Curfman McInnes rowners[0] = 0; 9558965ea79SLois Curfman McInnes for ( i=2; i<=size; i++ ) { 9568965ea79SLois Curfman McInnes rowners[i] += rowners[i-1]; 9578965ea79SLois Curfman McInnes } 9588965ea79SLois Curfman McInnes rstart = rowners[rank]; 9598965ea79SLois Curfman McInnes rend = rowners[rank+1]; 9608965ea79SLois Curfman McInnes 9618965ea79SLois Curfman McInnes /* distribute row lengths to all processors */ 9620452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 9638965ea79SLois Curfman McInnes offlens = ourlens + (rend-rstart); 9648965ea79SLois Curfman McInnes if (!rank) { 9650452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 9668965ea79SLois Curfman McInnes ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 9670452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 9688965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 9698965ea79SLois Curfman McInnes MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 9700452661fSBarry Smith PetscFree(sndcounts); 9718965ea79SLois Curfman McInnes } 9728965ea79SLois Curfman McInnes else { 9738965ea79SLois Curfman McInnes MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 9748965ea79SLois Curfman McInnes } 9758965ea79SLois Curfman McInnes 9768965ea79SLois Curfman McInnes if (!rank) { 9778965ea79SLois Curfman McInnes /* calculate the number of nonzeros on each processor */ 9780452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 979cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 9808965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 9818965ea79SLois Curfman McInnes for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 9828965ea79SLois Curfman McInnes procsnz[i] += rowlengths[j]; 9838965ea79SLois Curfman McInnes } 9848965ea79SLois Curfman McInnes } 9850452661fSBarry Smith PetscFree(rowlengths); 9868965ea79SLois Curfman McInnes 9878965ea79SLois Curfman McInnes /* determine max buffer needed and allocate it */ 9888965ea79SLois Curfman McInnes maxnz = 0; 9898965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 9900452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 9918965ea79SLois Curfman McInnes } 9920452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 9938965ea79SLois Curfman McInnes 9948965ea79SLois Curfman McInnes /* read in my part of the matrix column indices */ 9958965ea79SLois Curfman McInnes nz = procsnz[0]; 9960452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 9978965ea79SLois Curfman McInnes ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 9988965ea79SLois Curfman McInnes 9998965ea79SLois Curfman McInnes /* read in every one elses and ship off */ 10008965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 10018965ea79SLois Curfman McInnes nz = procsnz[i]; 10028965ea79SLois Curfman McInnes ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 10038965ea79SLois Curfman McInnes MPI_Send(cols,nz,MPI_INT,i,tag,comm); 10048965ea79SLois Curfman McInnes } 10050452661fSBarry Smith PetscFree(cols); 10068965ea79SLois Curfman McInnes } 10078965ea79SLois Curfman McInnes else { 10088965ea79SLois Curfman McInnes /* determine buffer space needed for message */ 10098965ea79SLois Curfman McInnes nz = 0; 10108965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10118965ea79SLois Curfman McInnes nz += ourlens[i]; 10128965ea79SLois Curfman McInnes } 10130452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 10148965ea79SLois Curfman McInnes 10158965ea79SLois Curfman McInnes /* receive message of column indices*/ 10168965ea79SLois Curfman McInnes MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 10178965ea79SLois Curfman McInnes MPI_Get_count(&status,MPI_INT,&maxnz); 101839ddd567SLois Curfman McInnes if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file"); 10198965ea79SLois Curfman McInnes } 10208965ea79SLois Curfman McInnes 10218965ea79SLois Curfman McInnes /* loop over local rows, determining number of off diagonal entries */ 1022cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 10238965ea79SLois Curfman McInnes jj = 0; 10248965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10258965ea79SLois Curfman McInnes for ( j=0; j<ourlens[i]; j++ ) { 10268965ea79SLois Curfman McInnes if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 10278965ea79SLois Curfman McInnes jj++; 10288965ea79SLois Curfman McInnes } 10298965ea79SLois Curfman McInnes } 10308965ea79SLois Curfman McInnes 10318965ea79SLois Curfman McInnes /* create our matrix */ 10328965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10338965ea79SLois Curfman McInnes ourlens[i] -= offlens[i]; 10348965ea79SLois Curfman McInnes } 103539ddd567SLois Curfman McInnes if (type == MATMPIDENSE) { 1036b4fd4287SBarry Smith ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat); CHKERRQ(ierr); 10378965ea79SLois Curfman McInnes } 10388965ea79SLois Curfman McInnes A = *newmat; 10398965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10408965ea79SLois Curfman McInnes ourlens[i] += offlens[i]; 10418965ea79SLois Curfman McInnes } 10428965ea79SLois Curfman McInnes 10438965ea79SLois Curfman McInnes if (!rank) { 10440452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 10458965ea79SLois Curfman McInnes 10468965ea79SLois Curfman McInnes /* read in my part of the matrix numerical values */ 10478965ea79SLois Curfman McInnes nz = procsnz[0]; 10488965ea79SLois Curfman McInnes ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 10498965ea79SLois Curfman McInnes 10508965ea79SLois Curfman McInnes /* insert into matrix */ 10518965ea79SLois Curfman McInnes jj = rstart; 10528965ea79SLois Curfman McInnes smycols = mycols; 10538965ea79SLois Curfman McInnes svals = vals; 10548965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10558965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 10568965ea79SLois Curfman McInnes smycols += ourlens[i]; 10578965ea79SLois Curfman McInnes svals += ourlens[i]; 10588965ea79SLois Curfman McInnes jj++; 10598965ea79SLois Curfman McInnes } 10608965ea79SLois Curfman McInnes 10618965ea79SLois Curfman McInnes /* read in other processors and ship out */ 10628965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 10638965ea79SLois Curfman McInnes nz = procsnz[i]; 10648965ea79SLois Curfman McInnes ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 10658965ea79SLois Curfman McInnes MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 10668965ea79SLois Curfman McInnes } 10670452661fSBarry Smith PetscFree(procsnz); 10688965ea79SLois Curfman McInnes } 10698965ea79SLois Curfman McInnes else { 10708965ea79SLois Curfman McInnes /* receive numeric values */ 10710452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 10728965ea79SLois Curfman McInnes 10738965ea79SLois Curfman McInnes /* receive message of values*/ 10748965ea79SLois Curfman McInnes MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 10758965ea79SLois Curfman McInnes MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 107639ddd567SLois Curfman McInnes if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file"); 10778965ea79SLois Curfman McInnes 10788965ea79SLois Curfman McInnes /* insert into matrix */ 10798965ea79SLois Curfman McInnes jj = rstart; 10808965ea79SLois Curfman McInnes smycols = mycols; 10818965ea79SLois Curfman McInnes svals = vals; 10828965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10838965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 10848965ea79SLois Curfman McInnes smycols += ourlens[i]; 10858965ea79SLois Curfman McInnes svals += ourlens[i]; 10868965ea79SLois Curfman McInnes jj++; 10878965ea79SLois Curfman McInnes } 10888965ea79SLois Curfman McInnes } 10890452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 10908965ea79SLois Curfman McInnes 10918965ea79SLois Curfman McInnes ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 10928965ea79SLois Curfman McInnes ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 10938965ea79SLois Curfman McInnes return 0; 10948965ea79SLois Curfman McInnes } 1095