18965ea79SLois Curfman McInnes #ifndef lint 2*18f449edSLois Curfman McInnes static char vcid[] = "$Id: mpidense.c,v 1.11 1995/11/09 22:28:45 bsmith Exp curfman $"; 38965ea79SLois Curfman McInnes #endif 48965ea79SLois Curfman McInnes 5ed3cc1f0SBarry Smith /* 6ed3cc1f0SBarry Smith Basic functions for basic parallel dense matrices. 7ed3cc1f0SBarry Smith */ 8ed3cc1f0SBarry Smith 98965ea79SLois Curfman McInnes #include "mpidense.h" 108965ea79SLois Curfman McInnes #include "vec/vecimpl.h" 118965ea79SLois Curfman McInnes #include "inline/spops.h" 128965ea79SLois Curfman McInnes 1339ddd567SLois Curfman McInnes static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n, 148965ea79SLois Curfman McInnes int *idxn,Scalar *v,InsertMode addv) 158965ea79SLois Curfman McInnes { 1639ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 1739ddd567SLois Curfman McInnes int ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row; 188965ea79SLois Curfman McInnes 1939ddd567SLois Curfman McInnes if (mdn->insertmode != NOT_SET_VALUES && mdn->insertmode != addv) { 2039ddd567SLois Curfman McInnes SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds"); 218965ea79SLois Curfman McInnes } 2239ddd567SLois Curfman McInnes mdn->insertmode = addv; 238965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 2439ddd567SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row"); 2539ddd567SLois Curfman McInnes if (idxm[i] >= mdn->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large"); 268965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 278965ea79SLois Curfman McInnes row = idxm[i] - rstart; 288965ea79SLois Curfman McInnes for ( j=0; j<n; j++ ) { 2939ddd567SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column"); 3039ddd567SLois Curfman McInnes if (idxn[j] >= mdn->N) 3139ddd567SLois Curfman McInnes SETERRQ(1,"MatSetValues_MPIDense:Column too large"); 3239ddd567SLois Curfman McInnes ierr = MatSetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j,addv); 3339ddd567SLois Curfman McInnes CHKERRQ(ierr); 348965ea79SLois Curfman McInnes } 358965ea79SLois Curfman McInnes } 368965ea79SLois Curfman McInnes else { 3739ddd567SLois Curfman McInnes ierr = StashValues_Private(&mdn->stash,idxm[i],n,idxn,v+i*n,addv); 3839ddd567SLois Curfman McInnes CHKERRQ(ierr); 398965ea79SLois Curfman McInnes } 408965ea79SLois Curfman McInnes } 418965ea79SLois Curfman McInnes return 0; 428965ea79SLois Curfman McInnes } 438965ea79SLois Curfman McInnes 4439ddd567SLois Curfman McInnes static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 458965ea79SLois Curfman McInnes { 4639ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 478965ea79SLois Curfman McInnes MPI_Comm comm = mat->comm; 4839ddd567SLois Curfman McInnes int size = mdn->size, *owners = mdn->rowners, rank = mdn->rank; 498965ea79SLois Curfman McInnes int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 5039ddd567SLois Curfman McInnes int tag = mat->tag, *owner,*starts,count,ierr; 518965ea79SLois Curfman McInnes InsertMode addv; 5239ddd567SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 538965ea79SLois Curfman McInnes Scalar *rvalues,*svalues; 548965ea79SLois Curfman McInnes 558965ea79SLois Curfman McInnes /* make sure all processors are either in INSERTMODE or ADDMODE */ 5639ddd567SLois Curfman McInnes MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT, 5739ddd567SLois Curfman McInnes MPI_BOR,comm); 5839ddd567SLois Curfman McInnes if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1, 5939ddd567SLois Curfman McInnes "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs"); 608965ea79SLois Curfman McInnes } 6139ddd567SLois Curfman McInnes mdn->insertmode = addv; /* in case this processor had no cache */ 628965ea79SLois Curfman McInnes 638965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 640452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 65cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 660452661fSBarry Smith owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 6739ddd567SLois Curfman McInnes for ( i=0; i<mdn->stash.n; i++ ) { 6839ddd567SLois Curfman McInnes idx = mdn->stash.idx[i]; 698965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 708965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 718965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; break; 728965ea79SLois Curfman McInnes } 738965ea79SLois Curfman McInnes } 748965ea79SLois Curfman McInnes } 758965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 768965ea79SLois Curfman McInnes 778965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 780452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 7939ddd567SLois Curfman McInnes MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm); 808965ea79SLois Curfman McInnes nreceives = work[rank]; 8139ddd567SLois Curfman McInnes MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm); 828965ea79SLois Curfman McInnes nmax = work[rank]; 830452661fSBarry Smith PetscFree(work); 848965ea79SLois Curfman McInnes 858965ea79SLois Curfman McInnes /* post receives: 868965ea79SLois Curfman McInnes 1) each message will consist of ordered pairs 878965ea79SLois Curfman McInnes (global index,value) we store the global index as a double 888965ea79SLois Curfman McInnes to simplify the message passing. 898965ea79SLois Curfman McInnes 2) since we don't know how long each individual message is we 908965ea79SLois Curfman McInnes allocate the largest needed buffer for each receive. Potentially 918965ea79SLois Curfman McInnes this is a lot of wasted space. 928965ea79SLois Curfman McInnes 938965ea79SLois Curfman McInnes This could be done better. 948965ea79SLois Curfman McInnes */ 950452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 968965ea79SLois Curfman McInnes CHKPTRQ(rvalues); 970452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 988965ea79SLois Curfman McInnes CHKPTRQ(recv_waits); 998965ea79SLois Curfman McInnes for ( i=0; i<nreceives; i++ ) { 10039ddd567SLois Curfman McInnes MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1018965ea79SLois Curfman McInnes comm,recv_waits+i); 1028965ea79SLois Curfman McInnes } 1038965ea79SLois Curfman McInnes 1048965ea79SLois Curfman McInnes /* do sends: 1058965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 1068965ea79SLois Curfman McInnes the ith processor 1078965ea79SLois Curfman McInnes */ 1080452661fSBarry Smith svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) ); 10939ddd567SLois Curfman McInnes CHKPTRQ(svalues); 1100452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 1118965ea79SLois Curfman McInnes CHKPTRQ(send_waits); 1120452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 1138965ea79SLois Curfman McInnes starts[0] = 0; 1148965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 11539ddd567SLois Curfman McInnes for ( i=0; i<mdn->stash.n; i++ ) { 11639ddd567SLois Curfman McInnes svalues[3*starts[owner[i]]] = (Scalar) mdn->stash.idx[i]; 11739ddd567SLois Curfman McInnes svalues[3*starts[owner[i]]+1] = (Scalar) mdn->stash.idy[i]; 11839ddd567SLois Curfman McInnes svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i]; 1198965ea79SLois Curfman McInnes } 1200452661fSBarry Smith PetscFree(owner); 1218965ea79SLois Curfman McInnes starts[0] = 0; 1228965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1238965ea79SLois Curfman McInnes count = 0; 1248965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 1258965ea79SLois Curfman McInnes if (procs[i]) { 12639ddd567SLois Curfman McInnes MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag, 1278965ea79SLois Curfman McInnes comm,send_waits+count++); 1288965ea79SLois Curfman McInnes } 1298965ea79SLois Curfman McInnes } 1300452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 1318965ea79SLois Curfman McInnes 1328965ea79SLois Curfman McInnes /* Free cache space */ 13339ddd567SLois Curfman McInnes ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr); 1348965ea79SLois Curfman McInnes 13539ddd567SLois Curfman McInnes mdn->svalues = svalues; mdn->rvalues = rvalues; 13639ddd567SLois Curfman McInnes mdn->nsends = nsends; mdn->nrecvs = nreceives; 13739ddd567SLois Curfman McInnes mdn->send_waits = send_waits; mdn->recv_waits = recv_waits; 13839ddd567SLois Curfman McInnes mdn->rmax = nmax; 1398965ea79SLois Curfman McInnes 1408965ea79SLois Curfman McInnes return 0; 1418965ea79SLois Curfman McInnes } 14239ddd567SLois Curfman McInnes extern int MatSetUpMultiply_MPIDense(Mat); 1438965ea79SLois Curfman McInnes 14439ddd567SLois Curfman McInnes static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 1458965ea79SLois Curfman McInnes { 14639ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 1478965ea79SLois Curfman McInnes MPI_Status *send_status,recv_status; 14839ddd567SLois Curfman McInnes int imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col; 1498965ea79SLois Curfman McInnes Scalar *values,val; 15039ddd567SLois Curfman McInnes InsertMode addv = mdn->insertmode; 1518965ea79SLois Curfman McInnes 1528965ea79SLois Curfman McInnes /* wait on receives */ 1538965ea79SLois Curfman McInnes while (count) { 15439ddd567SLois Curfman McInnes MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status); 1558965ea79SLois Curfman McInnes /* unpack receives into our local space */ 15639ddd567SLois Curfman McInnes values = mdn->rvalues + 3*imdex*mdn->rmax; 1578965ea79SLois Curfman McInnes MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 1588965ea79SLois Curfman McInnes n = n/3; 1598965ea79SLois Curfman McInnes for ( i=0; i<n; i++ ) { 16039ddd567SLois Curfman McInnes row = (int) PETSCREAL(values[3*i]) - mdn->rstart; 1618965ea79SLois Curfman McInnes col = (int) PETSCREAL(values[3*i+1]); 1628965ea79SLois Curfman McInnes val = values[3*i+2]; 16339ddd567SLois Curfman McInnes if (col >= 0 && col < mdn->N) { 16439ddd567SLois Curfman McInnes MatSetValues(mdn->A,1,&row,1,&col,&val,addv); 1658965ea79SLois Curfman McInnes } 16639ddd567SLois Curfman McInnes else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");} 1678965ea79SLois Curfman McInnes } 1688965ea79SLois Curfman McInnes count--; 1698965ea79SLois Curfman McInnes } 1700452661fSBarry Smith PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues); 1718965ea79SLois Curfman McInnes 1728965ea79SLois Curfman McInnes /* wait on sends */ 17339ddd567SLois Curfman McInnes if (mdn->nsends) { 1740452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) ); 1758965ea79SLois Curfman McInnes CHKPTRQ(send_status); 17639ddd567SLois Curfman McInnes MPI_Waitall(mdn->nsends,mdn->send_waits,send_status); 1770452661fSBarry Smith PetscFree(send_status); 1788965ea79SLois Curfman McInnes } 1790452661fSBarry Smith PetscFree(mdn->send_waits); PetscFree(mdn->svalues); 1808965ea79SLois Curfman McInnes 18139ddd567SLois Curfman McInnes mdn->insertmode = NOT_SET_VALUES; 18239ddd567SLois Curfman McInnes ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr); 18339ddd567SLois Curfman McInnes ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr); 1848965ea79SLois Curfman McInnes 18539ddd567SLois Curfman McInnes if (!mdn->assembled && mode == FINAL_ASSEMBLY) { 18639ddd567SLois Curfman McInnes ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr); 1878965ea79SLois Curfman McInnes } 18839ddd567SLois Curfman McInnes mdn->assembled = 1; 1898965ea79SLois Curfman McInnes return 0; 1908965ea79SLois Curfman McInnes } 1918965ea79SLois Curfman McInnes 19239ddd567SLois Curfman McInnes static int MatZeroEntries_MPIDense(Mat A) 1938965ea79SLois Curfman McInnes { 19439ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 19539ddd567SLois Curfman McInnes return MatZeroEntries(l->A); 1968965ea79SLois Curfman McInnes } 1978965ea79SLois Curfman McInnes 1988965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the 1998965ea79SLois Curfman McInnes matrix is square and the column and row owerships are identical. 2008965ea79SLois Curfman McInnes This is a BUG. The only way to fix it seems to be to access 2013501a2bdSLois Curfman McInnes mdn->A and mdn->B directly and not through the MatZeroRows() 2028965ea79SLois Curfman McInnes routine. 2038965ea79SLois Curfman McInnes */ 20439ddd567SLois Curfman McInnes static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag) 2058965ea79SLois Curfman McInnes { 20639ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 2078965ea79SLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 2088965ea79SLois Curfman McInnes int *procs,*nprocs,j,found,idx,nsends,*work; 2098965ea79SLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 2108965ea79SLois Curfman McInnes int *rvalues,tag = A->tag,count,base,slen,n,*source; 2118965ea79SLois Curfman McInnes int *lens,imdex,*lrows,*values; 2128965ea79SLois Curfman McInnes MPI_Comm comm = A->comm; 2138965ea79SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 2148965ea79SLois Curfman McInnes MPI_Status recv_status,*send_status; 2158965ea79SLois Curfman McInnes IS istmp; 2168965ea79SLois Curfman McInnes 21739ddd567SLois Curfman McInnes if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIDense:Must assemble matrix"); 2188965ea79SLois Curfman McInnes ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 2198965ea79SLois Curfman McInnes ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 2208965ea79SLois Curfman McInnes 2218965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 2220452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 223cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 2240452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 2258965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 2268965ea79SLois Curfman McInnes idx = rows[i]; 2278965ea79SLois Curfman McInnes found = 0; 2288965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 2298965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 2308965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 2318965ea79SLois Curfman McInnes } 2328965ea79SLois Curfman McInnes } 23339ddd567SLois Curfman McInnes if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range"); 2348965ea79SLois Curfman McInnes } 2358965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 2368965ea79SLois Curfman McInnes 2378965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 2380452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 2398965ea79SLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 2408965ea79SLois Curfman McInnes nrecvs = work[rank]; 2418965ea79SLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 2428965ea79SLois Curfman McInnes nmax = work[rank]; 2430452661fSBarry Smith PetscFree(work); 2448965ea79SLois Curfman McInnes 2458965ea79SLois Curfman McInnes /* post receives: */ 2460452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 2478965ea79SLois Curfman McInnes CHKPTRQ(rvalues); 2480452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 2498965ea79SLois Curfman McInnes CHKPTRQ(recv_waits); 2508965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 2518965ea79SLois Curfman McInnes MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 2528965ea79SLois Curfman McInnes } 2538965ea79SLois Curfman McInnes 2548965ea79SLois Curfman McInnes /* do sends: 2558965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 2568965ea79SLois Curfman McInnes the ith processor 2578965ea79SLois Curfman McInnes */ 2580452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 2590452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 2608965ea79SLois Curfman McInnes CHKPTRQ(send_waits); 2610452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 2628965ea79SLois Curfman McInnes starts[0] = 0; 2638965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2648965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 2658965ea79SLois Curfman McInnes svalues[starts[owner[i]]++] = rows[i]; 2668965ea79SLois Curfman McInnes } 2678965ea79SLois Curfman McInnes ISRestoreIndices(is,&rows); 2688965ea79SLois Curfman McInnes 2698965ea79SLois Curfman McInnes starts[0] = 0; 2708965ea79SLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2718965ea79SLois Curfman McInnes count = 0; 2728965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 2738965ea79SLois Curfman McInnes if (procs[i]) { 2748965ea79SLois Curfman McInnes MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 2758965ea79SLois Curfman McInnes } 2768965ea79SLois Curfman McInnes } 2770452661fSBarry Smith PetscFree(starts); 2788965ea79SLois Curfman McInnes 2798965ea79SLois Curfman McInnes base = owners[rank]; 2808965ea79SLois Curfman McInnes 2818965ea79SLois Curfman McInnes /* wait on receives */ 2820452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 2838965ea79SLois Curfman McInnes source = lens + nrecvs; 2848965ea79SLois Curfman McInnes count = nrecvs; slen = 0; 2858965ea79SLois Curfman McInnes while (count) { 2868965ea79SLois Curfman McInnes MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 2878965ea79SLois Curfman McInnes /* unpack receives into our local space */ 2888965ea79SLois Curfman McInnes MPI_Get_count(&recv_status,MPI_INT,&n); 2898965ea79SLois Curfman McInnes source[imdex] = recv_status.MPI_SOURCE; 2908965ea79SLois Curfman McInnes lens[imdex] = n; 2918965ea79SLois Curfman McInnes slen += n; 2928965ea79SLois Curfman McInnes count--; 2938965ea79SLois Curfman McInnes } 2940452661fSBarry Smith PetscFree(recv_waits); 2958965ea79SLois Curfman McInnes 2968965ea79SLois Curfman McInnes /* move the data into the send scatter */ 2970452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 2988965ea79SLois Curfman McInnes count = 0; 2998965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 3008965ea79SLois Curfman McInnes values = rvalues + i*nmax; 3018965ea79SLois Curfman McInnes for ( j=0; j<lens[i]; j++ ) { 3028965ea79SLois Curfman McInnes lrows[count++] = values[j] - base; 3038965ea79SLois Curfman McInnes } 3048965ea79SLois Curfman McInnes } 3050452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 3060452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 3078965ea79SLois Curfman McInnes 3088965ea79SLois Curfman McInnes /* actually zap the local rows */ 3098965ea79SLois Curfman McInnes ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 3108965ea79SLois Curfman McInnes PLogObjectParent(A,istmp); 3110452661fSBarry Smith PetscFree(lrows); 3128965ea79SLois Curfman McInnes ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 3138965ea79SLois Curfman McInnes ierr = ISDestroy(istmp); CHKERRQ(ierr); 3148965ea79SLois Curfman McInnes 3158965ea79SLois Curfman McInnes /* wait on sends */ 3168965ea79SLois Curfman McInnes if (nsends) { 3170452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 3188965ea79SLois Curfman McInnes CHKPTRQ(send_status); 3198965ea79SLois Curfman McInnes MPI_Waitall(nsends,send_waits,send_status); 3200452661fSBarry Smith PetscFree(send_status); 3218965ea79SLois Curfman McInnes } 3220452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 3238965ea79SLois Curfman McInnes 3248965ea79SLois Curfman McInnes return 0; 3258965ea79SLois Curfman McInnes } 3268965ea79SLois Curfman McInnes 32739ddd567SLois Curfman McInnes static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 3288965ea79SLois Curfman McInnes { 32939ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 3308965ea79SLois Curfman McInnes int ierr; 33139ddd567SLois Curfman McInnes if (!mdn->assembled) 33239ddd567SLois Curfman McInnes SETERRQ(1,"MatMult_MPIDense:Must assemble matrix first"); 33339ddd567SLois Curfman McInnes ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 33439ddd567SLois Curfman McInnes CHKERRQ(ierr); 33539ddd567SLois Curfman McInnes ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 33639ddd567SLois Curfman McInnes CHKERRQ(ierr); 33739ddd567SLois Curfman McInnes ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr); 3388965ea79SLois Curfman McInnes return 0; 3398965ea79SLois Curfman McInnes } 3408965ea79SLois Curfman McInnes 34139ddd567SLois Curfman McInnes static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 3428965ea79SLois Curfman McInnes { 34339ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 3448965ea79SLois Curfman McInnes int ierr; 34539ddd567SLois Curfman McInnes if (!mdn->assembled) 34639ddd567SLois Curfman McInnes SETERRQ(1,"MatMultAdd_MPIDense:Must assemble matrix first"); 3473501a2bdSLois Curfman McInnes ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 34839ddd567SLois Curfman McInnes CHKERRQ(ierr); 3493501a2bdSLois Curfman McInnes ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 35039ddd567SLois Curfman McInnes CHKERRQ(ierr); 35139ddd567SLois Curfman McInnes ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr); 3528965ea79SLois Curfman McInnes return 0; 3538965ea79SLois Curfman McInnes } 3548965ea79SLois Curfman McInnes 355096963f5SLois Curfman McInnes static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy) 356096963f5SLois Curfman McInnes { 357096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 358096963f5SLois Curfman McInnes int ierr; 3593501a2bdSLois Curfman McInnes Scalar zero = 0.0; 360096963f5SLois Curfman McInnes 361096963f5SLois Curfman McInnes if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIDense:must assemble matrix"); 3623501a2bdSLois Curfman McInnes ierr = VecSet(&zero,yy); CHKERRQ(ierr); 363096963f5SLois Curfman McInnes ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr); 364096963f5SLois Curfman McInnes ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 365096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 366096963f5SLois Curfman McInnes ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 367096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 368096963f5SLois Curfman McInnes return 0; 369096963f5SLois Curfman McInnes } 370096963f5SLois Curfman McInnes 371096963f5SLois Curfman McInnes static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 372096963f5SLois Curfman McInnes { 373096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 374096963f5SLois Curfman McInnes int ierr; 375096963f5SLois Curfman McInnes 376096963f5SLois Curfman McInnes if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIDense:must assemble matrix"); 3773501a2bdSLois Curfman McInnes ierr = VecCopy(yy,zz); CHKERRQ(ierr); 3783501a2bdSLois Curfman McInnes ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr); 379096963f5SLois Curfman McInnes ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 380096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 381096963f5SLois Curfman McInnes ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 382096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 383096963f5SLois Curfman McInnes return 0; 384096963f5SLois Curfman McInnes } 385096963f5SLois Curfman McInnes 38639ddd567SLois Curfman McInnes static int MatGetDiagonal_MPIDense(Mat A,Vec v) 3878965ea79SLois Curfman McInnes { 38839ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 389096963f5SLois Curfman McInnes Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data; 390096963f5SLois Curfman McInnes int ierr, i, n, m = a->m, radd; 391096963f5SLois Curfman McInnes Scalar *x; 392ed3cc1f0SBarry Smith 39339ddd567SLois Curfman McInnes if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIDense:must assemble matrix"); 394096963f5SLois Curfman McInnes ierr = VecGetArray(v,&x); CHKERRQ(ierr); 395096963f5SLois Curfman McInnes ierr = VecGetSize(v,&n); CHKERRQ(ierr); 396096963f5SLois Curfman McInnes if (n != a->M) SETERRQ(1,"MatGetDiagonal_SeqDense:Nonconforming mat and vec"); 397096963f5SLois Curfman McInnes radd = a->rstart*m*m; 398096963f5SLois Curfman McInnes for ( i=0; i<m; i++ ) { 399096963f5SLois Curfman McInnes x[i] = aloc->v[radd + i*m + i]; 400096963f5SLois Curfman McInnes } 401096963f5SLois Curfman McInnes return 0; 4028965ea79SLois Curfman McInnes } 4038965ea79SLois Curfman McInnes 40439ddd567SLois Curfman McInnes static int MatDestroy_MPIDense(PetscObject obj) 4058965ea79SLois Curfman McInnes { 4068965ea79SLois Curfman McInnes Mat mat = (Mat) obj; 4073501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4088965ea79SLois Curfman McInnes int ierr; 409ed3cc1f0SBarry Smith 4108965ea79SLois Curfman McInnes #if defined(PETSC_LOG) 4113501a2bdSLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N); 4128965ea79SLois Curfman McInnes #endif 4130452661fSBarry Smith PetscFree(mdn->rowners); 4143501a2bdSLois Curfman McInnes ierr = MatDestroy(mdn->A); CHKERRQ(ierr); 4153501a2bdSLois Curfman McInnes if (mdn->lvec) VecDestroy(mdn->lvec); 4163501a2bdSLois Curfman McInnes if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 4170452661fSBarry Smith PetscFree(mdn); 4188965ea79SLois Curfman McInnes PLogObjectDestroy(mat); 4190452661fSBarry Smith PetscHeaderDestroy(mat); 4208965ea79SLois Curfman McInnes return 0; 4218965ea79SLois Curfman McInnes } 42239ddd567SLois Curfman McInnes 4238965ea79SLois Curfman McInnes #include "pinclude/pviewer.h" 4248965ea79SLois Curfman McInnes 42539ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer) 4268965ea79SLois Curfman McInnes { 42739ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4288965ea79SLois Curfman McInnes int ierr; 42939ddd567SLois Curfman McInnes if (mdn->size == 1) { 43039ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 4318965ea79SLois Curfman McInnes } 43239ddd567SLois Curfman McInnes else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported"); 4338965ea79SLois Curfman McInnes return 0; 4348965ea79SLois Curfman McInnes } 4358965ea79SLois Curfman McInnes 43639ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer) 4378965ea79SLois Curfman McInnes { 43839ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 43939ddd567SLois Curfman McInnes int ierr, format; 4408965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) viewer; 4418965ea79SLois Curfman McInnes FILE *fd; 4428965ea79SLois Curfman McInnes 4433501a2bdSLois Curfman McInnes ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 4448965ea79SLois Curfman McInnes if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 4458965ea79SLois Curfman McInnes ierr = ViewerFileGetFormat_Private(viewer,&format); 4463501a2bdSLois Curfman McInnes if (format == FILE_FORMAT_INFO_DETAILED) { 447096963f5SLois Curfman McInnes int nz, nzalloc, mem, rank; 448096963f5SLois Curfman McInnes MPI_Comm_rank(mat->comm,&rank); 449096963f5SLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 450096963f5SLois Curfman McInnes MPIU_Seq_begin(mat->comm,1); 4513501a2bdSLois Curfman McInnes fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n", 452096963f5SLois Curfman McInnes rank,mdn->m,nz,nzalloc,mem); 453096963f5SLois Curfman McInnes fflush(fd); 454096963f5SLois Curfman McInnes MPIU_Seq_end(mat->comm,1); 4553501a2bdSLois Curfman McInnes ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr); 4563501a2bdSLois Curfman McInnes return 0; 4573501a2bdSLois Curfman McInnes } 4583501a2bdSLois Curfman McInnes else if (format == FILE_FORMAT_INFO) { 459096963f5SLois Curfman McInnes return 0; 4608965ea79SLois Curfman McInnes } 4618965ea79SLois Curfman McInnes } 4628965ea79SLois Curfman McInnes if (vobj->type == ASCII_FILE_VIEWER) { 4638965ea79SLois Curfman McInnes MPIU_Seq_begin(mat->comm,1); 46439ddd567SLois Curfman McInnes fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n", 46539ddd567SLois Curfman McInnes mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n); 46639ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 4678965ea79SLois Curfman McInnes fflush(fd); 4688965ea79SLois Curfman McInnes MPIU_Seq_end(mat->comm,1); 4698965ea79SLois Curfman McInnes } 4708965ea79SLois Curfman McInnes else { 47139ddd567SLois Curfman McInnes int size = mdn->size, rank = mdn->rank; 4728965ea79SLois Curfman McInnes if (size == 1) { 47339ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 4748965ea79SLois Curfman McInnes } 4758965ea79SLois Curfman McInnes else { 4768965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 4778965ea79SLois Curfman McInnes Mat A; 47839ddd567SLois Curfman McInnes int M = mdn->M, N = mdn->N,m,row,i, nz, *cols; 47939ddd567SLois Curfman McInnes Scalar *vals; 48039ddd567SLois Curfman McInnes Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data; 4818965ea79SLois Curfman McInnes 4828965ea79SLois Curfman McInnes if (!rank) { 483*18f449edSLois Curfman McInnes ierr = MatCreateMPIDense(mat->comm,M,M,N,N,0,&A); CHKERRQ(ierr); 4848965ea79SLois Curfman McInnes } 4858965ea79SLois Curfman McInnes else { 486*18f449edSLois Curfman McInnes ierr = MatCreateMPIDense(mat->comm,0,M,N,N,0,&A); CHKERRQ(ierr); 4878965ea79SLois Curfman McInnes } 4888965ea79SLois Curfman McInnes PLogObjectParent(mat,A); 4898965ea79SLois Curfman McInnes 49039ddd567SLois Curfman McInnes /* Copy the matrix ... This isn't the most efficient means, 49139ddd567SLois Curfman McInnes but it's quick for now */ 49239ddd567SLois Curfman McInnes row = mdn->rstart; m = Amdn->m; 4938965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 49439ddd567SLois Curfman McInnes ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 49539ddd567SLois Curfman McInnes ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr); 49639ddd567SLois Curfman McInnes ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 49739ddd567SLois Curfman McInnes row++; 4988965ea79SLois Curfman McInnes } 4998965ea79SLois Curfman McInnes 5008965ea79SLois Curfman McInnes ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 5018965ea79SLois Curfman McInnes ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 5028965ea79SLois Curfman McInnes if (!rank) { 50339ddd567SLois Curfman McInnes ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr); 5048965ea79SLois Curfman McInnes } 5058965ea79SLois Curfman McInnes ierr = MatDestroy(A); CHKERRQ(ierr); 5068965ea79SLois Curfman McInnes } 5078965ea79SLois Curfman McInnes } 5088965ea79SLois Curfman McInnes return 0; 5098965ea79SLois Curfman McInnes } 5108965ea79SLois Curfman McInnes 51139ddd567SLois Curfman McInnes static int MatView_MPIDense(PetscObject obj,Viewer viewer) 5128965ea79SLois Curfman McInnes { 5138965ea79SLois Curfman McInnes Mat mat = (Mat) obj; 51439ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 5158965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) viewer; 51639ddd567SLois Curfman McInnes int ierr; 5178965ea79SLois Curfman McInnes 51839ddd567SLois Curfman McInnes if (!mdn->assembled) SETERRQ(1,"MatView_MPIDense:must assemble matrix"); 5198965ea79SLois Curfman McInnes if (!viewer) { 5208965ea79SLois Curfman McInnes viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 5218965ea79SLois Curfman McInnes } 52239ddd567SLois Curfman McInnes if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 52339ddd567SLois Curfman McInnes ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 5248965ea79SLois Curfman McInnes } 5258965ea79SLois Curfman McInnes else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 52639ddd567SLois Curfman McInnes ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 5278965ea79SLois Curfman McInnes } 5288965ea79SLois Curfman McInnes else if (vobj->type == BINARY_FILE_VIEWER) { 52939ddd567SLois Curfman McInnes return MatView_MPIDense_Binary(mat,viewer); 5308965ea79SLois Curfman McInnes } 5318965ea79SLois Curfman McInnes return 0; 5328965ea79SLois Curfman McInnes } 5338965ea79SLois Curfman McInnes 5343501a2bdSLois Curfman McInnes static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz, 5358965ea79SLois Curfman McInnes int *nzalloc,int *mem) 5368965ea79SLois Curfman McInnes { 5373501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 5383501a2bdSLois Curfman McInnes Mat mdn = mat->A; 53939ddd567SLois Curfman McInnes int ierr, isend[3], irecv[3]; 5408965ea79SLois Curfman McInnes 5413501a2bdSLois Curfman McInnes ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 5428965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 5438965ea79SLois Curfman McInnes *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 5448965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 5453501a2bdSLois Curfman McInnes MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm); 5468965ea79SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 5478965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 5483501a2bdSLois Curfman McInnes MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm); 5498965ea79SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 5508965ea79SLois Curfman McInnes } 5518965ea79SLois Curfman McInnes return 0; 5528965ea79SLois Curfman McInnes } 5538965ea79SLois Curfman McInnes 55439ddd567SLois Curfman McInnes static int MatSetOption_MPIDense(Mat A,MatOption op) 5558965ea79SLois Curfman McInnes { 55639ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 5578965ea79SLois Curfman McInnes 5588965ea79SLois Curfman McInnes if (op == NO_NEW_NONZERO_LOCATIONS || 5598965ea79SLois Curfman McInnes op == YES_NEW_NONZERO_LOCATIONS || 5608965ea79SLois Curfman McInnes op == COLUMNS_SORTED || 5618965ea79SLois Curfman McInnes op == ROW_ORIENTED) { 5628965ea79SLois Curfman McInnes MatSetOption(a->A,op); 5638965ea79SLois Curfman McInnes } 5648965ea79SLois Curfman McInnes else if (op == ROWS_SORTED || 5658965ea79SLois Curfman McInnes op == SYMMETRIC_MATRIX || 5668965ea79SLois Curfman McInnes op == STRUCTURALLY_SYMMETRIC_MATRIX || 5678965ea79SLois Curfman McInnes op == YES_NEW_DIAGONALS) 56839ddd567SLois Curfman McInnes PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n"); 5698965ea79SLois Curfman McInnes else if (op == COLUMN_ORIENTED) 57039ddd567SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:COLUMN_ORIENTED");} 5718965ea79SLois Curfman McInnes else if (op == NO_NEW_DIAGONALS) 57239ddd567SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");} 5738965ea79SLois Curfman McInnes else 57439ddd567SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");} 5758965ea79SLois Curfman McInnes return 0; 5768965ea79SLois Curfman McInnes } 5778965ea79SLois Curfman McInnes 5783501a2bdSLois Curfman McInnes static int MatGetSize_MPIDense(Mat A,int *m,int *n) 5798965ea79SLois Curfman McInnes { 5803501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 5818965ea79SLois Curfman McInnes *m = mat->M; *n = mat->N; 5828965ea79SLois Curfman McInnes return 0; 5838965ea79SLois Curfman McInnes } 5848965ea79SLois Curfman McInnes 5853501a2bdSLois Curfman McInnes static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n) 5868965ea79SLois Curfman McInnes { 5873501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 5888965ea79SLois Curfman McInnes *m = mat->m; *n = mat->N; 5898965ea79SLois Curfman McInnes return 0; 5908965ea79SLois Curfman McInnes } 5918965ea79SLois Curfman McInnes 5923501a2bdSLois Curfman McInnes static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n) 5938965ea79SLois Curfman McInnes { 5943501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 5958965ea79SLois Curfman McInnes *m = mat->rstart; *n = mat->rend; 5968965ea79SLois Curfman McInnes return 0; 5978965ea79SLois Curfman McInnes } 5988965ea79SLois Curfman McInnes 5993501a2bdSLois Curfman McInnes static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v) 6008965ea79SLois Curfman McInnes { 6013501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 60239ddd567SLois Curfman McInnes int lrow, rstart = mat->rstart, rend = mat->rend; 6038965ea79SLois Curfman McInnes 60439ddd567SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows") 6058965ea79SLois Curfman McInnes lrow = row - rstart; 60639ddd567SLois Curfman McInnes return MatGetRow(mat->A,lrow,nz,idx,v); 6078965ea79SLois Curfman McInnes } 6088965ea79SLois Curfman McInnes 60939ddd567SLois Curfman McInnes static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v) 6108965ea79SLois Curfman McInnes { 6110452661fSBarry Smith if (idx) PetscFree(*idx); 6120452661fSBarry Smith if (v) PetscFree(*v); 6138965ea79SLois Curfman McInnes return 0; 6148965ea79SLois Curfman McInnes } 6158965ea79SLois Curfman McInnes 616cddf8d76SBarry Smith static int MatNorm_MPIDense(Mat A,NormType type,double *norm) 617096963f5SLois Curfman McInnes { 6183501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) A->data; 6193501a2bdSLois Curfman McInnes Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data; 6203501a2bdSLois Curfman McInnes int ierr, i, j; 6213501a2bdSLois Curfman McInnes double sum = 0.0; 6223501a2bdSLois Curfman McInnes Scalar *v = mat->v; 6233501a2bdSLois Curfman McInnes 6243501a2bdSLois Curfman McInnes if (!mdn->assembled) SETERRQ(1,"MatNorm_MPIDense:Must assemble matrix"); 6253501a2bdSLois Curfman McInnes if (mdn->size == 1) { 6263501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr); 6273501a2bdSLois Curfman McInnes } else { 6283501a2bdSLois Curfman McInnes if (type == NORM_FROBENIUS) { 6293501a2bdSLois Curfman McInnes for (i=0; i<mat->n*mat->m; i++ ) { 6303501a2bdSLois Curfman McInnes #if defined(PETSC_COMPLEX) 6313501a2bdSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 6323501a2bdSLois Curfman McInnes #else 6333501a2bdSLois Curfman McInnes sum += (*v)*(*v); v++; 6343501a2bdSLois Curfman McInnes #endif 6353501a2bdSLois Curfman McInnes } 6363501a2bdSLois Curfman McInnes MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,A->comm); 6373501a2bdSLois Curfman McInnes *norm = sqrt(*norm); 6383501a2bdSLois Curfman McInnes PLogFlops(2*mat->n*mat->m); 6393501a2bdSLois Curfman McInnes } 6403501a2bdSLois Curfman McInnes else if (type == NORM_1) { 6413501a2bdSLois Curfman McInnes double *tmp, *tmp2; 6420452661fSBarry Smith tmp = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp); 6433501a2bdSLois Curfman McInnes tmp2 = tmp + mdn->N; 644cddf8d76SBarry Smith PetscMemzero(tmp,2*mdn->N*sizeof(double)); 645096963f5SLois Curfman McInnes *norm = 0.0; 6463501a2bdSLois Curfman McInnes v = mat->v; 6473501a2bdSLois Curfman McInnes for ( j=0; j<mat->n; j++ ) { 6483501a2bdSLois Curfman McInnes for ( i=0; i<mat->m; i++ ) { 64967e560aaSBarry Smith tmp[j] += PetscAbsScalar(*v); v++; 6503501a2bdSLois Curfman McInnes } 6513501a2bdSLois Curfman McInnes } 6523501a2bdSLois Curfman McInnes MPI_Allreduce((void*)tmp,(void*)tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm); 6533501a2bdSLois Curfman McInnes for ( j=0; j<mdn->N; j++ ) { 6543501a2bdSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 6553501a2bdSLois Curfman McInnes } 6560452661fSBarry Smith PetscFree(tmp); 6573501a2bdSLois Curfman McInnes PLogFlops(mat->n*mat->m); 6583501a2bdSLois Curfman McInnes } 6593501a2bdSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 6603501a2bdSLois Curfman McInnes double ntemp; 6613501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr); 6623501a2bdSLois Curfman McInnes MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,A->comm); 6633501a2bdSLois Curfman McInnes } 6643501a2bdSLois Curfman McInnes else { 6653501a2bdSLois Curfman McInnes SETERRQ(1,"MatNorm_MPIDense:No support for two norm"); 6663501a2bdSLois Curfman McInnes } 6673501a2bdSLois Curfman McInnes } 6683501a2bdSLois Curfman McInnes return 0; 6693501a2bdSLois Curfman McInnes } 6703501a2bdSLois Curfman McInnes 6713501a2bdSLois Curfman McInnes static int MatTranspose_MPIDense(Mat A,Mat *matout) 6723501a2bdSLois Curfman McInnes { 6733501a2bdSLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 6743501a2bdSLois Curfman McInnes Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data; 6753501a2bdSLois Curfman McInnes Mat B; 6763501a2bdSLois Curfman McInnes int M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart; 6773501a2bdSLois Curfman McInnes int j, i, ierr; 6783501a2bdSLois Curfman McInnes Scalar *v; 6793501a2bdSLois Curfman McInnes 6803501a2bdSLois Curfman McInnes if (!matout && M != N) 6813501a2bdSLois Curfman McInnes SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place"); 682*18f449edSLois Curfman McInnes ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,&B); CHKERRQ(ierr); 6833501a2bdSLois Curfman McInnes 6843501a2bdSLois Curfman McInnes m = Aloc->m; n = Aloc->n; v = Aloc->v; 6850452661fSBarry Smith rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork); 6863501a2bdSLois Curfman McInnes for ( j=0; j<n; j++ ) { 6873501a2bdSLois Curfman McInnes for (i=0; i<m; i++) rwork[i] = rstart + i; 6883501a2bdSLois Curfman McInnes ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr); 6893501a2bdSLois Curfman McInnes v += m; 6903501a2bdSLois Curfman McInnes } 6910452661fSBarry Smith PetscFree(rwork); 6923501a2bdSLois Curfman McInnes ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 6933501a2bdSLois Curfman McInnes ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 6943501a2bdSLois Curfman McInnes if (matout) { 6953501a2bdSLois Curfman McInnes *matout = B; 6963501a2bdSLois Curfman McInnes } else { 6973501a2bdSLois Curfman McInnes /* This isn't really an in-place transpose, but free data struct from a */ 6980452661fSBarry Smith PetscFree(a->rowners); 6993501a2bdSLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 7003501a2bdSLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 7013501a2bdSLois Curfman McInnes if (a->Mvctx) VecScatterDestroy(a->Mvctx); 7020452661fSBarry Smith PetscFree(a); 7033501a2bdSLois Curfman McInnes PetscMemcpy(A,B,sizeof(struct _Mat)); 7040452661fSBarry Smith PetscHeaderDestroy(B); 7053501a2bdSLois Curfman McInnes } 706096963f5SLois Curfman McInnes return 0; 707096963f5SLois Curfman McInnes } 708096963f5SLois Curfman McInnes 70955659b69SBarry Smith static int MatCopyPrivate_MPIDense(Mat,Mat *,int); 7108965ea79SLois Curfman McInnes 7118965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/ 71239ddd567SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIDense, 71339ddd567SLois Curfman McInnes MatGetRow_MPIDense,MatRestoreRow_MPIDense, 71439ddd567SLois Curfman McInnes MatMult_MPIDense,MatMultAdd_MPIDense, 715096963f5SLois Curfman McInnes MatMultTrans_MPIDense,MatMultTransAdd_MPIDense, 71639ddd567SLois Curfman McInnes 0,0, 71739ddd567SLois Curfman McInnes 0,0,0, 7183501a2bdSLois Curfman McInnes 0,0,MatTranspose_MPIDense, 71939ddd567SLois Curfman McInnes MatGetInfo_MPIDense,0, 720096963f5SLois Curfman McInnes MatGetDiagonal_MPIDense,0,MatNorm_MPIDense, 72139ddd567SLois Curfman McInnes MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense, 7228965ea79SLois Curfman McInnes 0, 72339ddd567SLois Curfman McInnes MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense, 72439ddd567SLois Curfman McInnes 0, 72539ddd567SLois Curfman McInnes 0,0,0,0, 72639ddd567SLois Curfman McInnes MatGetSize_MPIDense,MatGetLocalSize_MPIDense, 72739ddd567SLois Curfman McInnes MatGetOwnershipRange_MPIDense, 72839ddd567SLois Curfman McInnes 0,0, 72939ddd567SLois Curfman McInnes 0,0,0,0,0,MatCopyPrivate_MPIDense}; 7308965ea79SLois Curfman McInnes 7318965ea79SLois Curfman McInnes /*@C 73239ddd567SLois Curfman McInnes MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 7338965ea79SLois Curfman McInnes 7348965ea79SLois Curfman McInnes Input Parameters: 7358965ea79SLois Curfman McInnes . comm - MPI communicator 7368965ea79SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 7378965ea79SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 7388965ea79SLois Curfman McInnes if N is given) 7398965ea79SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 7408965ea79SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 7418965ea79SLois Curfman McInnes if n is given) 742*18f449edSLois Curfman McInnes . data - optional location of matrix data. Set data=0 for PETSc to 743*18f449edSLois Curfman McInnes control all matrix memory allocation. 7448965ea79SLois Curfman McInnes 7458965ea79SLois Curfman McInnes Output Parameter: 7468965ea79SLois Curfman McInnes . newmat - the matrix 7478965ea79SLois Curfman McInnes 7488965ea79SLois Curfman McInnes Notes: 74939ddd567SLois Curfman McInnes The dense format is fully compatible with standard Fortran 77 75039ddd567SLois Curfman McInnes storage by columns. 7518965ea79SLois Curfman McInnes 752*18f449edSLois Curfman McInnes The data input variable is intended primarily for Fortran programmers 753*18f449edSLois Curfman McInnes who wish to allocate their own matrix memory space. Most users should 754*18f449edSLois Curfman McInnes set data=0. 755*18f449edSLois Curfman McInnes 7568965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 7578965ea79SLois Curfman McInnes (possibly both). 7588965ea79SLois Curfman McInnes 7593501a2bdSLois Curfman McInnes Currently, the only parallel dense matrix decomposition is by rows, 7603501a2bdSLois Curfman McInnes so that n=N and each submatrix owns all of the global columns. 7613501a2bdSLois Curfman McInnes 76239ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel 7638965ea79SLois Curfman McInnes 76439ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 7658965ea79SLois Curfman McInnes @*/ 766*18f449edSLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *newmat) 7678965ea79SLois Curfman McInnes { 7688965ea79SLois Curfman McInnes Mat mat; 76939ddd567SLois Curfman McInnes Mat_MPIDense *a; 77039ddd567SLois Curfman McInnes int ierr, i; 7718965ea79SLois Curfman McInnes 772*18f449edSLois Curfman McInnes /* Note: for now, this assumes that the user knows what he's doing if 773*18f449edSLois Curfman McInnes data is specified above. */ 774*18f449edSLois Curfman McInnes 7758965ea79SLois Curfman McInnes *newmat = 0; 7760452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm); 7778965ea79SLois Curfman McInnes PLogObjectCreate(mat); 7780452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 7798965ea79SLois Curfman McInnes PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 78039ddd567SLois Curfman McInnes mat->destroy = MatDestroy_MPIDense; 78139ddd567SLois Curfman McInnes mat->view = MatView_MPIDense; 7828965ea79SLois Curfman McInnes mat->factor = 0; 7838965ea79SLois Curfman McInnes 7848965ea79SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 7858965ea79SLois Curfman McInnes MPI_Comm_rank(comm,&a->rank); 7868965ea79SLois Curfman McInnes MPI_Comm_size(comm,&a->size); 7878965ea79SLois Curfman McInnes 78839ddd567SLois Curfman McInnes if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm); 7898965ea79SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 79039ddd567SLois Curfman McInnes 79139ddd567SLois Curfman McInnes /* each row stores all columns */ 79239ddd567SLois Curfman McInnes if (N == PETSC_DECIDE) N = n; 793d7e8b826SBarry Smith if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 794d7e8b826SBarry Smith /* if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */ 7958965ea79SLois Curfman McInnes a->N = N; 7968965ea79SLois Curfman McInnes a->M = M; 79739ddd567SLois Curfman McInnes a->m = m; 79839ddd567SLois Curfman McInnes a->n = n; 7998965ea79SLois Curfman McInnes 8008965ea79SLois Curfman McInnes /* build local table of row and column ownerships */ 801d7e8b826SBarry Smith a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 802d7e8b826SBarry Smith a->cowners = a->rowners + a->size + 1; 803d7e8b826SBarry Smith PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+ 80439ddd567SLois Curfman McInnes sizeof(Mat_MPIDense)); 8058965ea79SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 8068965ea79SLois Curfman McInnes a->rowners[0] = 0; 8078965ea79SLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 8088965ea79SLois Curfman McInnes a->rowners[i] += a->rowners[i-1]; 8098965ea79SLois Curfman McInnes } 8108965ea79SLois Curfman McInnes a->rstart = a->rowners[a->rank]; 8118965ea79SLois Curfman McInnes a->rend = a->rowners[a->rank+1]; 812d7e8b826SBarry Smith MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 813d7e8b826SBarry Smith a->cowners[0] = 0; 814d7e8b826SBarry Smith for ( i=2; i<=a->size; i++ ) { 815d7e8b826SBarry Smith a->cowners[i] += a->cowners[i-1]; 816d7e8b826SBarry Smith } 8178965ea79SLois Curfman McInnes 818*18f449edSLois Curfman McInnes ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr); 8198965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 8208965ea79SLois Curfman McInnes 8218965ea79SLois Curfman McInnes /* build cache for off array entries formed */ 8228965ea79SLois Curfman McInnes ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 8238965ea79SLois Curfman McInnes 8248965ea79SLois Curfman McInnes /* stuff used for matrix vector multiply */ 8258965ea79SLois Curfman McInnes a->lvec = 0; 8268965ea79SLois Curfman McInnes a->Mvctx = 0; 8278965ea79SLois Curfman McInnes a->assembled = 0; 8288965ea79SLois Curfman McInnes 8298965ea79SLois Curfman McInnes *newmat = mat; 8308965ea79SLois Curfman McInnes return 0; 8318965ea79SLois Curfman McInnes } 8328965ea79SLois Curfman McInnes 8333501a2bdSLois Curfman McInnes static int MatCopyPrivate_MPIDense(Mat A,Mat *newmat,int cpvalues) 8348965ea79SLois Curfman McInnes { 8358965ea79SLois Curfman McInnes Mat mat; 8363501a2bdSLois Curfman McInnes Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data; 83739ddd567SLois Curfman McInnes int ierr; 8388965ea79SLois Curfman McInnes 83939ddd567SLois Curfman McInnes if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIDense:Must assemble matrix"); 8408965ea79SLois Curfman McInnes *newmat = 0; 8410452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm); 8428965ea79SLois Curfman McInnes PLogObjectCreate(mat); 8430452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 8448965ea79SLois Curfman McInnes PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 84539ddd567SLois Curfman McInnes mat->destroy = MatDestroy_MPIDense; 84639ddd567SLois Curfman McInnes mat->view = MatView_MPIDense; 8473501a2bdSLois Curfman McInnes mat->factor = A->factor; 8488965ea79SLois Curfman McInnes 8498965ea79SLois Curfman McInnes a->m = oldmat->m; 8508965ea79SLois Curfman McInnes a->n = oldmat->n; 8518965ea79SLois Curfman McInnes a->M = oldmat->M; 8528965ea79SLois Curfman McInnes a->N = oldmat->N; 8538965ea79SLois Curfman McInnes 8548965ea79SLois Curfman McInnes a->assembled = 1; 8558965ea79SLois Curfman McInnes a->rstart = oldmat->rstart; 8568965ea79SLois Curfman McInnes a->rend = oldmat->rend; 8578965ea79SLois Curfman McInnes a->size = oldmat->size; 8588965ea79SLois Curfman McInnes a->rank = oldmat->rank; 8598965ea79SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 8608965ea79SLois Curfman McInnes 8610452661fSBarry Smith a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 86239ddd567SLois Curfman McInnes PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense)); 8638965ea79SLois Curfman McInnes PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 8648965ea79SLois Curfman McInnes ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 8658965ea79SLois Curfman McInnes 8668965ea79SLois Curfman McInnes ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 8678965ea79SLois Curfman McInnes PLogObjectParent(mat,a->lvec); 86855659b69SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 8698965ea79SLois Curfman McInnes PLogObjectParent(mat,a->Mvctx); 8708965ea79SLois Curfman McInnes ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 8718965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 8728965ea79SLois Curfman McInnes *newmat = mat; 8738965ea79SLois Curfman McInnes return 0; 8748965ea79SLois Curfman McInnes } 8758965ea79SLois Curfman McInnes 8768965ea79SLois Curfman McInnes #include "sysio.h" 8778965ea79SLois Curfman McInnes 87839ddd567SLois Curfman McInnes int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat) 8798965ea79SLois Curfman McInnes { 8808965ea79SLois Curfman McInnes Mat A; 8818965ea79SLois Curfman McInnes int i, nz, ierr, j,rstart, rend, fd; 8828965ea79SLois Curfman McInnes Scalar *vals,*svals; 8838965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) bview; 8848965ea79SLois Curfman McInnes MPI_Comm comm = vobj->comm; 8858965ea79SLois Curfman McInnes MPI_Status status; 8868965ea79SLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 8878965ea79SLois Curfman McInnes int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 8888965ea79SLois Curfman McInnes int tag = ((PetscObject)bview)->tag; 8898965ea79SLois Curfman McInnes 8908965ea79SLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 8918965ea79SLois Curfman McInnes if (!rank) { 8928965ea79SLois Curfman McInnes ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 8938965ea79SLois Curfman McInnes ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 89439ddd567SLois Curfman McInnes if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object"); 8958965ea79SLois Curfman McInnes } 8968965ea79SLois Curfman McInnes 8978965ea79SLois Curfman McInnes MPI_Bcast(header+1,3,MPI_INT,0,comm); 8988965ea79SLois Curfman McInnes M = header[1]; N = header[2]; 8998965ea79SLois Curfman McInnes /* determine ownership of all rows */ 9008965ea79SLois Curfman McInnes m = M/size + ((M % size) > rank); 9010452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 9028965ea79SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 9038965ea79SLois Curfman McInnes rowners[0] = 0; 9048965ea79SLois Curfman McInnes for ( i=2; i<=size; i++ ) { 9058965ea79SLois Curfman McInnes rowners[i] += rowners[i-1]; 9068965ea79SLois Curfman McInnes } 9078965ea79SLois Curfman McInnes rstart = rowners[rank]; 9088965ea79SLois Curfman McInnes rend = rowners[rank+1]; 9098965ea79SLois Curfman McInnes 9108965ea79SLois Curfman McInnes /* distribute row lengths to all processors */ 9110452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 9128965ea79SLois Curfman McInnes offlens = ourlens + (rend-rstart); 9138965ea79SLois Curfman McInnes if (!rank) { 9140452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 9158965ea79SLois Curfman McInnes ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 9160452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 9178965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 9188965ea79SLois Curfman McInnes MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 9190452661fSBarry Smith PetscFree(sndcounts); 9208965ea79SLois Curfman McInnes } 9218965ea79SLois Curfman McInnes else { 9228965ea79SLois Curfman McInnes MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 9238965ea79SLois Curfman McInnes } 9248965ea79SLois Curfman McInnes 9258965ea79SLois Curfman McInnes if (!rank) { 9268965ea79SLois Curfman McInnes /* calculate the number of nonzeros on each processor */ 9270452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 928cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 9298965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 9308965ea79SLois Curfman McInnes for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 9318965ea79SLois Curfman McInnes procsnz[i] += rowlengths[j]; 9328965ea79SLois Curfman McInnes } 9338965ea79SLois Curfman McInnes } 9340452661fSBarry Smith PetscFree(rowlengths); 9358965ea79SLois Curfman McInnes 9368965ea79SLois Curfman McInnes /* determine max buffer needed and allocate it */ 9378965ea79SLois Curfman McInnes maxnz = 0; 9388965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 9390452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 9408965ea79SLois Curfman McInnes } 9410452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 9428965ea79SLois Curfman McInnes 9438965ea79SLois Curfman McInnes /* read in my part of the matrix column indices */ 9448965ea79SLois Curfman McInnes nz = procsnz[0]; 9450452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 9468965ea79SLois Curfman McInnes ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 9478965ea79SLois Curfman McInnes 9488965ea79SLois Curfman McInnes /* read in every one elses and ship off */ 9498965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 9508965ea79SLois Curfman McInnes nz = procsnz[i]; 9518965ea79SLois Curfman McInnes ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 9528965ea79SLois Curfman McInnes MPI_Send(cols,nz,MPI_INT,i,tag,comm); 9538965ea79SLois Curfman McInnes } 9540452661fSBarry Smith PetscFree(cols); 9558965ea79SLois Curfman McInnes } 9568965ea79SLois Curfman McInnes else { 9578965ea79SLois Curfman McInnes /* determine buffer space needed for message */ 9588965ea79SLois Curfman McInnes nz = 0; 9598965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 9608965ea79SLois Curfman McInnes nz += ourlens[i]; 9618965ea79SLois Curfman McInnes } 9620452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 9638965ea79SLois Curfman McInnes 9648965ea79SLois Curfman McInnes /* receive message of column indices*/ 9658965ea79SLois Curfman McInnes MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 9668965ea79SLois Curfman McInnes MPI_Get_count(&status,MPI_INT,&maxnz); 96739ddd567SLois Curfman McInnes if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file"); 9688965ea79SLois Curfman McInnes } 9698965ea79SLois Curfman McInnes 9708965ea79SLois Curfman McInnes /* loop over local rows, determining number of off diagonal entries */ 971cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 9728965ea79SLois Curfman McInnes jj = 0; 9738965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 9748965ea79SLois Curfman McInnes for ( j=0; j<ourlens[i]; j++ ) { 9758965ea79SLois Curfman McInnes if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 9768965ea79SLois Curfman McInnes jj++; 9778965ea79SLois Curfman McInnes } 9788965ea79SLois Curfman McInnes } 9798965ea79SLois Curfman McInnes 9808965ea79SLois Curfman McInnes /* create our matrix */ 9818965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 9828965ea79SLois Curfman McInnes ourlens[i] -= offlens[i]; 9838965ea79SLois Curfman McInnes } 98439ddd567SLois Curfman McInnes if (type == MATMPIDENSE) { 985*18f449edSLois Curfman McInnes ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,0,newmat); CHKERRQ(ierr); 9868965ea79SLois Curfman McInnes } 9878965ea79SLois Curfman McInnes A = *newmat; 9888965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 9898965ea79SLois Curfman McInnes ourlens[i] += offlens[i]; 9908965ea79SLois Curfman McInnes } 9918965ea79SLois Curfman McInnes 9928965ea79SLois Curfman McInnes if (!rank) { 9930452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 9948965ea79SLois Curfman McInnes 9958965ea79SLois Curfman McInnes /* read in my part of the matrix numerical values */ 9968965ea79SLois Curfman McInnes nz = procsnz[0]; 9978965ea79SLois Curfman McInnes ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 9988965ea79SLois Curfman McInnes 9998965ea79SLois Curfman McInnes /* insert into matrix */ 10008965ea79SLois Curfman McInnes jj = rstart; 10018965ea79SLois Curfman McInnes smycols = mycols; 10028965ea79SLois Curfman McInnes svals = vals; 10038965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10048965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 10058965ea79SLois Curfman McInnes smycols += ourlens[i]; 10068965ea79SLois Curfman McInnes svals += ourlens[i]; 10078965ea79SLois Curfman McInnes jj++; 10088965ea79SLois Curfman McInnes } 10098965ea79SLois Curfman McInnes 10108965ea79SLois Curfman McInnes /* read in other processors and ship out */ 10118965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 10128965ea79SLois Curfman McInnes nz = procsnz[i]; 10138965ea79SLois Curfman McInnes ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 10148965ea79SLois Curfman McInnes MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 10158965ea79SLois Curfman McInnes } 10160452661fSBarry Smith PetscFree(procsnz); 10178965ea79SLois Curfman McInnes } 10188965ea79SLois Curfman McInnes else { 10198965ea79SLois Curfman McInnes /* receive numeric values */ 10200452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 10218965ea79SLois Curfman McInnes 10228965ea79SLois Curfman McInnes /* receive message of values*/ 10238965ea79SLois Curfman McInnes MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 10248965ea79SLois Curfman McInnes MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 102539ddd567SLois Curfman McInnes if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file"); 10268965ea79SLois Curfman McInnes 10278965ea79SLois Curfman McInnes /* insert into matrix */ 10288965ea79SLois Curfman McInnes jj = rstart; 10298965ea79SLois Curfman McInnes smycols = mycols; 10308965ea79SLois Curfman McInnes svals = vals; 10318965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10328965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 10338965ea79SLois Curfman McInnes smycols += ourlens[i]; 10348965ea79SLois Curfman McInnes svals += ourlens[i]; 10358965ea79SLois Curfman McInnes jj++; 10368965ea79SLois Curfman McInnes } 10378965ea79SLois Curfman McInnes } 10380452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 10398965ea79SLois Curfman McInnes 10408965ea79SLois Curfman McInnes ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 10418965ea79SLois Curfman McInnes ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 10428965ea79SLois Curfman McInnes return 0; 10438965ea79SLois Curfman McInnes } 1044