18965ea79SLois Curfman McInnes #ifndef lint 2*b49de8d1SLois Curfman McInnes static char vcid[] = "$Id: mpidense.c,v 1.12 1995/11/21 03:27:15 curfman Exp curfman $"; 38965ea79SLois Curfman McInnes #endif 48965ea79SLois Curfman McInnes 5ed3cc1f0SBarry Smith /* 6ed3cc1f0SBarry Smith Basic functions for basic parallel dense matrices. 7ed3cc1f0SBarry Smith */ 8ed3cc1f0SBarry Smith 98965ea79SLois Curfman McInnes #include "mpidense.h" 108965ea79SLois Curfman McInnes #include "vec/vecimpl.h" 118965ea79SLois Curfman McInnes #include "inline/spops.h" 128965ea79SLois Curfman McInnes 1339ddd567SLois Curfman McInnes static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n, 148965ea79SLois Curfman McInnes int *idxn,Scalar *v,InsertMode addv) 158965ea79SLois Curfman McInnes { 1639ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 1739ddd567SLois Curfman McInnes int ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row; 188965ea79SLois Curfman McInnes 1939ddd567SLois Curfman McInnes if (mdn->insertmode != NOT_SET_VALUES && mdn->insertmode != addv) { 2039ddd567SLois Curfman McInnes SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds"); 218965ea79SLois Curfman McInnes } 2239ddd567SLois Curfman McInnes mdn->insertmode = addv; 238965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 2439ddd567SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row"); 2539ddd567SLois Curfman McInnes if (idxm[i] >= mdn->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large"); 268965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 278965ea79SLois Curfman McInnes row = idxm[i] - rstart; 288965ea79SLois Curfman McInnes for ( j=0; j<n; j++ ) { 2939ddd567SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column"); 30*b49de8d1SLois Curfman McInnes if (idxn[j] >= mdn->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large"); 31*b49de8d1SLois Curfman McInnes ierr = MatSetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j,addv); CHKERRQ(ierr); 328965ea79SLois Curfman McInnes } 338965ea79SLois Curfman McInnes } 348965ea79SLois Curfman McInnes else { 35*b49de8d1SLois Curfman McInnes ierr = StashValues_Private(&mdn->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr); 36*b49de8d1SLois Curfman McInnes } 37*b49de8d1SLois Curfman McInnes } 38*b49de8d1SLois Curfman McInnes return 0; 39*b49de8d1SLois Curfman McInnes } 40*b49de8d1SLois Curfman McInnes 41*b49de8d1SLois Curfman McInnes static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 42*b49de8d1SLois Curfman McInnes { 43*b49de8d1SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 44*b49de8d1SLois Curfman McInnes int ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row; 45*b49de8d1SLois Curfman McInnes 46*b49de8d1SLois Curfman McInnes if (!mdn->assembled) SETERRQ(1,"MatGetValues_MPIDense:Not for unassembled matrix"); 47*b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 48*b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row"); 49*b49de8d1SLois Curfman McInnes if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large"); 50*b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 51*b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 52*b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 53*b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column"); 54*b49de8d1SLois Curfman McInnes if (idxn[j] >= mdn->N) 55*b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIDense:Column too large"); 56*b49de8d1SLois Curfman McInnes ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr); 57*b49de8d1SLois Curfman McInnes } 58*b49de8d1SLois Curfman McInnes } 59*b49de8d1SLois Curfman McInnes else { 60*b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported"); 618965ea79SLois Curfman McInnes } 628965ea79SLois Curfman McInnes } 638965ea79SLois Curfman McInnes return 0; 648965ea79SLois Curfman McInnes } 658965ea79SLois Curfman McInnes 6639ddd567SLois Curfman McInnes static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 678965ea79SLois Curfman McInnes { 6839ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 698965ea79SLois Curfman McInnes MPI_Comm comm = mat->comm; 7039ddd567SLois Curfman McInnes int size = mdn->size, *owners = mdn->rowners, rank = mdn->rank; 718965ea79SLois Curfman McInnes int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 7239ddd567SLois Curfman McInnes int tag = mat->tag, *owner,*starts,count,ierr; 738965ea79SLois Curfman McInnes InsertMode addv; 7439ddd567SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 758965ea79SLois Curfman McInnes Scalar *rvalues,*svalues; 768965ea79SLois Curfman McInnes 778965ea79SLois Curfman McInnes /* make sure all processors are either in INSERTMODE or ADDMODE */ 7839ddd567SLois Curfman McInnes MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT, 7939ddd567SLois Curfman McInnes MPI_BOR,comm); 8039ddd567SLois Curfman McInnes if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1, 8139ddd567SLois Curfman McInnes "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs"); 828965ea79SLois Curfman McInnes } 8339ddd567SLois Curfman McInnes mdn->insertmode = addv; /* in case this processor had no cache */ 848965ea79SLois Curfman McInnes 858965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 860452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 87cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 880452661fSBarry Smith owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 8939ddd567SLois Curfman McInnes for ( i=0; i<mdn->stash.n; i++ ) { 9039ddd567SLois Curfman McInnes idx = mdn->stash.idx[i]; 918965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 928965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 938965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; break; 948965ea79SLois Curfman McInnes } 958965ea79SLois Curfman McInnes } 968965ea79SLois Curfman McInnes } 978965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 988965ea79SLois Curfman McInnes 998965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 1000452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 10139ddd567SLois Curfman McInnes MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm); 1028965ea79SLois Curfman McInnes nreceives = work[rank]; 10339ddd567SLois Curfman McInnes MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm); 1048965ea79SLois Curfman McInnes nmax = work[rank]; 1050452661fSBarry Smith PetscFree(work); 1068965ea79SLois Curfman McInnes 1078965ea79SLois Curfman McInnes /* post receives: 1088965ea79SLois Curfman McInnes 1) each message will consist of ordered pairs 1098965ea79SLois Curfman McInnes (global index,value) we store the global index as a double 1108965ea79SLois Curfman McInnes to simplify the message passing. 1118965ea79SLois Curfman McInnes 2) since we don't know how long each individual message is we 1128965ea79SLois Curfman McInnes allocate the largest needed buffer for each receive. Potentially 1138965ea79SLois Curfman McInnes this is a lot of wasted space. 1148965ea79SLois Curfman McInnes 1158965ea79SLois Curfman McInnes This could be done better. 1168965ea79SLois Curfman McInnes */ 1170452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 1188965ea79SLois Curfman McInnes CHKPTRQ(rvalues); 1190452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 1208965ea79SLois Curfman McInnes CHKPTRQ(recv_waits); 1218965ea79SLois Curfman McInnes for ( i=0; i<nreceives; i++ ) { 12239ddd567SLois Curfman McInnes MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1238965ea79SLois Curfman McInnes comm,recv_waits+i); 1248965ea79SLois Curfman McInnes } 1258965ea79SLois Curfman McInnes 1268965ea79SLois Curfman McInnes /* do sends: 1278965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 1288965ea79SLois Curfman McInnes the ith processor 1298965ea79SLois Curfman McInnes */ 1300452661fSBarry Smith svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) ); 13139ddd567SLois Curfman McInnes CHKPTRQ(svalues); 1320452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 1338965ea79SLois Curfman McInnes CHKPTRQ(send_waits); 1340452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 1358965ea79SLois Curfman McInnes starts[0] = 0; 1368965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 13739ddd567SLois Curfman McInnes for ( i=0; i<mdn->stash.n; i++ ) { 13839ddd567SLois Curfman McInnes svalues[3*starts[owner[i]]] = (Scalar) mdn->stash.idx[i]; 13939ddd567SLois Curfman McInnes svalues[3*starts[owner[i]]+1] = (Scalar) mdn->stash.idy[i]; 14039ddd567SLois Curfman McInnes svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i]; 1418965ea79SLois Curfman McInnes } 1420452661fSBarry Smith PetscFree(owner); 1438965ea79SLois Curfman McInnes starts[0] = 0; 1448965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1458965ea79SLois Curfman McInnes count = 0; 1468965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 1478965ea79SLois Curfman McInnes if (procs[i]) { 14839ddd567SLois Curfman McInnes MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag, 1498965ea79SLois Curfman McInnes comm,send_waits+count++); 1508965ea79SLois Curfman McInnes } 1518965ea79SLois Curfman McInnes } 1520452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 1538965ea79SLois Curfman McInnes 1548965ea79SLois Curfman McInnes /* Free cache space */ 15539ddd567SLois Curfman McInnes ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr); 1568965ea79SLois Curfman McInnes 15739ddd567SLois Curfman McInnes mdn->svalues = svalues; mdn->rvalues = rvalues; 15839ddd567SLois Curfman McInnes mdn->nsends = nsends; mdn->nrecvs = nreceives; 15939ddd567SLois Curfman McInnes mdn->send_waits = send_waits; mdn->recv_waits = recv_waits; 16039ddd567SLois Curfman McInnes mdn->rmax = nmax; 1618965ea79SLois Curfman McInnes 1628965ea79SLois Curfman McInnes return 0; 1638965ea79SLois Curfman McInnes } 16439ddd567SLois Curfman McInnes extern int MatSetUpMultiply_MPIDense(Mat); 1658965ea79SLois Curfman McInnes 16639ddd567SLois Curfman McInnes static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 1678965ea79SLois Curfman McInnes { 16839ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 1698965ea79SLois Curfman McInnes MPI_Status *send_status,recv_status; 17039ddd567SLois Curfman McInnes int imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col; 1718965ea79SLois Curfman McInnes Scalar *values,val; 17239ddd567SLois Curfman McInnes InsertMode addv = mdn->insertmode; 1738965ea79SLois Curfman McInnes 1748965ea79SLois Curfman McInnes /* wait on receives */ 1758965ea79SLois Curfman McInnes while (count) { 17639ddd567SLois Curfman McInnes MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status); 1778965ea79SLois Curfman McInnes /* unpack receives into our local space */ 17839ddd567SLois Curfman McInnes values = mdn->rvalues + 3*imdex*mdn->rmax; 1798965ea79SLois Curfman McInnes MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 1808965ea79SLois Curfman McInnes n = n/3; 1818965ea79SLois Curfman McInnes for ( i=0; i<n; i++ ) { 18239ddd567SLois Curfman McInnes row = (int) PETSCREAL(values[3*i]) - mdn->rstart; 1838965ea79SLois Curfman McInnes col = (int) PETSCREAL(values[3*i+1]); 1848965ea79SLois Curfman McInnes val = values[3*i+2]; 18539ddd567SLois Curfman McInnes if (col >= 0 && col < mdn->N) { 18639ddd567SLois Curfman McInnes MatSetValues(mdn->A,1,&row,1,&col,&val,addv); 1878965ea79SLois Curfman McInnes } 18839ddd567SLois Curfman McInnes else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");} 1898965ea79SLois Curfman McInnes } 1908965ea79SLois Curfman McInnes count--; 1918965ea79SLois Curfman McInnes } 1920452661fSBarry Smith PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues); 1938965ea79SLois Curfman McInnes 1948965ea79SLois Curfman McInnes /* wait on sends */ 19539ddd567SLois Curfman McInnes if (mdn->nsends) { 1960452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) ); 1978965ea79SLois Curfman McInnes CHKPTRQ(send_status); 19839ddd567SLois Curfman McInnes MPI_Waitall(mdn->nsends,mdn->send_waits,send_status); 1990452661fSBarry Smith PetscFree(send_status); 2008965ea79SLois Curfman McInnes } 2010452661fSBarry Smith PetscFree(mdn->send_waits); PetscFree(mdn->svalues); 2028965ea79SLois Curfman McInnes 20339ddd567SLois Curfman McInnes mdn->insertmode = NOT_SET_VALUES; 20439ddd567SLois Curfman McInnes ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr); 20539ddd567SLois Curfman McInnes ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr); 2068965ea79SLois Curfman McInnes 20739ddd567SLois Curfman McInnes if (!mdn->assembled && mode == FINAL_ASSEMBLY) { 20839ddd567SLois Curfman McInnes ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr); 2098965ea79SLois Curfman McInnes } 21039ddd567SLois Curfman McInnes mdn->assembled = 1; 2118965ea79SLois Curfman McInnes return 0; 2128965ea79SLois Curfman McInnes } 2138965ea79SLois Curfman McInnes 21439ddd567SLois Curfman McInnes static int MatZeroEntries_MPIDense(Mat A) 2158965ea79SLois Curfman McInnes { 21639ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 21739ddd567SLois Curfman McInnes return MatZeroEntries(l->A); 2188965ea79SLois Curfman McInnes } 2198965ea79SLois Curfman McInnes 2208965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the 2218965ea79SLois Curfman McInnes matrix is square and the column and row owerships are identical. 2228965ea79SLois Curfman McInnes This is a BUG. The only way to fix it seems to be to access 2233501a2bdSLois Curfman McInnes mdn->A and mdn->B directly and not through the MatZeroRows() 2248965ea79SLois Curfman McInnes routine. 2258965ea79SLois Curfman McInnes */ 22639ddd567SLois Curfman McInnes static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag) 2278965ea79SLois Curfman McInnes { 22839ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 2298965ea79SLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 2308965ea79SLois Curfman McInnes int *procs,*nprocs,j,found,idx,nsends,*work; 2318965ea79SLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 2328965ea79SLois Curfman McInnes int *rvalues,tag = A->tag,count,base,slen,n,*source; 2338965ea79SLois Curfman McInnes int *lens,imdex,*lrows,*values; 2348965ea79SLois Curfman McInnes MPI_Comm comm = A->comm; 2358965ea79SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 2368965ea79SLois Curfman McInnes MPI_Status recv_status,*send_status; 2378965ea79SLois Curfman McInnes IS istmp; 2388965ea79SLois Curfman McInnes 23939ddd567SLois Curfman McInnes if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIDense:Must assemble matrix"); 2408965ea79SLois Curfman McInnes ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 2418965ea79SLois Curfman McInnes ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 2428965ea79SLois Curfman McInnes 2438965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 2440452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 245cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 2460452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 2478965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 2488965ea79SLois Curfman McInnes idx = rows[i]; 2498965ea79SLois Curfman McInnes found = 0; 2508965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 2518965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 2528965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 2538965ea79SLois Curfman McInnes } 2548965ea79SLois Curfman McInnes } 25539ddd567SLois Curfman McInnes if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range"); 2568965ea79SLois Curfman McInnes } 2578965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 2588965ea79SLois Curfman McInnes 2598965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 2600452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 2618965ea79SLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 2628965ea79SLois Curfman McInnes nrecvs = work[rank]; 2638965ea79SLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 2648965ea79SLois Curfman McInnes nmax = work[rank]; 2650452661fSBarry Smith PetscFree(work); 2668965ea79SLois Curfman McInnes 2678965ea79SLois Curfman McInnes /* post receives: */ 2680452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 2698965ea79SLois Curfman McInnes CHKPTRQ(rvalues); 2700452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 2718965ea79SLois Curfman McInnes CHKPTRQ(recv_waits); 2728965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 2738965ea79SLois Curfman McInnes MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 2748965ea79SLois Curfman McInnes } 2758965ea79SLois Curfman McInnes 2768965ea79SLois Curfman McInnes /* do sends: 2778965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 2788965ea79SLois Curfman McInnes the ith processor 2798965ea79SLois Curfman McInnes */ 2800452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 2810452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 2828965ea79SLois Curfman McInnes CHKPTRQ(send_waits); 2830452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 2848965ea79SLois Curfman McInnes starts[0] = 0; 2858965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2868965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 2878965ea79SLois Curfman McInnes svalues[starts[owner[i]]++] = rows[i]; 2888965ea79SLois Curfman McInnes } 2898965ea79SLois Curfman McInnes ISRestoreIndices(is,&rows); 2908965ea79SLois Curfman McInnes 2918965ea79SLois Curfman McInnes starts[0] = 0; 2928965ea79SLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2938965ea79SLois Curfman McInnes count = 0; 2948965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 2958965ea79SLois Curfman McInnes if (procs[i]) { 2968965ea79SLois Curfman McInnes MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 2978965ea79SLois Curfman McInnes } 2988965ea79SLois Curfman McInnes } 2990452661fSBarry Smith PetscFree(starts); 3008965ea79SLois Curfman McInnes 3018965ea79SLois Curfman McInnes base = owners[rank]; 3028965ea79SLois Curfman McInnes 3038965ea79SLois Curfman McInnes /* wait on receives */ 3040452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3058965ea79SLois Curfman McInnes source = lens + nrecvs; 3068965ea79SLois Curfman McInnes count = nrecvs; slen = 0; 3078965ea79SLois Curfman McInnes while (count) { 3088965ea79SLois Curfman McInnes MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3098965ea79SLois Curfman McInnes /* unpack receives into our local space */ 3108965ea79SLois Curfman McInnes MPI_Get_count(&recv_status,MPI_INT,&n); 3118965ea79SLois Curfman McInnes source[imdex] = recv_status.MPI_SOURCE; 3128965ea79SLois Curfman McInnes lens[imdex] = n; 3138965ea79SLois Curfman McInnes slen += n; 3148965ea79SLois Curfman McInnes count--; 3158965ea79SLois Curfman McInnes } 3160452661fSBarry Smith PetscFree(recv_waits); 3178965ea79SLois Curfman McInnes 3188965ea79SLois Curfman McInnes /* move the data into the send scatter */ 3190452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 3208965ea79SLois Curfman McInnes count = 0; 3218965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 3228965ea79SLois Curfman McInnes values = rvalues + i*nmax; 3238965ea79SLois Curfman McInnes for ( j=0; j<lens[i]; j++ ) { 3248965ea79SLois Curfman McInnes lrows[count++] = values[j] - base; 3258965ea79SLois Curfman McInnes } 3268965ea79SLois Curfman McInnes } 3270452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 3280452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 3298965ea79SLois Curfman McInnes 3308965ea79SLois Curfman McInnes /* actually zap the local rows */ 3318965ea79SLois Curfman McInnes ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 3328965ea79SLois Curfman McInnes PLogObjectParent(A,istmp); 3330452661fSBarry Smith PetscFree(lrows); 3348965ea79SLois Curfman McInnes ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 3358965ea79SLois Curfman McInnes ierr = ISDestroy(istmp); CHKERRQ(ierr); 3368965ea79SLois Curfman McInnes 3378965ea79SLois Curfman McInnes /* wait on sends */ 3388965ea79SLois Curfman McInnes if (nsends) { 3390452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 3408965ea79SLois Curfman McInnes CHKPTRQ(send_status); 3418965ea79SLois Curfman McInnes MPI_Waitall(nsends,send_waits,send_status); 3420452661fSBarry Smith PetscFree(send_status); 3438965ea79SLois Curfman McInnes } 3440452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 3458965ea79SLois Curfman McInnes 3468965ea79SLois Curfman McInnes return 0; 3478965ea79SLois Curfman McInnes } 3488965ea79SLois Curfman McInnes 34939ddd567SLois Curfman McInnes static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 3508965ea79SLois Curfman McInnes { 35139ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 3528965ea79SLois Curfman McInnes int ierr; 35339ddd567SLois Curfman McInnes if (!mdn->assembled) 35439ddd567SLois Curfman McInnes SETERRQ(1,"MatMult_MPIDense:Must assemble matrix first"); 35539ddd567SLois Curfman McInnes ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 35639ddd567SLois Curfman McInnes CHKERRQ(ierr); 35739ddd567SLois Curfman McInnes ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 35839ddd567SLois Curfman McInnes CHKERRQ(ierr); 35939ddd567SLois Curfman McInnes ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr); 3608965ea79SLois Curfman McInnes return 0; 3618965ea79SLois Curfman McInnes } 3628965ea79SLois Curfman McInnes 36339ddd567SLois Curfman McInnes static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 3648965ea79SLois Curfman McInnes { 36539ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 3668965ea79SLois Curfman McInnes int ierr; 36739ddd567SLois Curfman McInnes if (!mdn->assembled) 36839ddd567SLois Curfman McInnes SETERRQ(1,"MatMultAdd_MPIDense:Must assemble matrix first"); 3693501a2bdSLois Curfman McInnes ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 37039ddd567SLois Curfman McInnes CHKERRQ(ierr); 3713501a2bdSLois Curfman McInnes ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 37239ddd567SLois Curfman McInnes CHKERRQ(ierr); 37339ddd567SLois Curfman McInnes ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr); 3748965ea79SLois Curfman McInnes return 0; 3758965ea79SLois Curfman McInnes } 3768965ea79SLois Curfman McInnes 377096963f5SLois Curfman McInnes static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy) 378096963f5SLois Curfman McInnes { 379096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 380096963f5SLois Curfman McInnes int ierr; 3813501a2bdSLois Curfman McInnes Scalar zero = 0.0; 382096963f5SLois Curfman McInnes 383096963f5SLois Curfman McInnes if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIDense:must assemble matrix"); 3843501a2bdSLois Curfman McInnes ierr = VecSet(&zero,yy); CHKERRQ(ierr); 385096963f5SLois Curfman McInnes ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr); 386096963f5SLois Curfman McInnes ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 387096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 388096963f5SLois Curfman McInnes ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 389096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 390096963f5SLois Curfman McInnes return 0; 391096963f5SLois Curfman McInnes } 392096963f5SLois Curfman McInnes 393096963f5SLois Curfman McInnes static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 394096963f5SLois Curfman McInnes { 395096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 396096963f5SLois Curfman McInnes int ierr; 397096963f5SLois Curfman McInnes 398096963f5SLois Curfman McInnes if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIDense:must assemble matrix"); 3993501a2bdSLois Curfman McInnes ierr = VecCopy(yy,zz); CHKERRQ(ierr); 4003501a2bdSLois Curfman McInnes ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr); 401096963f5SLois Curfman McInnes ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 402096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 403096963f5SLois Curfman McInnes ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 404096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 405096963f5SLois Curfman McInnes return 0; 406096963f5SLois Curfman McInnes } 407096963f5SLois Curfman McInnes 40839ddd567SLois Curfman McInnes static int MatGetDiagonal_MPIDense(Mat A,Vec v) 4098965ea79SLois Curfman McInnes { 41039ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 411096963f5SLois Curfman McInnes Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data; 412096963f5SLois Curfman McInnes int ierr, i, n, m = a->m, radd; 413096963f5SLois Curfman McInnes Scalar *x; 414ed3cc1f0SBarry Smith 41539ddd567SLois Curfman McInnes if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIDense:must assemble matrix"); 416096963f5SLois Curfman McInnes ierr = VecGetArray(v,&x); CHKERRQ(ierr); 417096963f5SLois Curfman McInnes ierr = VecGetSize(v,&n); CHKERRQ(ierr); 418096963f5SLois Curfman McInnes if (n != a->M) SETERRQ(1,"MatGetDiagonal_SeqDense:Nonconforming mat and vec"); 419096963f5SLois Curfman McInnes radd = a->rstart*m*m; 420096963f5SLois Curfman McInnes for ( i=0; i<m; i++ ) { 421096963f5SLois Curfman McInnes x[i] = aloc->v[radd + i*m + i]; 422096963f5SLois Curfman McInnes } 423096963f5SLois Curfman McInnes return 0; 4248965ea79SLois Curfman McInnes } 4258965ea79SLois Curfman McInnes 42639ddd567SLois Curfman McInnes static int MatDestroy_MPIDense(PetscObject obj) 4278965ea79SLois Curfman McInnes { 4288965ea79SLois Curfman McInnes Mat mat = (Mat) obj; 4293501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4308965ea79SLois Curfman McInnes int ierr; 431ed3cc1f0SBarry Smith 4328965ea79SLois Curfman McInnes #if defined(PETSC_LOG) 4333501a2bdSLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N); 4348965ea79SLois Curfman McInnes #endif 4350452661fSBarry Smith PetscFree(mdn->rowners); 4363501a2bdSLois Curfman McInnes ierr = MatDestroy(mdn->A); CHKERRQ(ierr); 4373501a2bdSLois Curfman McInnes if (mdn->lvec) VecDestroy(mdn->lvec); 4383501a2bdSLois Curfman McInnes if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 4390452661fSBarry Smith PetscFree(mdn); 4408965ea79SLois Curfman McInnes PLogObjectDestroy(mat); 4410452661fSBarry Smith PetscHeaderDestroy(mat); 4428965ea79SLois Curfman McInnes return 0; 4438965ea79SLois Curfman McInnes } 44439ddd567SLois Curfman McInnes 4458965ea79SLois Curfman McInnes #include "pinclude/pviewer.h" 4468965ea79SLois Curfman McInnes 44739ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer) 4488965ea79SLois Curfman McInnes { 44939ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4508965ea79SLois Curfman McInnes int ierr; 45139ddd567SLois Curfman McInnes if (mdn->size == 1) { 45239ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 4538965ea79SLois Curfman McInnes } 45439ddd567SLois Curfman McInnes else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported"); 4558965ea79SLois Curfman McInnes return 0; 4568965ea79SLois Curfman McInnes } 4578965ea79SLois Curfman McInnes 45839ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer) 4598965ea79SLois Curfman McInnes { 46039ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 46139ddd567SLois Curfman McInnes int ierr, format; 4628965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) viewer; 4638965ea79SLois Curfman McInnes FILE *fd; 4648965ea79SLois Curfman McInnes 4653501a2bdSLois Curfman McInnes ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 4668965ea79SLois Curfman McInnes if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 4678965ea79SLois Curfman McInnes ierr = ViewerFileGetFormat_Private(viewer,&format); 4683501a2bdSLois Curfman McInnes if (format == FILE_FORMAT_INFO_DETAILED) { 469096963f5SLois Curfman McInnes int nz, nzalloc, mem, rank; 470096963f5SLois Curfman McInnes MPI_Comm_rank(mat->comm,&rank); 471096963f5SLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 472096963f5SLois Curfman McInnes MPIU_Seq_begin(mat->comm,1); 4733501a2bdSLois Curfman McInnes fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n", 474096963f5SLois Curfman McInnes rank,mdn->m,nz,nzalloc,mem); 475096963f5SLois Curfman McInnes fflush(fd); 476096963f5SLois Curfman McInnes MPIU_Seq_end(mat->comm,1); 4773501a2bdSLois Curfman McInnes ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr); 4783501a2bdSLois Curfman McInnes return 0; 4793501a2bdSLois Curfman McInnes } 4803501a2bdSLois Curfman McInnes else if (format == FILE_FORMAT_INFO) { 481096963f5SLois Curfman McInnes return 0; 4828965ea79SLois Curfman McInnes } 4838965ea79SLois Curfman McInnes } 4848965ea79SLois Curfman McInnes if (vobj->type == ASCII_FILE_VIEWER) { 4858965ea79SLois Curfman McInnes MPIU_Seq_begin(mat->comm,1); 48639ddd567SLois Curfman McInnes fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n", 48739ddd567SLois Curfman McInnes mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n); 48839ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 4898965ea79SLois Curfman McInnes fflush(fd); 4908965ea79SLois Curfman McInnes MPIU_Seq_end(mat->comm,1); 4918965ea79SLois Curfman McInnes } 4928965ea79SLois Curfman McInnes else { 49339ddd567SLois Curfman McInnes int size = mdn->size, rank = mdn->rank; 4948965ea79SLois Curfman McInnes if (size == 1) { 49539ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 4968965ea79SLois Curfman McInnes } 4978965ea79SLois Curfman McInnes else { 4988965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 4998965ea79SLois Curfman McInnes Mat A; 50039ddd567SLois Curfman McInnes int M = mdn->M, N = mdn->N,m,row,i, nz, *cols; 50139ddd567SLois Curfman McInnes Scalar *vals; 50239ddd567SLois Curfman McInnes Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data; 5038965ea79SLois Curfman McInnes 5048965ea79SLois Curfman McInnes if (!rank) { 50518f449edSLois Curfman McInnes ierr = MatCreateMPIDense(mat->comm,M,M,N,N,0,&A); CHKERRQ(ierr); 5068965ea79SLois Curfman McInnes } 5078965ea79SLois Curfman McInnes else { 50818f449edSLois Curfman McInnes ierr = MatCreateMPIDense(mat->comm,0,M,N,N,0,&A); CHKERRQ(ierr); 5098965ea79SLois Curfman McInnes } 5108965ea79SLois Curfman McInnes PLogObjectParent(mat,A); 5118965ea79SLois Curfman McInnes 51239ddd567SLois Curfman McInnes /* Copy the matrix ... This isn't the most efficient means, 51339ddd567SLois Curfman McInnes but it's quick for now */ 51439ddd567SLois Curfman McInnes row = mdn->rstart; m = Amdn->m; 5158965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 51639ddd567SLois Curfman McInnes ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 51739ddd567SLois Curfman McInnes ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr); 51839ddd567SLois Curfman McInnes ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 51939ddd567SLois Curfman McInnes row++; 5208965ea79SLois Curfman McInnes } 5218965ea79SLois Curfman McInnes 5228965ea79SLois Curfman McInnes ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 5238965ea79SLois Curfman McInnes ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 5248965ea79SLois Curfman McInnes if (!rank) { 52539ddd567SLois Curfman McInnes ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr); 5268965ea79SLois Curfman McInnes } 5278965ea79SLois Curfman McInnes ierr = MatDestroy(A); CHKERRQ(ierr); 5288965ea79SLois Curfman McInnes } 5298965ea79SLois Curfman McInnes } 5308965ea79SLois Curfman McInnes return 0; 5318965ea79SLois Curfman McInnes } 5328965ea79SLois Curfman McInnes 53339ddd567SLois Curfman McInnes static int MatView_MPIDense(PetscObject obj,Viewer viewer) 5348965ea79SLois Curfman McInnes { 5358965ea79SLois Curfman McInnes Mat mat = (Mat) obj; 53639ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 5378965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) viewer; 53839ddd567SLois Curfman McInnes int ierr; 5398965ea79SLois Curfman McInnes 54039ddd567SLois Curfman McInnes if (!mdn->assembled) SETERRQ(1,"MatView_MPIDense:must assemble matrix"); 5418965ea79SLois Curfman McInnes if (!viewer) { 5428965ea79SLois Curfman McInnes viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 5438965ea79SLois Curfman McInnes } 54439ddd567SLois Curfman McInnes if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 54539ddd567SLois Curfman McInnes ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 5468965ea79SLois Curfman McInnes } 5478965ea79SLois Curfman McInnes else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 54839ddd567SLois Curfman McInnes ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 5498965ea79SLois Curfman McInnes } 5508965ea79SLois Curfman McInnes else if (vobj->type == BINARY_FILE_VIEWER) { 55139ddd567SLois Curfman McInnes return MatView_MPIDense_Binary(mat,viewer); 5528965ea79SLois Curfman McInnes } 5538965ea79SLois Curfman McInnes return 0; 5548965ea79SLois Curfman McInnes } 5558965ea79SLois Curfman McInnes 5563501a2bdSLois Curfman McInnes static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz, 5578965ea79SLois Curfman McInnes int *nzalloc,int *mem) 5588965ea79SLois Curfman McInnes { 5593501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 5603501a2bdSLois Curfman McInnes Mat mdn = mat->A; 56139ddd567SLois Curfman McInnes int ierr, isend[3], irecv[3]; 5628965ea79SLois Curfman McInnes 5633501a2bdSLois Curfman McInnes ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 5648965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 5658965ea79SLois Curfman McInnes *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 5668965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 5673501a2bdSLois Curfman McInnes MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm); 5688965ea79SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 5698965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 5703501a2bdSLois Curfman McInnes MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm); 5718965ea79SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 5728965ea79SLois Curfman McInnes } 5738965ea79SLois Curfman McInnes return 0; 5748965ea79SLois Curfman McInnes } 5758965ea79SLois Curfman McInnes 57639ddd567SLois Curfman McInnes static int MatSetOption_MPIDense(Mat A,MatOption op) 5778965ea79SLois Curfman McInnes { 57839ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 5798965ea79SLois Curfman McInnes 5808965ea79SLois Curfman McInnes if (op == NO_NEW_NONZERO_LOCATIONS || 5818965ea79SLois Curfman McInnes op == YES_NEW_NONZERO_LOCATIONS || 5828965ea79SLois Curfman McInnes op == COLUMNS_SORTED || 5838965ea79SLois Curfman McInnes op == ROW_ORIENTED) { 5848965ea79SLois Curfman McInnes MatSetOption(a->A,op); 5858965ea79SLois Curfman McInnes } 5868965ea79SLois Curfman McInnes else if (op == ROWS_SORTED || 5878965ea79SLois Curfman McInnes op == SYMMETRIC_MATRIX || 5888965ea79SLois Curfman McInnes op == STRUCTURALLY_SYMMETRIC_MATRIX || 5898965ea79SLois Curfman McInnes op == YES_NEW_DIAGONALS) 59039ddd567SLois Curfman McInnes PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n"); 5918965ea79SLois Curfman McInnes else if (op == COLUMN_ORIENTED) 59239ddd567SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:COLUMN_ORIENTED");} 5938965ea79SLois Curfman McInnes else if (op == NO_NEW_DIAGONALS) 59439ddd567SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");} 5958965ea79SLois Curfman McInnes else 59639ddd567SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");} 5978965ea79SLois Curfman McInnes return 0; 5988965ea79SLois Curfman McInnes } 5998965ea79SLois Curfman McInnes 6003501a2bdSLois Curfman McInnes static int MatGetSize_MPIDense(Mat A,int *m,int *n) 6018965ea79SLois Curfman McInnes { 6023501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6038965ea79SLois Curfman McInnes *m = mat->M; *n = mat->N; 6048965ea79SLois Curfman McInnes return 0; 6058965ea79SLois Curfman McInnes } 6068965ea79SLois Curfman McInnes 6073501a2bdSLois Curfman McInnes static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n) 6088965ea79SLois Curfman McInnes { 6093501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6108965ea79SLois Curfman McInnes *m = mat->m; *n = mat->N; 6118965ea79SLois Curfman McInnes return 0; 6128965ea79SLois Curfman McInnes } 6138965ea79SLois Curfman McInnes 6143501a2bdSLois Curfman McInnes static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n) 6158965ea79SLois Curfman McInnes { 6163501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6178965ea79SLois Curfman McInnes *m = mat->rstart; *n = mat->rend; 6188965ea79SLois Curfman McInnes return 0; 6198965ea79SLois Curfman McInnes } 6208965ea79SLois Curfman McInnes 6213501a2bdSLois Curfman McInnes static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v) 6228965ea79SLois Curfman McInnes { 6233501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 62439ddd567SLois Curfman McInnes int lrow, rstart = mat->rstart, rend = mat->rend; 6258965ea79SLois Curfman McInnes 62639ddd567SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows") 6278965ea79SLois Curfman McInnes lrow = row - rstart; 62839ddd567SLois Curfman McInnes return MatGetRow(mat->A,lrow,nz,idx,v); 6298965ea79SLois Curfman McInnes } 6308965ea79SLois Curfman McInnes 63139ddd567SLois Curfman McInnes static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v) 6328965ea79SLois Curfman McInnes { 6330452661fSBarry Smith if (idx) PetscFree(*idx); 6340452661fSBarry Smith if (v) PetscFree(*v); 6358965ea79SLois Curfman McInnes return 0; 6368965ea79SLois Curfman McInnes } 6378965ea79SLois Curfman McInnes 638cddf8d76SBarry Smith static int MatNorm_MPIDense(Mat A,NormType type,double *norm) 639096963f5SLois Curfman McInnes { 6403501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) A->data; 6413501a2bdSLois Curfman McInnes Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data; 6423501a2bdSLois Curfman McInnes int ierr, i, j; 6433501a2bdSLois Curfman McInnes double sum = 0.0; 6443501a2bdSLois Curfman McInnes Scalar *v = mat->v; 6453501a2bdSLois Curfman McInnes 6463501a2bdSLois Curfman McInnes if (!mdn->assembled) SETERRQ(1,"MatNorm_MPIDense:Must assemble matrix"); 6473501a2bdSLois Curfman McInnes if (mdn->size == 1) { 6483501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr); 6493501a2bdSLois Curfman McInnes } else { 6503501a2bdSLois Curfman McInnes if (type == NORM_FROBENIUS) { 6513501a2bdSLois Curfman McInnes for (i=0; i<mat->n*mat->m; i++ ) { 6523501a2bdSLois Curfman McInnes #if defined(PETSC_COMPLEX) 6533501a2bdSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 6543501a2bdSLois Curfman McInnes #else 6553501a2bdSLois Curfman McInnes sum += (*v)*(*v); v++; 6563501a2bdSLois Curfman McInnes #endif 6573501a2bdSLois Curfman McInnes } 6583501a2bdSLois Curfman McInnes MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,A->comm); 6593501a2bdSLois Curfman McInnes *norm = sqrt(*norm); 6603501a2bdSLois Curfman McInnes PLogFlops(2*mat->n*mat->m); 6613501a2bdSLois Curfman McInnes } 6623501a2bdSLois Curfman McInnes else if (type == NORM_1) { 6633501a2bdSLois Curfman McInnes double *tmp, *tmp2; 6640452661fSBarry Smith tmp = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp); 6653501a2bdSLois Curfman McInnes tmp2 = tmp + mdn->N; 666cddf8d76SBarry Smith PetscMemzero(tmp,2*mdn->N*sizeof(double)); 667096963f5SLois Curfman McInnes *norm = 0.0; 6683501a2bdSLois Curfman McInnes v = mat->v; 6693501a2bdSLois Curfman McInnes for ( j=0; j<mat->n; j++ ) { 6703501a2bdSLois Curfman McInnes for ( i=0; i<mat->m; i++ ) { 67167e560aaSBarry Smith tmp[j] += PetscAbsScalar(*v); v++; 6723501a2bdSLois Curfman McInnes } 6733501a2bdSLois Curfman McInnes } 6743501a2bdSLois Curfman McInnes MPI_Allreduce((void*)tmp,(void*)tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm); 6753501a2bdSLois Curfman McInnes for ( j=0; j<mdn->N; j++ ) { 6763501a2bdSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 6773501a2bdSLois Curfman McInnes } 6780452661fSBarry Smith PetscFree(tmp); 6793501a2bdSLois Curfman McInnes PLogFlops(mat->n*mat->m); 6803501a2bdSLois Curfman McInnes } 6813501a2bdSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 6823501a2bdSLois Curfman McInnes double ntemp; 6833501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr); 6843501a2bdSLois Curfman McInnes MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,A->comm); 6853501a2bdSLois Curfman McInnes } 6863501a2bdSLois Curfman McInnes else { 6873501a2bdSLois Curfman McInnes SETERRQ(1,"MatNorm_MPIDense:No support for two norm"); 6883501a2bdSLois Curfman McInnes } 6893501a2bdSLois Curfman McInnes } 6903501a2bdSLois Curfman McInnes return 0; 6913501a2bdSLois Curfman McInnes } 6923501a2bdSLois Curfman McInnes 6933501a2bdSLois Curfman McInnes static int MatTranspose_MPIDense(Mat A,Mat *matout) 6943501a2bdSLois Curfman McInnes { 6953501a2bdSLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 6963501a2bdSLois Curfman McInnes Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data; 6973501a2bdSLois Curfman McInnes Mat B; 6983501a2bdSLois Curfman McInnes int M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart; 6993501a2bdSLois Curfman McInnes int j, i, ierr; 7003501a2bdSLois Curfman McInnes Scalar *v; 7013501a2bdSLois Curfman McInnes 7023501a2bdSLois Curfman McInnes if (!matout && M != N) 7033501a2bdSLois Curfman McInnes SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place"); 70418f449edSLois Curfman McInnes ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,&B); CHKERRQ(ierr); 7053501a2bdSLois Curfman McInnes 7063501a2bdSLois Curfman McInnes m = Aloc->m; n = Aloc->n; v = Aloc->v; 7070452661fSBarry Smith rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork); 7083501a2bdSLois Curfman McInnes for ( j=0; j<n; j++ ) { 7093501a2bdSLois Curfman McInnes for (i=0; i<m; i++) rwork[i] = rstart + i; 7103501a2bdSLois Curfman McInnes ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr); 7113501a2bdSLois Curfman McInnes v += m; 7123501a2bdSLois Curfman McInnes } 7130452661fSBarry Smith PetscFree(rwork); 7143501a2bdSLois Curfman McInnes ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 7153501a2bdSLois Curfman McInnes ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 7163501a2bdSLois Curfman McInnes if (matout) { 7173501a2bdSLois Curfman McInnes *matout = B; 7183501a2bdSLois Curfman McInnes } else { 7193501a2bdSLois Curfman McInnes /* This isn't really an in-place transpose, but free data struct from a */ 7200452661fSBarry Smith PetscFree(a->rowners); 7213501a2bdSLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 7223501a2bdSLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 7233501a2bdSLois Curfman McInnes if (a->Mvctx) VecScatterDestroy(a->Mvctx); 7240452661fSBarry Smith PetscFree(a); 7253501a2bdSLois Curfman McInnes PetscMemcpy(A,B,sizeof(struct _Mat)); 7260452661fSBarry Smith PetscHeaderDestroy(B); 7273501a2bdSLois Curfman McInnes } 728096963f5SLois Curfman McInnes return 0; 729096963f5SLois Curfman McInnes } 730096963f5SLois Curfman McInnes 73155659b69SBarry Smith static int MatCopyPrivate_MPIDense(Mat,Mat *,int); 7328965ea79SLois Curfman McInnes 7338965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/ 73439ddd567SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIDense, 73539ddd567SLois Curfman McInnes MatGetRow_MPIDense,MatRestoreRow_MPIDense, 73639ddd567SLois Curfman McInnes MatMult_MPIDense,MatMultAdd_MPIDense, 737096963f5SLois Curfman McInnes MatMultTrans_MPIDense,MatMultTransAdd_MPIDense, 73839ddd567SLois Curfman McInnes 0,0, 73939ddd567SLois Curfman McInnes 0,0,0, 7403501a2bdSLois Curfman McInnes 0,0,MatTranspose_MPIDense, 74139ddd567SLois Curfman McInnes MatGetInfo_MPIDense,0, 742096963f5SLois Curfman McInnes MatGetDiagonal_MPIDense,0,MatNorm_MPIDense, 74339ddd567SLois Curfman McInnes MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense, 7448965ea79SLois Curfman McInnes 0, 74539ddd567SLois Curfman McInnes MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense, 74639ddd567SLois Curfman McInnes 0, 74739ddd567SLois Curfman McInnes 0,0,0,0, 74839ddd567SLois Curfman McInnes MatGetSize_MPIDense,MatGetLocalSize_MPIDense, 74939ddd567SLois Curfman McInnes MatGetOwnershipRange_MPIDense, 75039ddd567SLois Curfman McInnes 0,0, 751*b49de8d1SLois Curfman McInnes 0,0,0,0,0,MatCopyPrivate_MPIDense, 752*b49de8d1SLois Curfman McInnes 0,0,0,0,0, 753*b49de8d1SLois Curfman McInnes 0,0,MatGetValues_MPIDense}; 7548965ea79SLois Curfman McInnes 7558965ea79SLois Curfman McInnes /*@C 75639ddd567SLois Curfman McInnes MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 7578965ea79SLois Curfman McInnes 7588965ea79SLois Curfman McInnes Input Parameters: 7598965ea79SLois Curfman McInnes . comm - MPI communicator 7608965ea79SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 7618965ea79SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 7628965ea79SLois Curfman McInnes if N is given) 7638965ea79SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 7648965ea79SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 7658965ea79SLois Curfman McInnes if n is given) 76618f449edSLois Curfman McInnes . data - optional location of matrix data. Set data=0 for PETSc to 76718f449edSLois Curfman McInnes control all matrix memory allocation. 7688965ea79SLois Curfman McInnes 7698965ea79SLois Curfman McInnes Output Parameter: 7708965ea79SLois Curfman McInnes . newmat - the matrix 7718965ea79SLois Curfman McInnes 7728965ea79SLois Curfman McInnes Notes: 77339ddd567SLois Curfman McInnes The dense format is fully compatible with standard Fortran 77 77439ddd567SLois Curfman McInnes storage by columns. 7758965ea79SLois Curfman McInnes 77618f449edSLois Curfman McInnes The data input variable is intended primarily for Fortran programmers 77718f449edSLois Curfman McInnes who wish to allocate their own matrix memory space. Most users should 77818f449edSLois Curfman McInnes set data=0. 77918f449edSLois Curfman McInnes 7808965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 7818965ea79SLois Curfman McInnes (possibly both). 7828965ea79SLois Curfman McInnes 7833501a2bdSLois Curfman McInnes Currently, the only parallel dense matrix decomposition is by rows, 7843501a2bdSLois Curfman McInnes so that n=N and each submatrix owns all of the global columns. 7853501a2bdSLois Curfman McInnes 78639ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel 7878965ea79SLois Curfman McInnes 78839ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 7898965ea79SLois Curfman McInnes @*/ 79018f449edSLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *newmat) 7918965ea79SLois Curfman McInnes { 7928965ea79SLois Curfman McInnes Mat mat; 79339ddd567SLois Curfman McInnes Mat_MPIDense *a; 79439ddd567SLois Curfman McInnes int ierr, i; 7958965ea79SLois Curfman McInnes 79618f449edSLois Curfman McInnes /* Note: for now, this assumes that the user knows what he's doing if 79718f449edSLois Curfman McInnes data is specified above. */ 79818f449edSLois Curfman McInnes 7998965ea79SLois Curfman McInnes *newmat = 0; 8000452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm); 8018965ea79SLois Curfman McInnes PLogObjectCreate(mat); 8020452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 8038965ea79SLois Curfman McInnes PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 80439ddd567SLois Curfman McInnes mat->destroy = MatDestroy_MPIDense; 80539ddd567SLois Curfman McInnes mat->view = MatView_MPIDense; 8068965ea79SLois Curfman McInnes mat->factor = 0; 8078965ea79SLois Curfman McInnes 8088965ea79SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 8098965ea79SLois Curfman McInnes MPI_Comm_rank(comm,&a->rank); 8108965ea79SLois Curfman McInnes MPI_Comm_size(comm,&a->size); 8118965ea79SLois Curfman McInnes 81239ddd567SLois Curfman McInnes if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm); 8138965ea79SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 81439ddd567SLois Curfman McInnes 81539ddd567SLois Curfman McInnes /* each row stores all columns */ 81639ddd567SLois Curfman McInnes if (N == PETSC_DECIDE) N = n; 817d7e8b826SBarry Smith if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 818d7e8b826SBarry Smith /* if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */ 8198965ea79SLois Curfman McInnes a->N = N; 8208965ea79SLois Curfman McInnes a->M = M; 82139ddd567SLois Curfman McInnes a->m = m; 82239ddd567SLois Curfman McInnes a->n = n; 8238965ea79SLois Curfman McInnes 8248965ea79SLois Curfman McInnes /* build local table of row and column ownerships */ 825d7e8b826SBarry Smith a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 826d7e8b826SBarry Smith a->cowners = a->rowners + a->size + 1; 827d7e8b826SBarry Smith PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+ 82839ddd567SLois Curfman McInnes sizeof(Mat_MPIDense)); 8298965ea79SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 8308965ea79SLois Curfman McInnes a->rowners[0] = 0; 8318965ea79SLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 8328965ea79SLois Curfman McInnes a->rowners[i] += a->rowners[i-1]; 8338965ea79SLois Curfman McInnes } 8348965ea79SLois Curfman McInnes a->rstart = a->rowners[a->rank]; 8358965ea79SLois Curfman McInnes a->rend = a->rowners[a->rank+1]; 836d7e8b826SBarry Smith MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 837d7e8b826SBarry Smith a->cowners[0] = 0; 838d7e8b826SBarry Smith for ( i=2; i<=a->size; i++ ) { 839d7e8b826SBarry Smith a->cowners[i] += a->cowners[i-1]; 840d7e8b826SBarry Smith } 8418965ea79SLois Curfman McInnes 84218f449edSLois Curfman McInnes ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr); 8438965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 8448965ea79SLois Curfman McInnes 8458965ea79SLois Curfman McInnes /* build cache for off array entries formed */ 8468965ea79SLois Curfman McInnes ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 8478965ea79SLois Curfman McInnes 8488965ea79SLois Curfman McInnes /* stuff used for matrix vector multiply */ 8498965ea79SLois Curfman McInnes a->lvec = 0; 8508965ea79SLois Curfman McInnes a->Mvctx = 0; 8518965ea79SLois Curfman McInnes a->assembled = 0; 8528965ea79SLois Curfman McInnes 8538965ea79SLois Curfman McInnes *newmat = mat; 8548965ea79SLois Curfman McInnes return 0; 8558965ea79SLois Curfman McInnes } 8568965ea79SLois Curfman McInnes 8573501a2bdSLois Curfman McInnes static int MatCopyPrivate_MPIDense(Mat A,Mat *newmat,int cpvalues) 8588965ea79SLois Curfman McInnes { 8598965ea79SLois Curfman McInnes Mat mat; 8603501a2bdSLois Curfman McInnes Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data; 86139ddd567SLois Curfman McInnes int ierr; 8628965ea79SLois Curfman McInnes 86339ddd567SLois Curfman McInnes if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIDense:Must assemble matrix"); 8648965ea79SLois Curfman McInnes *newmat = 0; 8650452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm); 8668965ea79SLois Curfman McInnes PLogObjectCreate(mat); 8670452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 8688965ea79SLois Curfman McInnes PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 86939ddd567SLois Curfman McInnes mat->destroy = MatDestroy_MPIDense; 87039ddd567SLois Curfman McInnes mat->view = MatView_MPIDense; 8713501a2bdSLois Curfman McInnes mat->factor = A->factor; 8728965ea79SLois Curfman McInnes 8738965ea79SLois Curfman McInnes a->m = oldmat->m; 8748965ea79SLois Curfman McInnes a->n = oldmat->n; 8758965ea79SLois Curfman McInnes a->M = oldmat->M; 8768965ea79SLois Curfman McInnes a->N = oldmat->N; 8778965ea79SLois Curfman McInnes 8788965ea79SLois Curfman McInnes a->assembled = 1; 8798965ea79SLois Curfman McInnes a->rstart = oldmat->rstart; 8808965ea79SLois Curfman McInnes a->rend = oldmat->rend; 8818965ea79SLois Curfman McInnes a->size = oldmat->size; 8828965ea79SLois Curfman McInnes a->rank = oldmat->rank; 8838965ea79SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 8848965ea79SLois Curfman McInnes 8850452661fSBarry Smith a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 88639ddd567SLois Curfman McInnes PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense)); 8878965ea79SLois Curfman McInnes PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 8888965ea79SLois Curfman McInnes ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 8898965ea79SLois Curfman McInnes 8908965ea79SLois Curfman McInnes ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 8918965ea79SLois Curfman McInnes PLogObjectParent(mat,a->lvec); 89255659b69SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 8938965ea79SLois Curfman McInnes PLogObjectParent(mat,a->Mvctx); 8948965ea79SLois Curfman McInnes ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 8958965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 8968965ea79SLois Curfman McInnes *newmat = mat; 8978965ea79SLois Curfman McInnes return 0; 8988965ea79SLois Curfman McInnes } 8998965ea79SLois Curfman McInnes 9008965ea79SLois Curfman McInnes #include "sysio.h" 9018965ea79SLois Curfman McInnes 90239ddd567SLois Curfman McInnes int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat) 9038965ea79SLois Curfman McInnes { 9048965ea79SLois Curfman McInnes Mat A; 9058965ea79SLois Curfman McInnes int i, nz, ierr, j,rstart, rend, fd; 9068965ea79SLois Curfman McInnes Scalar *vals,*svals; 9078965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) bview; 9088965ea79SLois Curfman McInnes MPI_Comm comm = vobj->comm; 9098965ea79SLois Curfman McInnes MPI_Status status; 9108965ea79SLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 9118965ea79SLois Curfman McInnes int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 9128965ea79SLois Curfman McInnes int tag = ((PetscObject)bview)->tag; 9138965ea79SLois Curfman McInnes 9148965ea79SLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 9158965ea79SLois Curfman McInnes if (!rank) { 9168965ea79SLois Curfman McInnes ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 9178965ea79SLois Curfman McInnes ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 91839ddd567SLois Curfman McInnes if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object"); 9198965ea79SLois Curfman McInnes } 9208965ea79SLois Curfman McInnes 9218965ea79SLois Curfman McInnes MPI_Bcast(header+1,3,MPI_INT,0,comm); 9228965ea79SLois Curfman McInnes M = header[1]; N = header[2]; 9238965ea79SLois Curfman McInnes /* determine ownership of all rows */ 9248965ea79SLois Curfman McInnes m = M/size + ((M % size) > rank); 9250452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 9268965ea79SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 9278965ea79SLois Curfman McInnes rowners[0] = 0; 9288965ea79SLois Curfman McInnes for ( i=2; i<=size; i++ ) { 9298965ea79SLois Curfman McInnes rowners[i] += rowners[i-1]; 9308965ea79SLois Curfman McInnes } 9318965ea79SLois Curfman McInnes rstart = rowners[rank]; 9328965ea79SLois Curfman McInnes rend = rowners[rank+1]; 9338965ea79SLois Curfman McInnes 9348965ea79SLois Curfman McInnes /* distribute row lengths to all processors */ 9350452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 9368965ea79SLois Curfman McInnes offlens = ourlens + (rend-rstart); 9378965ea79SLois Curfman McInnes if (!rank) { 9380452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 9398965ea79SLois Curfman McInnes ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 9400452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 9418965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 9428965ea79SLois Curfman McInnes MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 9430452661fSBarry Smith PetscFree(sndcounts); 9448965ea79SLois Curfman McInnes } 9458965ea79SLois Curfman McInnes else { 9468965ea79SLois Curfman McInnes MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 9478965ea79SLois Curfman McInnes } 9488965ea79SLois Curfman McInnes 9498965ea79SLois Curfman McInnes if (!rank) { 9508965ea79SLois Curfman McInnes /* calculate the number of nonzeros on each processor */ 9510452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 952cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 9538965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 9548965ea79SLois Curfman McInnes for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 9558965ea79SLois Curfman McInnes procsnz[i] += rowlengths[j]; 9568965ea79SLois Curfman McInnes } 9578965ea79SLois Curfman McInnes } 9580452661fSBarry Smith PetscFree(rowlengths); 9598965ea79SLois Curfman McInnes 9608965ea79SLois Curfman McInnes /* determine max buffer needed and allocate it */ 9618965ea79SLois Curfman McInnes maxnz = 0; 9628965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 9630452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 9648965ea79SLois Curfman McInnes } 9650452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 9668965ea79SLois Curfman McInnes 9678965ea79SLois Curfman McInnes /* read in my part of the matrix column indices */ 9688965ea79SLois Curfman McInnes nz = procsnz[0]; 9690452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 9708965ea79SLois Curfman McInnes ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 9718965ea79SLois Curfman McInnes 9728965ea79SLois Curfman McInnes /* read in every one elses and ship off */ 9738965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 9748965ea79SLois Curfman McInnes nz = procsnz[i]; 9758965ea79SLois Curfman McInnes ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 9768965ea79SLois Curfman McInnes MPI_Send(cols,nz,MPI_INT,i,tag,comm); 9778965ea79SLois Curfman McInnes } 9780452661fSBarry Smith PetscFree(cols); 9798965ea79SLois Curfman McInnes } 9808965ea79SLois Curfman McInnes else { 9818965ea79SLois Curfman McInnes /* determine buffer space needed for message */ 9828965ea79SLois Curfman McInnes nz = 0; 9838965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 9848965ea79SLois Curfman McInnes nz += ourlens[i]; 9858965ea79SLois Curfman McInnes } 9860452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 9878965ea79SLois Curfman McInnes 9888965ea79SLois Curfman McInnes /* receive message of column indices*/ 9898965ea79SLois Curfman McInnes MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 9908965ea79SLois Curfman McInnes MPI_Get_count(&status,MPI_INT,&maxnz); 99139ddd567SLois Curfman McInnes if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file"); 9928965ea79SLois Curfman McInnes } 9938965ea79SLois Curfman McInnes 9948965ea79SLois Curfman McInnes /* loop over local rows, determining number of off diagonal entries */ 995cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 9968965ea79SLois Curfman McInnes jj = 0; 9978965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 9988965ea79SLois Curfman McInnes for ( j=0; j<ourlens[i]; j++ ) { 9998965ea79SLois Curfman McInnes if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 10008965ea79SLois Curfman McInnes jj++; 10018965ea79SLois Curfman McInnes } 10028965ea79SLois Curfman McInnes } 10038965ea79SLois Curfman McInnes 10048965ea79SLois Curfman McInnes /* create our matrix */ 10058965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10068965ea79SLois Curfman McInnes ourlens[i] -= offlens[i]; 10078965ea79SLois Curfman McInnes } 100839ddd567SLois Curfman McInnes if (type == MATMPIDENSE) { 100918f449edSLois Curfman McInnes ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,0,newmat); CHKERRQ(ierr); 10108965ea79SLois Curfman McInnes } 10118965ea79SLois Curfman McInnes A = *newmat; 10128965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10138965ea79SLois Curfman McInnes ourlens[i] += offlens[i]; 10148965ea79SLois Curfman McInnes } 10158965ea79SLois Curfman McInnes 10168965ea79SLois Curfman McInnes if (!rank) { 10170452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 10188965ea79SLois Curfman McInnes 10198965ea79SLois Curfman McInnes /* read in my part of the matrix numerical values */ 10208965ea79SLois Curfman McInnes nz = procsnz[0]; 10218965ea79SLois Curfman McInnes ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 10228965ea79SLois Curfman McInnes 10238965ea79SLois Curfman McInnes /* insert into matrix */ 10248965ea79SLois Curfman McInnes jj = rstart; 10258965ea79SLois Curfman McInnes smycols = mycols; 10268965ea79SLois Curfman McInnes svals = vals; 10278965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10288965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 10298965ea79SLois Curfman McInnes smycols += ourlens[i]; 10308965ea79SLois Curfman McInnes svals += ourlens[i]; 10318965ea79SLois Curfman McInnes jj++; 10328965ea79SLois Curfman McInnes } 10338965ea79SLois Curfman McInnes 10348965ea79SLois Curfman McInnes /* read in other processors and ship out */ 10358965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 10368965ea79SLois Curfman McInnes nz = procsnz[i]; 10378965ea79SLois Curfman McInnes ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 10388965ea79SLois Curfman McInnes MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 10398965ea79SLois Curfman McInnes } 10400452661fSBarry Smith PetscFree(procsnz); 10418965ea79SLois Curfman McInnes } 10428965ea79SLois Curfman McInnes else { 10438965ea79SLois Curfman McInnes /* receive numeric values */ 10440452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 10458965ea79SLois Curfman McInnes 10468965ea79SLois Curfman McInnes /* receive message of values*/ 10478965ea79SLois Curfman McInnes MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 10488965ea79SLois Curfman McInnes MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 104939ddd567SLois Curfman McInnes if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file"); 10508965ea79SLois Curfman McInnes 10518965ea79SLois Curfman McInnes /* insert into matrix */ 10528965ea79SLois Curfman McInnes jj = rstart; 10538965ea79SLois Curfman McInnes smycols = mycols; 10548965ea79SLois Curfman McInnes svals = vals; 10558965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10568965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 10578965ea79SLois Curfman McInnes smycols += ourlens[i]; 10588965ea79SLois Curfman McInnes svals += ourlens[i]; 10598965ea79SLois Curfman McInnes jj++; 10608965ea79SLois Curfman McInnes } 10618965ea79SLois Curfman McInnes } 10620452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 10638965ea79SLois Curfman McInnes 10648965ea79SLois Curfman McInnes ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 10658965ea79SLois Curfman McInnes ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 10668965ea79SLois Curfman McInnes return 0; 10678965ea79SLois Curfman McInnes } 1068