18965ea79SLois Curfman McInnes #ifndef lint 2*c456f294SBarry Smith static char vcid[] = "$Id: mpidense.c,v 1.25 1996/01/12 22:07:01 bsmith Exp bsmith $"; 38965ea79SLois Curfman McInnes #endif 48965ea79SLois Curfman McInnes 5ed3cc1f0SBarry Smith /* 6ed3cc1f0SBarry Smith Basic functions for basic parallel dense matrices. 7ed3cc1f0SBarry Smith */ 8ed3cc1f0SBarry Smith 98965ea79SLois Curfman McInnes #include "mpidense.h" 108965ea79SLois Curfman McInnes #include "vec/vecimpl.h" 118965ea79SLois Curfman McInnes #include "inline/spops.h" 128965ea79SLois Curfman McInnes 1339ddd567SLois Curfman McInnes static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n, 148965ea79SLois Curfman McInnes int *idxn,Scalar *v,InsertMode addv) 158965ea79SLois Curfman McInnes { 1639b7565bSBarry Smith Mat_MPIDense *A = (Mat_MPIDense *) mat->data; 1739b7565bSBarry Smith int ierr, i, j, rstart = A->rstart, rend = A->rend, row; 1839b7565bSBarry Smith int roworiented = A->roworiented; 198965ea79SLois Curfman McInnes 2039b7565bSBarry Smith if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) { 2139ddd567SLois Curfman McInnes SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds"); 228965ea79SLois Curfman McInnes } 2339b7565bSBarry Smith A->insertmode = addv; 248965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 2539ddd567SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row"); 2639b7565bSBarry Smith if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large"); 278965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 288965ea79SLois Curfman McInnes row = idxm[i] - rstart; 2939b7565bSBarry Smith if (roworiented) { 3039b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr); 3139b7565bSBarry Smith } 3239b7565bSBarry Smith else { 338965ea79SLois Curfman McInnes for ( j=0; j<n; j++ ) { 3439ddd567SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column"); 3539b7565bSBarry Smith if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large"); 3639b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr); 3739b7565bSBarry Smith } 388965ea79SLois Curfman McInnes } 398965ea79SLois Curfman McInnes } 408965ea79SLois Curfman McInnes else { 4139b7565bSBarry Smith if (roworiented) { 4239b7565bSBarry Smith ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr); 4339b7565bSBarry Smith } 4439b7565bSBarry Smith else { /* must stash each seperately */ 4539b7565bSBarry Smith row = idxm[i]; 4639b7565bSBarry Smith for ( j=0; j<n; j++ ) { 4739b7565bSBarry Smith ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv); 4839b7565bSBarry Smith CHKERRQ(ierr); 4939b7565bSBarry Smith } 5039b7565bSBarry Smith } 51b49de8d1SLois Curfman McInnes } 52b49de8d1SLois Curfman McInnes } 53b49de8d1SLois Curfman McInnes return 0; 54b49de8d1SLois Curfman McInnes } 55b49de8d1SLois Curfman McInnes 56b49de8d1SLois Curfman McInnes static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 57b49de8d1SLois Curfman McInnes { 58b49de8d1SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 59b49de8d1SLois Curfman McInnes int ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row; 60b49de8d1SLois Curfman McInnes 61b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 62b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row"); 63b49de8d1SLois Curfman McInnes if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large"); 64b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 65b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 66b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 67b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column"); 68b49de8d1SLois Curfman McInnes if (idxn[j] >= mdn->N) 69b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIDense:Column too large"); 70b49de8d1SLois Curfman McInnes ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr); 71b49de8d1SLois Curfman McInnes } 72b49de8d1SLois Curfman McInnes } 73b49de8d1SLois Curfman McInnes else { 74b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported"); 758965ea79SLois Curfman McInnes } 768965ea79SLois Curfman McInnes } 778965ea79SLois Curfman McInnes return 0; 788965ea79SLois Curfman McInnes } 798965ea79SLois Curfman McInnes 8039ddd567SLois Curfman McInnes static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 818965ea79SLois Curfman McInnes { 8239ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 838965ea79SLois Curfman McInnes MPI_Comm comm = mat->comm; 8439ddd567SLois Curfman McInnes int size = mdn->size, *owners = mdn->rowners, rank = mdn->rank; 858965ea79SLois Curfman McInnes int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 8639ddd567SLois Curfman McInnes int tag = mat->tag, *owner,*starts,count,ierr; 878965ea79SLois Curfman McInnes InsertMode addv; 8839ddd567SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 898965ea79SLois Curfman McInnes Scalar *rvalues,*svalues; 908965ea79SLois Curfman McInnes 918965ea79SLois Curfman McInnes /* make sure all processors are either in INSERTMODE or ADDMODE */ 9239ddd567SLois Curfman McInnes MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT, 9339ddd567SLois Curfman McInnes MPI_BOR,comm); 9439ddd567SLois Curfman McInnes if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1, 9539ddd567SLois Curfman McInnes "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs"); 968965ea79SLois Curfman McInnes } 9739ddd567SLois Curfman McInnes mdn->insertmode = addv; /* in case this processor had no cache */ 988965ea79SLois Curfman McInnes 998965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 1000452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 101cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1020452661fSBarry Smith owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 10339ddd567SLois Curfman McInnes for ( i=0; i<mdn->stash.n; i++ ) { 10439ddd567SLois Curfman McInnes idx = mdn->stash.idx[i]; 1058965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 1068965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 1078965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1088965ea79SLois Curfman McInnes } 1098965ea79SLois Curfman McInnes } 1108965ea79SLois Curfman McInnes } 1118965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1128965ea79SLois Curfman McInnes 1138965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 1140452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 11539ddd567SLois Curfman McInnes MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm); 1168965ea79SLois Curfman McInnes nreceives = work[rank]; 11739ddd567SLois Curfman McInnes MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm); 1188965ea79SLois Curfman McInnes nmax = work[rank]; 1190452661fSBarry Smith PetscFree(work); 1208965ea79SLois Curfman McInnes 1218965ea79SLois Curfman McInnes /* post receives: 1228965ea79SLois Curfman McInnes 1) each message will consist of ordered pairs 1238965ea79SLois Curfman McInnes (global index,value) we store the global index as a double 1248965ea79SLois Curfman McInnes to simplify the message passing. 1258965ea79SLois Curfman McInnes 2) since we don't know how long each individual message is we 1268965ea79SLois Curfman McInnes allocate the largest needed buffer for each receive. Potentially 1278965ea79SLois Curfman McInnes this is a lot of wasted space. 1288965ea79SLois Curfman McInnes 1298965ea79SLois Curfman McInnes This could be done better. 1308965ea79SLois Curfman McInnes */ 1310452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 1328965ea79SLois Curfman McInnes CHKPTRQ(rvalues); 1330452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 1348965ea79SLois Curfman McInnes CHKPTRQ(recv_waits); 1358965ea79SLois Curfman McInnes for ( i=0; i<nreceives; i++ ) { 13639ddd567SLois Curfman McInnes MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1378965ea79SLois Curfman McInnes comm,recv_waits+i); 1388965ea79SLois Curfman McInnes } 1398965ea79SLois Curfman McInnes 1408965ea79SLois Curfman McInnes /* do sends: 1418965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 1428965ea79SLois Curfman McInnes the ith processor 1438965ea79SLois Curfman McInnes */ 1440452661fSBarry Smith svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) ); 14539ddd567SLois Curfman McInnes CHKPTRQ(svalues); 1460452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 1478965ea79SLois Curfman McInnes CHKPTRQ(send_waits); 1480452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 1498965ea79SLois Curfman McInnes starts[0] = 0; 1508965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 15139ddd567SLois Curfman McInnes for ( i=0; i<mdn->stash.n; i++ ) { 15239ddd567SLois Curfman McInnes svalues[3*starts[owner[i]]] = (Scalar) mdn->stash.idx[i]; 15339ddd567SLois Curfman McInnes svalues[3*starts[owner[i]]+1] = (Scalar) mdn->stash.idy[i]; 15439ddd567SLois Curfman McInnes svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i]; 1558965ea79SLois Curfman McInnes } 1560452661fSBarry Smith PetscFree(owner); 1578965ea79SLois Curfman McInnes starts[0] = 0; 1588965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1598965ea79SLois Curfman McInnes count = 0; 1608965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 1618965ea79SLois Curfman McInnes if (procs[i]) { 16239ddd567SLois Curfman McInnes MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag, 1638965ea79SLois Curfman McInnes comm,send_waits+count++); 1648965ea79SLois Curfman McInnes } 1658965ea79SLois Curfman McInnes } 1660452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 1678965ea79SLois Curfman McInnes 1688965ea79SLois Curfman McInnes /* Free cache space */ 16939ddd567SLois Curfman McInnes ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr); 1708965ea79SLois Curfman McInnes 17139ddd567SLois Curfman McInnes mdn->svalues = svalues; mdn->rvalues = rvalues; 17239ddd567SLois Curfman McInnes mdn->nsends = nsends; mdn->nrecvs = nreceives; 17339ddd567SLois Curfman McInnes mdn->send_waits = send_waits; mdn->recv_waits = recv_waits; 17439ddd567SLois Curfman McInnes mdn->rmax = nmax; 1758965ea79SLois Curfman McInnes 1768965ea79SLois Curfman McInnes return 0; 1778965ea79SLois Curfman McInnes } 17839ddd567SLois Curfman McInnes extern int MatSetUpMultiply_MPIDense(Mat); 1798965ea79SLois Curfman McInnes 18039ddd567SLois Curfman McInnes static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 1818965ea79SLois Curfman McInnes { 18239ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 1838965ea79SLois Curfman McInnes MPI_Status *send_status,recv_status; 18439ddd567SLois Curfman McInnes int imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col; 1858965ea79SLois Curfman McInnes Scalar *values,val; 18639ddd567SLois Curfman McInnes InsertMode addv = mdn->insertmode; 1878965ea79SLois Curfman McInnes 1888965ea79SLois Curfman McInnes /* wait on receives */ 1898965ea79SLois Curfman McInnes while (count) { 19039ddd567SLois Curfman McInnes MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status); 1918965ea79SLois Curfman McInnes /* unpack receives into our local space */ 19239ddd567SLois Curfman McInnes values = mdn->rvalues + 3*imdex*mdn->rmax; 1938965ea79SLois Curfman McInnes MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 1948965ea79SLois Curfman McInnes n = n/3; 1958965ea79SLois Curfman McInnes for ( i=0; i<n; i++ ) { 19639ddd567SLois Curfman McInnes row = (int) PETSCREAL(values[3*i]) - mdn->rstart; 1978965ea79SLois Curfman McInnes col = (int) PETSCREAL(values[3*i+1]); 1988965ea79SLois Curfman McInnes val = values[3*i+2]; 19939ddd567SLois Curfman McInnes if (col >= 0 && col < mdn->N) { 20039ddd567SLois Curfman McInnes MatSetValues(mdn->A,1,&row,1,&col,&val,addv); 2018965ea79SLois Curfman McInnes } 20239ddd567SLois Curfman McInnes else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");} 2038965ea79SLois Curfman McInnes } 2048965ea79SLois Curfman McInnes count--; 2058965ea79SLois Curfman McInnes } 2060452661fSBarry Smith PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues); 2078965ea79SLois Curfman McInnes 2088965ea79SLois Curfman McInnes /* wait on sends */ 20939ddd567SLois Curfman McInnes if (mdn->nsends) { 2100452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) ); 2118965ea79SLois Curfman McInnes CHKPTRQ(send_status); 21239ddd567SLois Curfman McInnes MPI_Waitall(mdn->nsends,mdn->send_waits,send_status); 2130452661fSBarry Smith PetscFree(send_status); 2148965ea79SLois Curfman McInnes } 2150452661fSBarry Smith PetscFree(mdn->send_waits); PetscFree(mdn->svalues); 2168965ea79SLois Curfman McInnes 21739ddd567SLois Curfman McInnes mdn->insertmode = NOT_SET_VALUES; 21839ddd567SLois Curfman McInnes ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr); 21939ddd567SLois Curfman McInnes ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr); 2208965ea79SLois Curfman McInnes 221*c456f294SBarry Smith if (!mat->assembled && mode == FINAL_ASSEMBLY) { 22239ddd567SLois Curfman McInnes ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr); 2238965ea79SLois Curfman McInnes } 2248965ea79SLois Curfman McInnes return 0; 2258965ea79SLois Curfman McInnes } 2268965ea79SLois Curfman McInnes 22739ddd567SLois Curfman McInnes static int MatZeroEntries_MPIDense(Mat A) 2288965ea79SLois Curfman McInnes { 22939ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 23039ddd567SLois Curfman McInnes return MatZeroEntries(l->A); 2318965ea79SLois Curfman McInnes } 2328965ea79SLois Curfman McInnes 2338965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the 2348965ea79SLois Curfman McInnes matrix is square and the column and row owerships are identical. 2358965ea79SLois Curfman McInnes This is a BUG. The only way to fix it seems to be to access 2363501a2bdSLois Curfman McInnes mdn->A and mdn->B directly and not through the MatZeroRows() 2378965ea79SLois Curfman McInnes routine. 2388965ea79SLois Curfman McInnes */ 23939ddd567SLois Curfman McInnes static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag) 2408965ea79SLois Curfman McInnes { 24139ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 2428965ea79SLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 2438965ea79SLois Curfman McInnes int *procs,*nprocs,j,found,idx,nsends,*work; 2448965ea79SLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 2458965ea79SLois Curfman McInnes int *rvalues,tag = A->tag,count,base,slen,n,*source; 2468965ea79SLois Curfman McInnes int *lens,imdex,*lrows,*values; 2478965ea79SLois Curfman McInnes MPI_Comm comm = A->comm; 2488965ea79SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 2498965ea79SLois Curfman McInnes MPI_Status recv_status,*send_status; 2508965ea79SLois Curfman McInnes IS istmp; 2518965ea79SLois Curfman McInnes 2528965ea79SLois Curfman McInnes ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 2538965ea79SLois Curfman McInnes ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 2548965ea79SLois Curfman McInnes 2558965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 2560452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 257cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 2580452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 2598965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 2608965ea79SLois Curfman McInnes idx = rows[i]; 2618965ea79SLois Curfman McInnes found = 0; 2628965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 2638965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 2648965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 2658965ea79SLois Curfman McInnes } 2668965ea79SLois Curfman McInnes } 26739ddd567SLois Curfman McInnes if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range"); 2688965ea79SLois Curfman McInnes } 2698965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 2708965ea79SLois Curfman McInnes 2718965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 2720452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 2738965ea79SLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 2748965ea79SLois Curfman McInnes nrecvs = work[rank]; 2758965ea79SLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 2768965ea79SLois Curfman McInnes nmax = work[rank]; 2770452661fSBarry Smith PetscFree(work); 2788965ea79SLois Curfman McInnes 2798965ea79SLois Curfman McInnes /* post receives: */ 2800452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 2818965ea79SLois Curfman McInnes CHKPTRQ(rvalues); 2820452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 2838965ea79SLois Curfman McInnes CHKPTRQ(recv_waits); 2848965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 2858965ea79SLois Curfman McInnes MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 2868965ea79SLois Curfman McInnes } 2878965ea79SLois Curfman McInnes 2888965ea79SLois Curfman McInnes /* do sends: 2898965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 2908965ea79SLois Curfman McInnes the ith processor 2918965ea79SLois Curfman McInnes */ 2920452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 2930452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 2948965ea79SLois Curfman McInnes CHKPTRQ(send_waits); 2950452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 2968965ea79SLois Curfman McInnes starts[0] = 0; 2978965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2988965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 2998965ea79SLois Curfman McInnes svalues[starts[owner[i]]++] = rows[i]; 3008965ea79SLois Curfman McInnes } 3018965ea79SLois Curfman McInnes ISRestoreIndices(is,&rows); 3028965ea79SLois Curfman McInnes 3038965ea79SLois Curfman McInnes starts[0] = 0; 3048965ea79SLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3058965ea79SLois Curfman McInnes count = 0; 3068965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 3078965ea79SLois Curfman McInnes if (procs[i]) { 3088965ea79SLois Curfman McInnes MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 3098965ea79SLois Curfman McInnes } 3108965ea79SLois Curfman McInnes } 3110452661fSBarry Smith PetscFree(starts); 3128965ea79SLois Curfman McInnes 3138965ea79SLois Curfman McInnes base = owners[rank]; 3148965ea79SLois Curfman McInnes 3158965ea79SLois Curfman McInnes /* wait on receives */ 3160452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3178965ea79SLois Curfman McInnes source = lens + nrecvs; 3188965ea79SLois Curfman McInnes count = nrecvs; slen = 0; 3198965ea79SLois Curfman McInnes while (count) { 3208965ea79SLois Curfman McInnes MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3218965ea79SLois Curfman McInnes /* unpack receives into our local space */ 3228965ea79SLois Curfman McInnes MPI_Get_count(&recv_status,MPI_INT,&n); 3238965ea79SLois Curfman McInnes source[imdex] = recv_status.MPI_SOURCE; 3248965ea79SLois Curfman McInnes lens[imdex] = n; 3258965ea79SLois Curfman McInnes slen += n; 3268965ea79SLois Curfman McInnes count--; 3278965ea79SLois Curfman McInnes } 3280452661fSBarry Smith PetscFree(recv_waits); 3298965ea79SLois Curfman McInnes 3308965ea79SLois Curfman McInnes /* move the data into the send scatter */ 3310452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 3328965ea79SLois Curfman McInnes count = 0; 3338965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 3348965ea79SLois Curfman McInnes values = rvalues + i*nmax; 3358965ea79SLois Curfman McInnes for ( j=0; j<lens[i]; j++ ) { 3368965ea79SLois Curfman McInnes lrows[count++] = values[j] - base; 3378965ea79SLois Curfman McInnes } 3388965ea79SLois Curfman McInnes } 3390452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 3400452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 3418965ea79SLois Curfman McInnes 3428965ea79SLois Curfman McInnes /* actually zap the local rows */ 3438965ea79SLois Curfman McInnes ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 3448965ea79SLois Curfman McInnes PLogObjectParent(A,istmp); 3450452661fSBarry Smith PetscFree(lrows); 3468965ea79SLois Curfman McInnes ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 3478965ea79SLois Curfman McInnes ierr = ISDestroy(istmp); CHKERRQ(ierr); 3488965ea79SLois Curfman McInnes 3498965ea79SLois Curfman McInnes /* wait on sends */ 3508965ea79SLois Curfman McInnes if (nsends) { 3510452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 3528965ea79SLois Curfman McInnes CHKPTRQ(send_status); 3538965ea79SLois Curfman McInnes MPI_Waitall(nsends,send_waits,send_status); 3540452661fSBarry Smith PetscFree(send_status); 3558965ea79SLois Curfman McInnes } 3560452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 3578965ea79SLois Curfman McInnes 3588965ea79SLois Curfman McInnes return 0; 3598965ea79SLois Curfman McInnes } 3608965ea79SLois Curfman McInnes 36139ddd567SLois Curfman McInnes static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 3628965ea79SLois Curfman McInnes { 36339ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 3648965ea79SLois Curfman McInnes int ierr; 365*c456f294SBarry Smith 36639ddd567SLois Curfman McInnes ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 36739ddd567SLois Curfman McInnes CHKERRQ(ierr); 36839ddd567SLois Curfman McInnes ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 36939ddd567SLois Curfman McInnes CHKERRQ(ierr); 37039ddd567SLois Curfman McInnes ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr); 3718965ea79SLois Curfman McInnes return 0; 3728965ea79SLois Curfman McInnes } 3738965ea79SLois Curfman McInnes 37439ddd567SLois Curfman McInnes static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 3758965ea79SLois Curfman McInnes { 37639ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 3778965ea79SLois Curfman McInnes int ierr; 378*c456f294SBarry Smith 379*c456f294SBarry Smith ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr); 380*c456f294SBarry Smith ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr); 38139ddd567SLois Curfman McInnes ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr); 3828965ea79SLois Curfman McInnes return 0; 3838965ea79SLois Curfman McInnes } 3848965ea79SLois Curfman McInnes 385096963f5SLois Curfman McInnes static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy) 386096963f5SLois Curfman McInnes { 387096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 388096963f5SLois Curfman McInnes int ierr; 3893501a2bdSLois Curfman McInnes Scalar zero = 0.0; 390096963f5SLois Curfman McInnes 3913501a2bdSLois Curfman McInnes ierr = VecSet(&zero,yy); CHKERRQ(ierr); 392096963f5SLois Curfman McInnes ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr); 393096963f5SLois Curfman McInnes ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 394096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 395096963f5SLois Curfman McInnes ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 396096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 397096963f5SLois Curfman McInnes return 0; 398096963f5SLois Curfman McInnes } 399096963f5SLois Curfman McInnes 400096963f5SLois Curfman McInnes static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 401096963f5SLois Curfman McInnes { 402096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 403096963f5SLois Curfman McInnes int ierr; 404096963f5SLois Curfman McInnes 4053501a2bdSLois Curfman McInnes ierr = VecCopy(yy,zz); CHKERRQ(ierr); 4063501a2bdSLois Curfman McInnes ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr); 407096963f5SLois Curfman McInnes ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 408096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 409096963f5SLois Curfman McInnes ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 410096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 411096963f5SLois Curfman McInnes return 0; 412096963f5SLois Curfman McInnes } 413096963f5SLois Curfman McInnes 41439ddd567SLois Curfman McInnes static int MatGetDiagonal_MPIDense(Mat A,Vec v) 4158965ea79SLois Curfman McInnes { 41639ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 417096963f5SLois Curfman McInnes Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data; 418096963f5SLois Curfman McInnes int ierr, i, n, m = a->m, radd; 419096963f5SLois Curfman McInnes Scalar *x; 420ed3cc1f0SBarry Smith 421096963f5SLois Curfman McInnes ierr = VecGetArray(v,&x); CHKERRQ(ierr); 422096963f5SLois Curfman McInnes ierr = VecGetSize(v,&n); CHKERRQ(ierr); 423096963f5SLois Curfman McInnes if (n != a->M) SETERRQ(1,"MatGetDiagonal_SeqDense:Nonconforming mat and vec"); 424096963f5SLois Curfman McInnes radd = a->rstart*m*m; 425096963f5SLois Curfman McInnes for ( i=0; i<m; i++ ) { 426096963f5SLois Curfman McInnes x[i] = aloc->v[radd + i*m + i]; 427096963f5SLois Curfman McInnes } 428096963f5SLois Curfman McInnes return 0; 4298965ea79SLois Curfman McInnes } 4308965ea79SLois Curfman McInnes 43139ddd567SLois Curfman McInnes static int MatDestroy_MPIDense(PetscObject obj) 4328965ea79SLois Curfman McInnes { 4338965ea79SLois Curfman McInnes Mat mat = (Mat) obj; 4343501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4358965ea79SLois Curfman McInnes int ierr; 436ed3cc1f0SBarry Smith 4378965ea79SLois Curfman McInnes #if defined(PETSC_LOG) 4383501a2bdSLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N); 4398965ea79SLois Curfman McInnes #endif 4400452661fSBarry Smith PetscFree(mdn->rowners); 4413501a2bdSLois Curfman McInnes ierr = MatDestroy(mdn->A); CHKERRQ(ierr); 4423501a2bdSLois Curfman McInnes if (mdn->lvec) VecDestroy(mdn->lvec); 4433501a2bdSLois Curfman McInnes if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 444622d7880SLois Curfman McInnes if (mdn->factor) { 445622d7880SLois Curfman McInnes if (mdn->factor->temp) PetscFree(mdn->factor->temp); 446622d7880SLois Curfman McInnes if (mdn->factor->tag) PetscFree(mdn->factor->tag); 447622d7880SLois Curfman McInnes if (mdn->factor->pivots) PetscFree(mdn->factor->pivots); 448622d7880SLois Curfman McInnes PetscFree(mdn->factor); 449622d7880SLois Curfman McInnes } 4500452661fSBarry Smith PetscFree(mdn); 4518965ea79SLois Curfman McInnes PLogObjectDestroy(mat); 4520452661fSBarry Smith PetscHeaderDestroy(mat); 4538965ea79SLois Curfman McInnes return 0; 4548965ea79SLois Curfman McInnes } 45539ddd567SLois Curfman McInnes 4568965ea79SLois Curfman McInnes #include "pinclude/pviewer.h" 4578965ea79SLois Curfman McInnes 45839ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer) 4598965ea79SLois Curfman McInnes { 46039ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4618965ea79SLois Curfman McInnes int ierr; 46239ddd567SLois Curfman McInnes if (mdn->size == 1) { 46339ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 4648965ea79SLois Curfman McInnes } 46539ddd567SLois Curfman McInnes else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported"); 4668965ea79SLois Curfman McInnes return 0; 4678965ea79SLois Curfman McInnes } 4688965ea79SLois Curfman McInnes 46939ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer) 4708965ea79SLois Curfman McInnes { 47139ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 47239ddd567SLois Curfman McInnes int ierr, format; 4738965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) viewer; 4748965ea79SLois Curfman McInnes FILE *fd; 4758965ea79SLois Curfman McInnes 4763501a2bdSLois Curfman McInnes ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 4778965ea79SLois Curfman McInnes if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 4788965ea79SLois Curfman McInnes ierr = ViewerFileGetFormat_Private(viewer,&format); 4793501a2bdSLois Curfman McInnes if (format == FILE_FORMAT_INFO_DETAILED) { 480096963f5SLois Curfman McInnes int nz, nzalloc, mem, rank; 481096963f5SLois Curfman McInnes MPI_Comm_rank(mat->comm,&rank); 482096963f5SLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 483096963f5SLois Curfman McInnes MPIU_Seq_begin(mat->comm,1); 4843501a2bdSLois Curfman McInnes fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n", 485096963f5SLois Curfman McInnes rank,mdn->m,nz,nzalloc,mem); 486096963f5SLois Curfman McInnes fflush(fd); 487096963f5SLois Curfman McInnes MPIU_Seq_end(mat->comm,1); 4883501a2bdSLois Curfman McInnes ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr); 4893501a2bdSLois Curfman McInnes return 0; 4903501a2bdSLois Curfman McInnes } 4913501a2bdSLois Curfman McInnes else if (format == FILE_FORMAT_INFO) { 492096963f5SLois Curfman McInnes return 0; 4938965ea79SLois Curfman McInnes } 4948965ea79SLois Curfman McInnes } 4958965ea79SLois Curfman McInnes if (vobj->type == ASCII_FILE_VIEWER) { 4968965ea79SLois Curfman McInnes MPIU_Seq_begin(mat->comm,1); 49739ddd567SLois Curfman McInnes fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n", 49839ddd567SLois Curfman McInnes mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n); 49939ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 5008965ea79SLois Curfman McInnes fflush(fd); 5018965ea79SLois Curfman McInnes MPIU_Seq_end(mat->comm,1); 5028965ea79SLois Curfman McInnes } 5038965ea79SLois Curfman McInnes else { 50439ddd567SLois Curfman McInnes int size = mdn->size, rank = mdn->rank; 5058965ea79SLois Curfman McInnes if (size == 1) { 50639ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 5078965ea79SLois Curfman McInnes } 5088965ea79SLois Curfman McInnes else { 5098965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 5108965ea79SLois Curfman McInnes Mat A; 51139ddd567SLois Curfman McInnes int M = mdn->M, N = mdn->N,m,row,i, nz, *cols; 51239ddd567SLois Curfman McInnes Scalar *vals; 51339ddd567SLois Curfman McInnes Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data; 5148965ea79SLois Curfman McInnes 5158965ea79SLois Curfman McInnes if (!rank) { 516b4fd4287SBarry Smith ierr = MatCreateMPIDense(mat->comm,M,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr); 5178965ea79SLois Curfman McInnes } 5188965ea79SLois Curfman McInnes else { 519b4fd4287SBarry Smith ierr = MatCreateMPIDense(mat->comm,0,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr); 5208965ea79SLois Curfman McInnes } 5218965ea79SLois Curfman McInnes PLogObjectParent(mat,A); 5228965ea79SLois Curfman McInnes 52339ddd567SLois Curfman McInnes /* Copy the matrix ... This isn't the most efficient means, 52439ddd567SLois Curfman McInnes but it's quick for now */ 52539ddd567SLois Curfman McInnes row = mdn->rstart; m = Amdn->m; 5268965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 52739ddd567SLois Curfman McInnes ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 52839ddd567SLois Curfman McInnes ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr); 52939ddd567SLois Curfman McInnes ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 53039ddd567SLois Curfman McInnes row++; 5318965ea79SLois Curfman McInnes } 5328965ea79SLois Curfman McInnes 5338965ea79SLois Curfman McInnes ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 5348965ea79SLois Curfman McInnes ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 5358965ea79SLois Curfman McInnes if (!rank) { 53639ddd567SLois Curfman McInnes ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr); 5378965ea79SLois Curfman McInnes } 5388965ea79SLois Curfman McInnes ierr = MatDestroy(A); CHKERRQ(ierr); 5398965ea79SLois Curfman McInnes } 5408965ea79SLois Curfman McInnes } 5418965ea79SLois Curfman McInnes return 0; 5428965ea79SLois Curfman McInnes } 5438965ea79SLois Curfman McInnes 54439ddd567SLois Curfman McInnes static int MatView_MPIDense(PetscObject obj,Viewer viewer) 5458965ea79SLois Curfman McInnes { 5468965ea79SLois Curfman McInnes Mat mat = (Mat) obj; 5478965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) viewer; 54839ddd567SLois Curfman McInnes int ierr; 5498965ea79SLois Curfman McInnes 5508965ea79SLois Curfman McInnes if (!viewer) { 5518965ea79SLois Curfman McInnes viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 5528965ea79SLois Curfman McInnes } 55339ddd567SLois Curfman McInnes if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 55439ddd567SLois Curfman McInnes ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 5558965ea79SLois Curfman McInnes } 5568965ea79SLois Curfman McInnes else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 55739ddd567SLois Curfman McInnes ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 5588965ea79SLois Curfman McInnes } 5598965ea79SLois Curfman McInnes else if (vobj->type == BINARY_FILE_VIEWER) { 56039ddd567SLois Curfman McInnes return MatView_MPIDense_Binary(mat,viewer); 5618965ea79SLois Curfman McInnes } 5628965ea79SLois Curfman McInnes return 0; 5638965ea79SLois Curfman McInnes } 5648965ea79SLois Curfman McInnes 5653501a2bdSLois Curfman McInnes static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz, 5668965ea79SLois Curfman McInnes int *nzalloc,int *mem) 5678965ea79SLois Curfman McInnes { 5683501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 5693501a2bdSLois Curfman McInnes Mat mdn = mat->A; 57039ddd567SLois Curfman McInnes int ierr, isend[3], irecv[3]; 5718965ea79SLois Curfman McInnes 5723501a2bdSLois Curfman McInnes ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 5738965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 5748965ea79SLois Curfman McInnes *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 5758965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 5763501a2bdSLois Curfman McInnes MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm); 5778965ea79SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 5788965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 5793501a2bdSLois Curfman McInnes MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm); 5808965ea79SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 5818965ea79SLois Curfman McInnes } 5828965ea79SLois Curfman McInnes return 0; 5838965ea79SLois Curfman McInnes } 5848965ea79SLois Curfman McInnes 5858c469469SLois Curfman McInnes /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*); 5868aaee692SLois Curfman McInnes extern int MatLUFactorNumeric_MPIDense(Mat,Mat*); 5878aaee692SLois Curfman McInnes extern int MatLUFactor_MPIDense(Mat,IS,IS,double); 5888aaee692SLois Curfman McInnes extern int MatSolve_MPIDense(Mat,Vec,Vec); 5898c469469SLois Curfman McInnes extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec); 5908aaee692SLois Curfman McInnes extern int MatSolveTrans_MPIDense(Mat,Vec,Vec); 5918aaee692SLois Curfman McInnes extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */ 5928aaee692SLois Curfman McInnes 59339ddd567SLois Curfman McInnes static int MatSetOption_MPIDense(Mat A,MatOption op) 5948965ea79SLois Curfman McInnes { 59539ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 5968965ea79SLois Curfman McInnes 5978965ea79SLois Curfman McInnes if (op == NO_NEW_NONZERO_LOCATIONS || 5988965ea79SLois Curfman McInnes op == YES_NEW_NONZERO_LOCATIONS || 5998965ea79SLois Curfman McInnes op == COLUMNS_SORTED || 6008965ea79SLois Curfman McInnes op == ROW_ORIENTED) { 6018965ea79SLois Curfman McInnes MatSetOption(a->A,op); 6028965ea79SLois Curfman McInnes } 6038965ea79SLois Curfman McInnes else if (op == ROWS_SORTED || 6048965ea79SLois Curfman McInnes op == SYMMETRIC_MATRIX || 6058965ea79SLois Curfman McInnes op == STRUCTURALLY_SYMMETRIC_MATRIX || 6068965ea79SLois Curfman McInnes op == YES_NEW_DIAGONALS) 60739ddd567SLois Curfman McInnes PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n"); 6088965ea79SLois Curfman McInnes else if (op == COLUMN_ORIENTED) 60939b7565bSBarry Smith { a->roworiented = 0; MatSetOption(a->A,op);} 6108965ea79SLois Curfman McInnes else if (op == NO_NEW_DIAGONALS) 61139ddd567SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");} 6128965ea79SLois Curfman McInnes else 61339ddd567SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");} 6148965ea79SLois Curfman McInnes return 0; 6158965ea79SLois Curfman McInnes } 6168965ea79SLois Curfman McInnes 6173501a2bdSLois Curfman McInnes static int MatGetSize_MPIDense(Mat A,int *m,int *n) 6188965ea79SLois Curfman McInnes { 6193501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6208965ea79SLois Curfman McInnes *m = mat->M; *n = mat->N; 6218965ea79SLois Curfman McInnes return 0; 6228965ea79SLois Curfman McInnes } 6238965ea79SLois Curfman McInnes 6243501a2bdSLois Curfman McInnes static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n) 6258965ea79SLois Curfman McInnes { 6263501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6278965ea79SLois Curfman McInnes *m = mat->m; *n = mat->N; 6288965ea79SLois Curfman McInnes return 0; 6298965ea79SLois Curfman McInnes } 6308965ea79SLois Curfman McInnes 6313501a2bdSLois Curfman McInnes static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n) 6328965ea79SLois Curfman McInnes { 6333501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6348965ea79SLois Curfman McInnes *m = mat->rstart; *n = mat->rend; 6358965ea79SLois Curfman McInnes return 0; 6368965ea79SLois Curfman McInnes } 6378965ea79SLois Curfman McInnes 6383501a2bdSLois Curfman McInnes static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v) 6398965ea79SLois Curfman McInnes { 6403501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 64139ddd567SLois Curfman McInnes int lrow, rstart = mat->rstart, rend = mat->rend; 6428965ea79SLois Curfman McInnes 64339ddd567SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows") 6448965ea79SLois Curfman McInnes lrow = row - rstart; 64539ddd567SLois Curfman McInnes return MatGetRow(mat->A,lrow,nz,idx,v); 6468965ea79SLois Curfman McInnes } 6478965ea79SLois Curfman McInnes 64839ddd567SLois Curfman McInnes static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v) 6498965ea79SLois Curfman McInnes { 6500452661fSBarry Smith if (idx) PetscFree(*idx); 6510452661fSBarry Smith if (v) PetscFree(*v); 6528965ea79SLois Curfman McInnes return 0; 6538965ea79SLois Curfman McInnes } 6548965ea79SLois Curfman McInnes 655cddf8d76SBarry Smith static int MatNorm_MPIDense(Mat A,NormType type,double *norm) 656096963f5SLois Curfman McInnes { 6573501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) A->data; 6583501a2bdSLois Curfman McInnes Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data; 6593501a2bdSLois Curfman McInnes int ierr, i, j; 6603501a2bdSLois Curfman McInnes double sum = 0.0; 6613501a2bdSLois Curfman McInnes Scalar *v = mat->v; 6623501a2bdSLois Curfman McInnes 6633501a2bdSLois Curfman McInnes if (mdn->size == 1) { 6643501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr); 6653501a2bdSLois Curfman McInnes } else { 6663501a2bdSLois Curfman McInnes if (type == NORM_FROBENIUS) { 6673501a2bdSLois Curfman McInnes for (i=0; i<mat->n*mat->m; i++ ) { 6683501a2bdSLois Curfman McInnes #if defined(PETSC_COMPLEX) 6693501a2bdSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 6703501a2bdSLois Curfman McInnes #else 6713501a2bdSLois Curfman McInnes sum += (*v)*(*v); v++; 6723501a2bdSLois Curfman McInnes #endif 6733501a2bdSLois Curfman McInnes } 6743501a2bdSLois Curfman McInnes MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,A->comm); 6753501a2bdSLois Curfman McInnes *norm = sqrt(*norm); 6763501a2bdSLois Curfman McInnes PLogFlops(2*mat->n*mat->m); 6773501a2bdSLois Curfman McInnes } 6783501a2bdSLois Curfman McInnes else if (type == NORM_1) { 6793501a2bdSLois Curfman McInnes double *tmp, *tmp2; 6800452661fSBarry Smith tmp = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp); 6813501a2bdSLois Curfman McInnes tmp2 = tmp + mdn->N; 682cddf8d76SBarry Smith PetscMemzero(tmp,2*mdn->N*sizeof(double)); 683096963f5SLois Curfman McInnes *norm = 0.0; 6843501a2bdSLois Curfman McInnes v = mat->v; 6853501a2bdSLois Curfman McInnes for ( j=0; j<mat->n; j++ ) { 6863501a2bdSLois Curfman McInnes for ( i=0; i<mat->m; i++ ) { 68767e560aaSBarry Smith tmp[j] += PetscAbsScalar(*v); v++; 6883501a2bdSLois Curfman McInnes } 6893501a2bdSLois Curfman McInnes } 6903501a2bdSLois Curfman McInnes MPI_Allreduce((void*)tmp,(void*)tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm); 6913501a2bdSLois Curfman McInnes for ( j=0; j<mdn->N; j++ ) { 6923501a2bdSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 6933501a2bdSLois Curfman McInnes } 6940452661fSBarry Smith PetscFree(tmp); 6953501a2bdSLois Curfman McInnes PLogFlops(mat->n*mat->m); 6963501a2bdSLois Curfman McInnes } 6973501a2bdSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 6983501a2bdSLois Curfman McInnes double ntemp; 6993501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr); 7003501a2bdSLois Curfman McInnes MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,A->comm); 7013501a2bdSLois Curfman McInnes } 7023501a2bdSLois Curfman McInnes else { 7033501a2bdSLois Curfman McInnes SETERRQ(1,"MatNorm_MPIDense:No support for two norm"); 7043501a2bdSLois Curfman McInnes } 7053501a2bdSLois Curfman McInnes } 7063501a2bdSLois Curfman McInnes return 0; 7073501a2bdSLois Curfman McInnes } 7083501a2bdSLois Curfman McInnes 7093501a2bdSLois Curfman McInnes static int MatTranspose_MPIDense(Mat A,Mat *matout) 7103501a2bdSLois Curfman McInnes { 7113501a2bdSLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 7123501a2bdSLois Curfman McInnes Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data; 7133501a2bdSLois Curfman McInnes Mat B; 7143501a2bdSLois Curfman McInnes int M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart; 7153501a2bdSLois Curfman McInnes int j, i, ierr; 7163501a2bdSLois Curfman McInnes Scalar *v; 7173501a2bdSLois Curfman McInnes 7183501a2bdSLois Curfman McInnes if (!matout && M != N) 7193501a2bdSLois Curfman McInnes SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place"); 720b4fd4287SBarry Smith ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B); 721ed2daf61SLois Curfman McInnes CHKERRQ(ierr); 7223501a2bdSLois Curfman McInnes 7233501a2bdSLois Curfman McInnes m = Aloc->m; n = Aloc->n; v = Aloc->v; 7240452661fSBarry Smith rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork); 7253501a2bdSLois Curfman McInnes for ( j=0; j<n; j++ ) { 7263501a2bdSLois Curfman McInnes for (i=0; i<m; i++) rwork[i] = rstart + i; 7273501a2bdSLois Curfman McInnes ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr); 7283501a2bdSLois Curfman McInnes v += m; 7293501a2bdSLois Curfman McInnes } 7300452661fSBarry Smith PetscFree(rwork); 7313501a2bdSLois Curfman McInnes ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 7323501a2bdSLois Curfman McInnes ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 7333501a2bdSLois Curfman McInnes if (matout) { 7343501a2bdSLois Curfman McInnes *matout = B; 7353501a2bdSLois Curfman McInnes } else { 7363501a2bdSLois Curfman McInnes /* This isn't really an in-place transpose, but free data struct from a */ 7370452661fSBarry Smith PetscFree(a->rowners); 7383501a2bdSLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 7393501a2bdSLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 7403501a2bdSLois Curfman McInnes if (a->Mvctx) VecScatterDestroy(a->Mvctx); 7410452661fSBarry Smith PetscFree(a); 7423501a2bdSLois Curfman McInnes PetscMemcpy(A,B,sizeof(struct _Mat)); 7430452661fSBarry Smith PetscHeaderDestroy(B); 7443501a2bdSLois Curfman McInnes } 745096963f5SLois Curfman McInnes return 0; 746096963f5SLois Curfman McInnes } 747096963f5SLois Curfman McInnes 7483d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat,Mat *,int); 7498965ea79SLois Curfman McInnes 7508965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/ 75139ddd567SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIDense, 75239ddd567SLois Curfman McInnes MatGetRow_MPIDense,MatRestoreRow_MPIDense, 75339ddd567SLois Curfman McInnes MatMult_MPIDense,MatMultAdd_MPIDense, 754096963f5SLois Curfman McInnes MatMultTrans_MPIDense,MatMultTransAdd_MPIDense, 755e7ca0642SLois Curfman McInnes /* MatSolve_MPIDense,0, */ 756e7ca0642SLois Curfman McInnes 0,0, 75739ddd567SLois Curfman McInnes 0,0, 7588c469469SLois Curfman McInnes 0,0, 7598c469469SLois Curfman McInnes /* MatLUFactor_MPIDense,0, */ 7608aaee692SLois Curfman McInnes 0,MatTranspose_MPIDense, 76139ddd567SLois Curfman McInnes MatGetInfo_MPIDense,0, 762096963f5SLois Curfman McInnes MatGetDiagonal_MPIDense,0,MatNorm_MPIDense, 76339ddd567SLois Curfman McInnes MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense, 7648965ea79SLois Curfman McInnes 0, 76539ddd567SLois Curfman McInnes MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense, 7668c469469SLois Curfman McInnes 0,0,0, 7678c469469SLois Curfman McInnes /* 0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */ 7688aaee692SLois Curfman McInnes 0,0, 76939ddd567SLois Curfman McInnes MatGetSize_MPIDense,MatGetLocalSize_MPIDense, 77039ddd567SLois Curfman McInnes MatGetOwnershipRange_MPIDense, 77139ddd567SLois Curfman McInnes 0,0, 7723d1612f7SBarry Smith 0,0,0,0,0,MatConvertSameType_MPIDense, 773b49de8d1SLois Curfman McInnes 0,0,0,0,0, 774b49de8d1SLois Curfman McInnes 0,0,MatGetValues_MPIDense}; 7758965ea79SLois Curfman McInnes 7768965ea79SLois Curfman McInnes /*@C 77739ddd567SLois Curfman McInnes MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 7788965ea79SLois Curfman McInnes 7798965ea79SLois Curfman McInnes Input Parameters: 7808965ea79SLois Curfman McInnes . comm - MPI communicator 7818965ea79SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 7828965ea79SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 7838965ea79SLois Curfman McInnes if N is given) 7848965ea79SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 7858965ea79SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 7868965ea79SLois Curfman McInnes if n is given) 787b4fd4287SBarry Smith . data - optional location of matrix data. Set data=PETSC_NULL for PETSc 788dfc5480cSLois Curfman McInnes to control all matrix memory allocation. 7898965ea79SLois Curfman McInnes 7908965ea79SLois Curfman McInnes Output Parameter: 7918965ea79SLois Curfman McInnes . newmat - the matrix 7928965ea79SLois Curfman McInnes 7938965ea79SLois Curfman McInnes Notes: 79439ddd567SLois Curfman McInnes The dense format is fully compatible with standard Fortran 77 79539ddd567SLois Curfman McInnes storage by columns. 7968965ea79SLois Curfman McInnes 79718f449edSLois Curfman McInnes The data input variable is intended primarily for Fortran programmers 79818f449edSLois Curfman McInnes who wish to allocate their own matrix memory space. Most users should 799b4fd4287SBarry Smith set data=PETSC_NULL. 80018f449edSLois Curfman McInnes 8018965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 8028965ea79SLois Curfman McInnes (possibly both). 8038965ea79SLois Curfman McInnes 8043501a2bdSLois Curfman McInnes Currently, the only parallel dense matrix decomposition is by rows, 8053501a2bdSLois Curfman McInnes so that n=N and each submatrix owns all of the global columns. 8063501a2bdSLois Curfman McInnes 80739ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel 8088965ea79SLois Curfman McInnes 80939ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 8108965ea79SLois Curfman McInnes @*/ 81118f449edSLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *newmat) 8128965ea79SLois Curfman McInnes { 8138965ea79SLois Curfman McInnes Mat mat; 81439ddd567SLois Curfman McInnes Mat_MPIDense *a; 81525cdf11fSBarry Smith int ierr, i,flg; 8168965ea79SLois Curfman McInnes 817ed2daf61SLois Curfman McInnes /* Note: For now, when data is specified above, this assumes the user correctly 818ed2daf61SLois Curfman McInnes allocates the local dense storage space. We should add error checking. */ 81918f449edSLois Curfman McInnes 8208965ea79SLois Curfman McInnes *newmat = 0; 8210452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm); 8228965ea79SLois Curfman McInnes PLogObjectCreate(mat); 8230452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 8248965ea79SLois Curfman McInnes PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 82539ddd567SLois Curfman McInnes mat->destroy = MatDestroy_MPIDense; 82639ddd567SLois Curfman McInnes mat->view = MatView_MPIDense; 8278965ea79SLois Curfman McInnes mat->factor = 0; 8288965ea79SLois Curfman McInnes 829622d7880SLois Curfman McInnes a->factor = 0; 8308965ea79SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 8318965ea79SLois Curfman McInnes MPI_Comm_rank(comm,&a->rank); 8328965ea79SLois Curfman McInnes MPI_Comm_size(comm,&a->size); 8338965ea79SLois Curfman McInnes 83439ddd567SLois Curfman McInnes if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm); 8358965ea79SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 83639ddd567SLois Curfman McInnes 83739ddd567SLois Curfman McInnes /* each row stores all columns */ 83839ddd567SLois Curfman McInnes if (N == PETSC_DECIDE) N = n; 839d7e8b826SBarry Smith if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 840d7e8b826SBarry Smith /* if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */ 8418965ea79SLois Curfman McInnes a->N = N; 8428965ea79SLois Curfman McInnes a->M = M; 84339ddd567SLois Curfman McInnes a->m = m; 84439ddd567SLois Curfman McInnes a->n = n; 8458965ea79SLois Curfman McInnes 8468965ea79SLois Curfman McInnes /* build local table of row and column ownerships */ 847d7e8b826SBarry Smith a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 848d7e8b826SBarry Smith a->cowners = a->rowners + a->size + 1; 849d7e8b826SBarry Smith PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+ 85039ddd567SLois Curfman McInnes sizeof(Mat_MPIDense)); 8518965ea79SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 8528965ea79SLois Curfman McInnes a->rowners[0] = 0; 8538965ea79SLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 8548965ea79SLois Curfman McInnes a->rowners[i] += a->rowners[i-1]; 8558965ea79SLois Curfman McInnes } 8568965ea79SLois Curfman McInnes a->rstart = a->rowners[a->rank]; 8578965ea79SLois Curfman McInnes a->rend = a->rowners[a->rank+1]; 858d7e8b826SBarry Smith MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 859d7e8b826SBarry Smith a->cowners[0] = 0; 860d7e8b826SBarry Smith for ( i=2; i<=a->size; i++ ) { 861d7e8b826SBarry Smith a->cowners[i] += a->cowners[i-1]; 862d7e8b826SBarry Smith } 8638965ea79SLois Curfman McInnes 86418f449edSLois Curfman McInnes ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr); 8658965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 8668965ea79SLois Curfman McInnes 8678965ea79SLois Curfman McInnes /* build cache for off array entries formed */ 8688965ea79SLois Curfman McInnes ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 8698965ea79SLois Curfman McInnes 8708965ea79SLois Curfman McInnes /* stuff used for matrix vector multiply */ 8718965ea79SLois Curfman McInnes a->lvec = 0; 8728965ea79SLois Curfman McInnes a->Mvctx = 0; 87339b7565bSBarry Smith a->roworiented = 1; 8748965ea79SLois Curfman McInnes 8758965ea79SLois Curfman McInnes *newmat = mat; 87625cdf11fSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 87725cdf11fSBarry Smith if (flg) { 8788c469469SLois Curfman McInnes ierr = MatPrintHelp(mat); CHKERRQ(ierr); 8798c469469SLois Curfman McInnes } 8808965ea79SLois Curfman McInnes return 0; 8818965ea79SLois Curfman McInnes } 8828965ea79SLois Curfman McInnes 8833d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues) 8848965ea79SLois Curfman McInnes { 8858965ea79SLois Curfman McInnes Mat mat; 8863501a2bdSLois Curfman McInnes Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data; 88739ddd567SLois Curfman McInnes int ierr; 8882ba99913SLois Curfman McInnes FactorCtx *factor; 8898965ea79SLois Curfman McInnes 8908965ea79SLois Curfman McInnes *newmat = 0; 8910452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm); 8928965ea79SLois Curfman McInnes PLogObjectCreate(mat); 8930452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 8948965ea79SLois Curfman McInnes PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 89539ddd567SLois Curfman McInnes mat->destroy = MatDestroy_MPIDense; 89639ddd567SLois Curfman McInnes mat->view = MatView_MPIDense; 8973501a2bdSLois Curfman McInnes mat->factor = A->factor; 898*c456f294SBarry Smith mat->assembled = PETSC_TRUE; 8998965ea79SLois Curfman McInnes 9008965ea79SLois Curfman McInnes a->m = oldmat->m; 9018965ea79SLois Curfman McInnes a->n = oldmat->n; 9028965ea79SLois Curfman McInnes a->M = oldmat->M; 9038965ea79SLois Curfman McInnes a->N = oldmat->N; 9042ba99913SLois Curfman McInnes if (oldmat->factor) { 9052ba99913SLois Curfman McInnes a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor); 9062ba99913SLois Curfman McInnes /* copy factor contents ... add this code! */ 9072ba99913SLois Curfman McInnes } else a->factor = 0; 9088965ea79SLois Curfman McInnes 9098965ea79SLois Curfman McInnes a->rstart = oldmat->rstart; 9108965ea79SLois Curfman McInnes a->rend = oldmat->rend; 9118965ea79SLois Curfman McInnes a->size = oldmat->size; 9128965ea79SLois Curfman McInnes a->rank = oldmat->rank; 9138965ea79SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 9148965ea79SLois Curfman McInnes 9150452661fSBarry Smith a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 91639ddd567SLois Curfman McInnes PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense)); 9178965ea79SLois Curfman McInnes PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 9188965ea79SLois Curfman McInnes ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 9198965ea79SLois Curfman McInnes 9208965ea79SLois Curfman McInnes ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 9218965ea79SLois Curfman McInnes PLogObjectParent(mat,a->lvec); 92255659b69SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 9238965ea79SLois Curfman McInnes PLogObjectParent(mat,a->Mvctx); 9248965ea79SLois Curfman McInnes ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 9258965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 9268965ea79SLois Curfman McInnes *newmat = mat; 9278965ea79SLois Curfman McInnes return 0; 9288965ea79SLois Curfman McInnes } 9298965ea79SLois Curfman McInnes 9308965ea79SLois Curfman McInnes #include "sysio.h" 9318965ea79SLois Curfman McInnes 93239ddd567SLois Curfman McInnes int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat) 9338965ea79SLois Curfman McInnes { 9348965ea79SLois Curfman McInnes Mat A; 9358965ea79SLois Curfman McInnes int i, nz, ierr, j,rstart, rend, fd; 9368965ea79SLois Curfman McInnes Scalar *vals,*svals; 9378965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) bview; 9388965ea79SLois Curfman McInnes MPI_Comm comm = vobj->comm; 9398965ea79SLois Curfman McInnes MPI_Status status; 9408965ea79SLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 9418965ea79SLois Curfman McInnes int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 9428965ea79SLois Curfman McInnes int tag = ((PetscObject)bview)->tag; 9438965ea79SLois Curfman McInnes 9448965ea79SLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 9458965ea79SLois Curfman McInnes if (!rank) { 9468965ea79SLois Curfman McInnes ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 9478965ea79SLois Curfman McInnes ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 94839ddd567SLois Curfman McInnes if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object"); 9498965ea79SLois Curfman McInnes } 9508965ea79SLois Curfman McInnes 9518965ea79SLois Curfman McInnes MPI_Bcast(header+1,3,MPI_INT,0,comm); 9528965ea79SLois Curfman McInnes M = header[1]; N = header[2]; 9538965ea79SLois Curfman McInnes /* determine ownership of all rows */ 9548965ea79SLois Curfman McInnes m = M/size + ((M % size) > rank); 9550452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 9568965ea79SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 9578965ea79SLois Curfman McInnes rowners[0] = 0; 9588965ea79SLois Curfman McInnes for ( i=2; i<=size; i++ ) { 9598965ea79SLois Curfman McInnes rowners[i] += rowners[i-1]; 9608965ea79SLois Curfman McInnes } 9618965ea79SLois Curfman McInnes rstart = rowners[rank]; 9628965ea79SLois Curfman McInnes rend = rowners[rank+1]; 9638965ea79SLois Curfman McInnes 9648965ea79SLois Curfman McInnes /* distribute row lengths to all processors */ 9650452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 9668965ea79SLois Curfman McInnes offlens = ourlens + (rend-rstart); 9678965ea79SLois Curfman McInnes if (!rank) { 9680452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 9698965ea79SLois Curfman McInnes ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 9700452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 9718965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 9728965ea79SLois Curfman McInnes MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 9730452661fSBarry Smith PetscFree(sndcounts); 9748965ea79SLois Curfman McInnes } 9758965ea79SLois Curfman McInnes else { 9768965ea79SLois Curfman McInnes MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 9778965ea79SLois Curfman McInnes } 9788965ea79SLois Curfman McInnes 9798965ea79SLois Curfman McInnes if (!rank) { 9808965ea79SLois Curfman McInnes /* calculate the number of nonzeros on each processor */ 9810452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 982cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 9838965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 9848965ea79SLois Curfman McInnes for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 9858965ea79SLois Curfman McInnes procsnz[i] += rowlengths[j]; 9868965ea79SLois Curfman McInnes } 9878965ea79SLois Curfman McInnes } 9880452661fSBarry Smith PetscFree(rowlengths); 9898965ea79SLois Curfman McInnes 9908965ea79SLois Curfman McInnes /* determine max buffer needed and allocate it */ 9918965ea79SLois Curfman McInnes maxnz = 0; 9928965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 9930452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 9948965ea79SLois Curfman McInnes } 9950452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 9968965ea79SLois Curfman McInnes 9978965ea79SLois Curfman McInnes /* read in my part of the matrix column indices */ 9988965ea79SLois Curfman McInnes nz = procsnz[0]; 9990452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 10008965ea79SLois Curfman McInnes ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 10018965ea79SLois Curfman McInnes 10028965ea79SLois Curfman McInnes /* read in every one elses and ship off */ 10038965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 10048965ea79SLois Curfman McInnes nz = procsnz[i]; 10058965ea79SLois Curfman McInnes ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 10068965ea79SLois Curfman McInnes MPI_Send(cols,nz,MPI_INT,i,tag,comm); 10078965ea79SLois Curfman McInnes } 10080452661fSBarry Smith PetscFree(cols); 10098965ea79SLois Curfman McInnes } 10108965ea79SLois Curfman McInnes else { 10118965ea79SLois Curfman McInnes /* determine buffer space needed for message */ 10128965ea79SLois Curfman McInnes nz = 0; 10138965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10148965ea79SLois Curfman McInnes nz += ourlens[i]; 10158965ea79SLois Curfman McInnes } 10160452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 10178965ea79SLois Curfman McInnes 10188965ea79SLois Curfman McInnes /* receive message of column indices*/ 10198965ea79SLois Curfman McInnes MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 10208965ea79SLois Curfman McInnes MPI_Get_count(&status,MPI_INT,&maxnz); 102139ddd567SLois Curfman McInnes if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file"); 10228965ea79SLois Curfman McInnes } 10238965ea79SLois Curfman McInnes 10248965ea79SLois Curfman McInnes /* loop over local rows, determining number of off diagonal entries */ 1025cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 10268965ea79SLois Curfman McInnes jj = 0; 10278965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10288965ea79SLois Curfman McInnes for ( j=0; j<ourlens[i]; j++ ) { 10298965ea79SLois Curfman McInnes if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 10308965ea79SLois Curfman McInnes jj++; 10318965ea79SLois Curfman McInnes } 10328965ea79SLois Curfman McInnes } 10338965ea79SLois Curfman McInnes 10348965ea79SLois Curfman McInnes /* create our matrix */ 10358965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10368965ea79SLois Curfman McInnes ourlens[i] -= offlens[i]; 10378965ea79SLois Curfman McInnes } 103839ddd567SLois Curfman McInnes if (type == MATMPIDENSE) { 1039b4fd4287SBarry Smith ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat); CHKERRQ(ierr); 10408965ea79SLois Curfman McInnes } 10418965ea79SLois Curfman McInnes A = *newmat; 10428965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10438965ea79SLois Curfman McInnes ourlens[i] += offlens[i]; 10448965ea79SLois Curfman McInnes } 10458965ea79SLois Curfman McInnes 10468965ea79SLois Curfman McInnes if (!rank) { 10470452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 10488965ea79SLois Curfman McInnes 10498965ea79SLois Curfman McInnes /* read in my part of the matrix numerical values */ 10508965ea79SLois Curfman McInnes nz = procsnz[0]; 10518965ea79SLois Curfman McInnes ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 10528965ea79SLois Curfman McInnes 10538965ea79SLois Curfman McInnes /* insert into matrix */ 10548965ea79SLois Curfman McInnes jj = rstart; 10558965ea79SLois Curfman McInnes smycols = mycols; 10568965ea79SLois Curfman McInnes svals = vals; 10578965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10588965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 10598965ea79SLois Curfman McInnes smycols += ourlens[i]; 10608965ea79SLois Curfman McInnes svals += ourlens[i]; 10618965ea79SLois Curfman McInnes jj++; 10628965ea79SLois Curfman McInnes } 10638965ea79SLois Curfman McInnes 10648965ea79SLois Curfman McInnes /* read in other processors and ship out */ 10658965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 10668965ea79SLois Curfman McInnes nz = procsnz[i]; 10678965ea79SLois Curfman McInnes ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 10688965ea79SLois Curfman McInnes MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 10698965ea79SLois Curfman McInnes } 10700452661fSBarry Smith PetscFree(procsnz); 10718965ea79SLois Curfman McInnes } 10728965ea79SLois Curfman McInnes else { 10738965ea79SLois Curfman McInnes /* receive numeric values */ 10740452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 10758965ea79SLois Curfman McInnes 10768965ea79SLois Curfman McInnes /* receive message of values*/ 10778965ea79SLois Curfman McInnes MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 10788965ea79SLois Curfman McInnes MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 107939ddd567SLois Curfman McInnes if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file"); 10808965ea79SLois Curfman McInnes 10818965ea79SLois Curfman McInnes /* insert into matrix */ 10828965ea79SLois Curfman McInnes jj = rstart; 10838965ea79SLois Curfman McInnes smycols = mycols; 10848965ea79SLois Curfman McInnes svals = vals; 10858965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10868965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 10878965ea79SLois Curfman McInnes smycols += ourlens[i]; 10888965ea79SLois Curfman McInnes svals += ourlens[i]; 10898965ea79SLois Curfman McInnes jj++; 10908965ea79SLois Curfman McInnes } 10918965ea79SLois Curfman McInnes } 10920452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 10938965ea79SLois Curfman McInnes 10948965ea79SLois Curfman McInnes ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 10958965ea79SLois Curfman McInnes ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 10968965ea79SLois Curfman McInnes return 0; 10978965ea79SLois Curfman McInnes } 1098