18965ea79SLois Curfman McInnes #ifndef lint 2*6d84be18SBarry Smith static char vcid[] = "$Id: mpidense.c,v 1.28 1996/02/01 18:52:35 curfman Exp bsmith $"; 38965ea79SLois Curfman McInnes #endif 48965ea79SLois Curfman McInnes 5ed3cc1f0SBarry Smith /* 6ed3cc1f0SBarry Smith Basic functions for basic parallel dense matrices. 7ed3cc1f0SBarry Smith */ 8ed3cc1f0SBarry Smith 98965ea79SLois Curfman McInnes #include "mpidense.h" 108965ea79SLois Curfman McInnes #include "vec/vecimpl.h" 118965ea79SLois Curfman McInnes 1239ddd567SLois Curfman McInnes static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n, 138965ea79SLois Curfman McInnes int *idxn,Scalar *v,InsertMode addv) 148965ea79SLois Curfman McInnes { 1539b7565bSBarry Smith Mat_MPIDense *A = (Mat_MPIDense *) mat->data; 1639b7565bSBarry Smith int ierr, i, j, rstart = A->rstart, rend = A->rend, row; 1739b7565bSBarry Smith int roworiented = A->roworiented; 188965ea79SLois Curfman McInnes 1939b7565bSBarry Smith if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) { 2039ddd567SLois Curfman McInnes SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds"); 218965ea79SLois Curfman McInnes } 2239b7565bSBarry Smith A->insertmode = addv; 238965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 2439ddd567SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row"); 2539b7565bSBarry Smith if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large"); 268965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 278965ea79SLois Curfman McInnes row = idxm[i] - rstart; 2839b7565bSBarry Smith if (roworiented) { 2939b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr); 3039b7565bSBarry Smith } 3139b7565bSBarry Smith else { 328965ea79SLois Curfman McInnes for ( j=0; j<n; j++ ) { 3339ddd567SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column"); 3439b7565bSBarry Smith if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large"); 3539b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr); 3639b7565bSBarry Smith } 378965ea79SLois Curfman McInnes } 388965ea79SLois Curfman McInnes } 398965ea79SLois Curfman McInnes else { 4039b7565bSBarry Smith if (roworiented) { 4139b7565bSBarry Smith ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr); 4239b7565bSBarry Smith } 4339b7565bSBarry Smith else { /* must stash each seperately */ 4439b7565bSBarry Smith row = idxm[i]; 4539b7565bSBarry Smith for ( j=0; j<n; j++ ) { 4639b7565bSBarry Smith ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv); 4739b7565bSBarry Smith CHKERRQ(ierr); 4839b7565bSBarry Smith } 4939b7565bSBarry Smith } 50b49de8d1SLois Curfman McInnes } 51b49de8d1SLois Curfman McInnes } 52b49de8d1SLois Curfman McInnes return 0; 53b49de8d1SLois Curfman McInnes } 54b49de8d1SLois Curfman McInnes 55b49de8d1SLois Curfman McInnes static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 56b49de8d1SLois Curfman McInnes { 57b49de8d1SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 58b49de8d1SLois Curfman McInnes int ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row; 59b49de8d1SLois Curfman McInnes 60b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 61b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row"); 62b49de8d1SLois Curfman McInnes if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large"); 63b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 64b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 65b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 66b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column"); 67b49de8d1SLois Curfman McInnes if (idxn[j] >= mdn->N) 68b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIDense:Column too large"); 69b49de8d1SLois Curfman McInnes ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr); 70b49de8d1SLois Curfman McInnes } 71b49de8d1SLois Curfman McInnes } 72b49de8d1SLois Curfman McInnes else { 73b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported"); 748965ea79SLois Curfman McInnes } 758965ea79SLois Curfman McInnes } 768965ea79SLois Curfman McInnes return 0; 778965ea79SLois Curfman McInnes } 788965ea79SLois Curfman McInnes 7939ddd567SLois Curfman McInnes static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 808965ea79SLois Curfman McInnes { 8139ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 828965ea79SLois Curfman McInnes MPI_Comm comm = mat->comm; 8339ddd567SLois Curfman McInnes int size = mdn->size, *owners = mdn->rowners, rank = mdn->rank; 848965ea79SLois Curfman McInnes int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 8539ddd567SLois Curfman McInnes int tag = mat->tag, *owner,*starts,count,ierr; 868965ea79SLois Curfman McInnes InsertMode addv; 8739ddd567SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 888965ea79SLois Curfman McInnes Scalar *rvalues,*svalues; 898965ea79SLois Curfman McInnes 908965ea79SLois Curfman McInnes /* make sure all processors are either in INSERTMODE or ADDMODE */ 91*6d84be18SBarry Smith MPI_Allreduce(&mdn->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 9239ddd567SLois Curfman McInnes if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1, 9339ddd567SLois Curfman McInnes "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs"); 948965ea79SLois Curfman McInnes } 9539ddd567SLois Curfman McInnes mdn->insertmode = addv; /* in case this processor had no cache */ 968965ea79SLois Curfman McInnes 978965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 980452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 99cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1000452661fSBarry Smith owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 10139ddd567SLois Curfman McInnes for ( i=0; i<mdn->stash.n; i++ ) { 10239ddd567SLois Curfman McInnes idx = mdn->stash.idx[i]; 1038965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 1048965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 1058965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1068965ea79SLois Curfman McInnes } 1078965ea79SLois Curfman McInnes } 1088965ea79SLois Curfman McInnes } 1098965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1108965ea79SLois Curfman McInnes 1118965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 1120452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 113*6d84be18SBarry Smith MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm); 1148965ea79SLois Curfman McInnes nreceives = work[rank]; 115*6d84be18SBarry Smith MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm); 1168965ea79SLois Curfman McInnes nmax = work[rank]; 1170452661fSBarry Smith PetscFree(work); 1188965ea79SLois Curfman McInnes 1198965ea79SLois Curfman McInnes /* post receives: 1208965ea79SLois Curfman McInnes 1) each message will consist of ordered pairs 1218965ea79SLois Curfman McInnes (global index,value) we store the global index as a double 1228965ea79SLois Curfman McInnes to simplify the message passing. 1238965ea79SLois Curfman McInnes 2) since we don't know how long each individual message is we 1248965ea79SLois Curfman McInnes allocate the largest needed buffer for each receive. Potentially 1258965ea79SLois Curfman McInnes this is a lot of wasted space. 1268965ea79SLois Curfman McInnes 1278965ea79SLois Curfman McInnes This could be done better. 1288965ea79SLois Curfman McInnes */ 1290452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 1308965ea79SLois Curfman McInnes CHKPTRQ(rvalues); 1310452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 1328965ea79SLois Curfman McInnes CHKPTRQ(recv_waits); 1338965ea79SLois Curfman McInnes for ( i=0; i<nreceives; i++ ) { 134*6d84be18SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1358965ea79SLois Curfman McInnes comm,recv_waits+i); 1368965ea79SLois Curfman McInnes } 1378965ea79SLois Curfman McInnes 1388965ea79SLois Curfman McInnes /* do sends: 1398965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 1408965ea79SLois Curfman McInnes the ith processor 1418965ea79SLois Curfman McInnes */ 1420452661fSBarry Smith svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) ); 14339ddd567SLois Curfman McInnes CHKPTRQ(svalues); 1440452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 1458965ea79SLois Curfman McInnes CHKPTRQ(send_waits); 1460452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 1478965ea79SLois Curfman McInnes starts[0] = 0; 1488965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 14939ddd567SLois Curfman McInnes for ( i=0; i<mdn->stash.n; i++ ) { 15039ddd567SLois Curfman McInnes svalues[3*starts[owner[i]]] = (Scalar) mdn->stash.idx[i]; 15139ddd567SLois Curfman McInnes svalues[3*starts[owner[i]]+1] = (Scalar) mdn->stash.idy[i]; 15239ddd567SLois Curfman McInnes svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i]; 1538965ea79SLois Curfman McInnes } 1540452661fSBarry Smith PetscFree(owner); 1558965ea79SLois Curfman McInnes starts[0] = 0; 1568965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1578965ea79SLois Curfman McInnes count = 0; 1588965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 1598965ea79SLois Curfman McInnes if (procs[i]) { 160*6d84be18SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 1618965ea79SLois Curfman McInnes comm,send_waits+count++); 1628965ea79SLois Curfman McInnes } 1638965ea79SLois Curfman McInnes } 1640452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 1658965ea79SLois Curfman McInnes 1668965ea79SLois Curfman McInnes /* Free cache space */ 16739ddd567SLois Curfman McInnes ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr); 1688965ea79SLois Curfman McInnes 16939ddd567SLois Curfman McInnes mdn->svalues = svalues; mdn->rvalues = rvalues; 17039ddd567SLois Curfman McInnes mdn->nsends = nsends; mdn->nrecvs = nreceives; 17139ddd567SLois Curfman McInnes mdn->send_waits = send_waits; mdn->recv_waits = recv_waits; 17239ddd567SLois Curfman McInnes mdn->rmax = nmax; 1738965ea79SLois Curfman McInnes 1748965ea79SLois Curfman McInnes return 0; 1758965ea79SLois Curfman McInnes } 17639ddd567SLois Curfman McInnes extern int MatSetUpMultiply_MPIDense(Mat); 1778965ea79SLois Curfman McInnes 17839ddd567SLois Curfman McInnes static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 1798965ea79SLois Curfman McInnes { 18039ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 1818965ea79SLois Curfman McInnes MPI_Status *send_status,recv_status; 18239ddd567SLois Curfman McInnes int imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col; 1838965ea79SLois Curfman McInnes Scalar *values,val; 18439ddd567SLois Curfman McInnes InsertMode addv = mdn->insertmode; 1858965ea79SLois Curfman McInnes 1868965ea79SLois Curfman McInnes /* wait on receives */ 1878965ea79SLois Curfman McInnes while (count) { 18839ddd567SLois Curfman McInnes MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status); 1898965ea79SLois Curfman McInnes /* unpack receives into our local space */ 19039ddd567SLois Curfman McInnes values = mdn->rvalues + 3*imdex*mdn->rmax; 1918965ea79SLois Curfman McInnes MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 1928965ea79SLois Curfman McInnes n = n/3; 1938965ea79SLois Curfman McInnes for ( i=0; i<n; i++ ) { 194227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - mdn->rstart; 195227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 1968965ea79SLois Curfman McInnes val = values[3*i+2]; 19739ddd567SLois Curfman McInnes if (col >= 0 && col < mdn->N) { 19839ddd567SLois Curfman McInnes MatSetValues(mdn->A,1,&row,1,&col,&val,addv); 1998965ea79SLois Curfman McInnes } 20039ddd567SLois Curfman McInnes else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");} 2018965ea79SLois Curfman McInnes } 2028965ea79SLois Curfman McInnes count--; 2038965ea79SLois Curfman McInnes } 2040452661fSBarry Smith PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues); 2058965ea79SLois Curfman McInnes 2068965ea79SLois Curfman McInnes /* wait on sends */ 20739ddd567SLois Curfman McInnes if (mdn->nsends) { 2080452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) ); 2098965ea79SLois Curfman McInnes CHKPTRQ(send_status); 21039ddd567SLois Curfman McInnes MPI_Waitall(mdn->nsends,mdn->send_waits,send_status); 2110452661fSBarry Smith PetscFree(send_status); 2128965ea79SLois Curfman McInnes } 2130452661fSBarry Smith PetscFree(mdn->send_waits); PetscFree(mdn->svalues); 2148965ea79SLois Curfman McInnes 21539ddd567SLois Curfman McInnes mdn->insertmode = NOT_SET_VALUES; 21639ddd567SLois Curfman McInnes ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr); 21739ddd567SLois Curfman McInnes ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr); 2188965ea79SLois Curfman McInnes 219227d817aSBarry Smith if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 22039ddd567SLois Curfman McInnes ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr); 2218965ea79SLois Curfman McInnes } 2228965ea79SLois Curfman McInnes return 0; 2238965ea79SLois Curfman McInnes } 2248965ea79SLois Curfman McInnes 22539ddd567SLois Curfman McInnes static int MatZeroEntries_MPIDense(Mat A) 2268965ea79SLois Curfman McInnes { 22739ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 22839ddd567SLois Curfman McInnes return MatZeroEntries(l->A); 2298965ea79SLois Curfman McInnes } 2308965ea79SLois Curfman McInnes 2318965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the 2328965ea79SLois Curfman McInnes matrix is square and the column and row owerships are identical. 2338965ea79SLois Curfman McInnes This is a BUG. The only way to fix it seems to be to access 2343501a2bdSLois Curfman McInnes mdn->A and mdn->B directly and not through the MatZeroRows() 2358965ea79SLois Curfman McInnes routine. 2368965ea79SLois Curfman McInnes */ 23739ddd567SLois Curfman McInnes static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag) 2388965ea79SLois Curfman McInnes { 23939ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 2408965ea79SLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 2418965ea79SLois Curfman McInnes int *procs,*nprocs,j,found,idx,nsends,*work; 2428965ea79SLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 2438965ea79SLois Curfman McInnes int *rvalues,tag = A->tag,count,base,slen,n,*source; 2448965ea79SLois Curfman McInnes int *lens,imdex,*lrows,*values; 2458965ea79SLois Curfman McInnes MPI_Comm comm = A->comm; 2468965ea79SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 2478965ea79SLois Curfman McInnes MPI_Status recv_status,*send_status; 2488965ea79SLois Curfman McInnes IS istmp; 2498965ea79SLois Curfman McInnes 2508965ea79SLois Curfman McInnes ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 2518965ea79SLois Curfman McInnes ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 2528965ea79SLois Curfman McInnes 2538965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 2540452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 255cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 2560452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 2578965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 2588965ea79SLois Curfman McInnes idx = rows[i]; 2598965ea79SLois Curfman McInnes found = 0; 2608965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 2618965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 2628965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 2638965ea79SLois Curfman McInnes } 2648965ea79SLois Curfman McInnes } 26539ddd567SLois Curfman McInnes if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range"); 2668965ea79SLois Curfman McInnes } 2678965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 2688965ea79SLois Curfman McInnes 2698965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 2700452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 2718965ea79SLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 2728965ea79SLois Curfman McInnes nrecvs = work[rank]; 2738965ea79SLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 2748965ea79SLois Curfman McInnes nmax = work[rank]; 2750452661fSBarry Smith PetscFree(work); 2768965ea79SLois Curfman McInnes 2778965ea79SLois Curfman McInnes /* post receives: */ 2780452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 2798965ea79SLois Curfman McInnes CHKPTRQ(rvalues); 2800452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 2818965ea79SLois Curfman McInnes CHKPTRQ(recv_waits); 2828965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 2838965ea79SLois Curfman McInnes MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 2848965ea79SLois Curfman McInnes } 2858965ea79SLois Curfman McInnes 2868965ea79SLois Curfman McInnes /* do sends: 2878965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 2888965ea79SLois Curfman McInnes the ith processor 2898965ea79SLois Curfman McInnes */ 2900452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 2910452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 2928965ea79SLois Curfman McInnes CHKPTRQ(send_waits); 2930452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 2948965ea79SLois Curfman McInnes starts[0] = 0; 2958965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2968965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 2978965ea79SLois Curfman McInnes svalues[starts[owner[i]]++] = rows[i]; 2988965ea79SLois Curfman McInnes } 2998965ea79SLois Curfman McInnes ISRestoreIndices(is,&rows); 3008965ea79SLois Curfman McInnes 3018965ea79SLois Curfman McInnes starts[0] = 0; 3028965ea79SLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3038965ea79SLois Curfman McInnes count = 0; 3048965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 3058965ea79SLois Curfman McInnes if (procs[i]) { 3068965ea79SLois Curfman McInnes MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 3078965ea79SLois Curfman McInnes } 3088965ea79SLois Curfman McInnes } 3090452661fSBarry Smith PetscFree(starts); 3108965ea79SLois Curfman McInnes 3118965ea79SLois Curfman McInnes base = owners[rank]; 3128965ea79SLois Curfman McInnes 3138965ea79SLois Curfman McInnes /* wait on receives */ 3140452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3158965ea79SLois Curfman McInnes source = lens + nrecvs; 3168965ea79SLois Curfman McInnes count = nrecvs; slen = 0; 3178965ea79SLois Curfman McInnes while (count) { 3188965ea79SLois Curfman McInnes MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3198965ea79SLois Curfman McInnes /* unpack receives into our local space */ 3208965ea79SLois Curfman McInnes MPI_Get_count(&recv_status,MPI_INT,&n); 3218965ea79SLois Curfman McInnes source[imdex] = recv_status.MPI_SOURCE; 3228965ea79SLois Curfman McInnes lens[imdex] = n; 3238965ea79SLois Curfman McInnes slen += n; 3248965ea79SLois Curfman McInnes count--; 3258965ea79SLois Curfman McInnes } 3260452661fSBarry Smith PetscFree(recv_waits); 3278965ea79SLois Curfman McInnes 3288965ea79SLois Curfman McInnes /* move the data into the send scatter */ 3290452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 3308965ea79SLois Curfman McInnes count = 0; 3318965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 3328965ea79SLois Curfman McInnes values = rvalues + i*nmax; 3338965ea79SLois Curfman McInnes for ( j=0; j<lens[i]; j++ ) { 3348965ea79SLois Curfman McInnes lrows[count++] = values[j] - base; 3358965ea79SLois Curfman McInnes } 3368965ea79SLois Curfman McInnes } 3370452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 3380452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 3398965ea79SLois Curfman McInnes 3408965ea79SLois Curfman McInnes /* actually zap the local rows */ 3418965ea79SLois Curfman McInnes ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 3428965ea79SLois Curfman McInnes PLogObjectParent(A,istmp); 3430452661fSBarry Smith PetscFree(lrows); 3448965ea79SLois Curfman McInnes ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 3458965ea79SLois Curfman McInnes ierr = ISDestroy(istmp); CHKERRQ(ierr); 3468965ea79SLois Curfman McInnes 3478965ea79SLois Curfman McInnes /* wait on sends */ 3488965ea79SLois Curfman McInnes if (nsends) { 3490452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 3508965ea79SLois Curfman McInnes CHKPTRQ(send_status); 3518965ea79SLois Curfman McInnes MPI_Waitall(nsends,send_waits,send_status); 3520452661fSBarry Smith PetscFree(send_status); 3538965ea79SLois Curfman McInnes } 3540452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 3558965ea79SLois Curfman McInnes 3568965ea79SLois Curfman McInnes return 0; 3578965ea79SLois Curfman McInnes } 3588965ea79SLois Curfman McInnes 35939ddd567SLois Curfman McInnes static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 3608965ea79SLois Curfman McInnes { 36139ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 3628965ea79SLois Curfman McInnes int ierr; 363c456f294SBarry Smith 36439ddd567SLois Curfman McInnes ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 36539ddd567SLois Curfman McInnes CHKERRQ(ierr); 36639ddd567SLois Curfman McInnes ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 36739ddd567SLois Curfman McInnes CHKERRQ(ierr); 36839ddd567SLois Curfman McInnes ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr); 3698965ea79SLois Curfman McInnes return 0; 3708965ea79SLois Curfman McInnes } 3718965ea79SLois Curfman McInnes 37239ddd567SLois Curfman McInnes static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 3738965ea79SLois Curfman McInnes { 37439ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 3758965ea79SLois Curfman McInnes int ierr; 376c456f294SBarry Smith 377c456f294SBarry Smith ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr); 378c456f294SBarry Smith ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr); 37939ddd567SLois Curfman McInnes ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr); 3808965ea79SLois Curfman McInnes return 0; 3818965ea79SLois Curfman McInnes } 3828965ea79SLois Curfman McInnes 383096963f5SLois Curfman McInnes static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy) 384096963f5SLois Curfman McInnes { 385096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 386096963f5SLois Curfman McInnes int ierr; 3873501a2bdSLois Curfman McInnes Scalar zero = 0.0; 388096963f5SLois Curfman McInnes 3893501a2bdSLois Curfman McInnes ierr = VecSet(&zero,yy); CHKERRQ(ierr); 390096963f5SLois Curfman McInnes ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr); 391096963f5SLois Curfman McInnes ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 392096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 393096963f5SLois Curfman McInnes ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 394096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 395096963f5SLois Curfman McInnes return 0; 396096963f5SLois Curfman McInnes } 397096963f5SLois Curfman McInnes 398096963f5SLois Curfman McInnes static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 399096963f5SLois Curfman McInnes { 400096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 401096963f5SLois Curfman McInnes int ierr; 402096963f5SLois Curfman McInnes 4033501a2bdSLois Curfman McInnes ierr = VecCopy(yy,zz); CHKERRQ(ierr); 4043501a2bdSLois Curfman McInnes ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr); 405096963f5SLois Curfman McInnes ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 406096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 407096963f5SLois Curfman McInnes ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 408096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 409096963f5SLois Curfman McInnes return 0; 410096963f5SLois Curfman McInnes } 411096963f5SLois Curfman McInnes 41239ddd567SLois Curfman McInnes static int MatGetDiagonal_MPIDense(Mat A,Vec v) 4138965ea79SLois Curfman McInnes { 41439ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 415096963f5SLois Curfman McInnes Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data; 416096963f5SLois Curfman McInnes int ierr, i, n, m = a->m, radd; 417096963f5SLois Curfman McInnes Scalar *x; 418ed3cc1f0SBarry Smith 419096963f5SLois Curfman McInnes ierr = VecGetArray(v,&x); CHKERRQ(ierr); 420096963f5SLois Curfman McInnes ierr = VecGetSize(v,&n); CHKERRQ(ierr); 421096963f5SLois Curfman McInnes if (n != a->M) SETERRQ(1,"MatGetDiagonal_SeqDense:Nonconforming mat and vec"); 422096963f5SLois Curfman McInnes radd = a->rstart*m*m; 423096963f5SLois Curfman McInnes for ( i=0; i<m; i++ ) { 424096963f5SLois Curfman McInnes x[i] = aloc->v[radd + i*m + i]; 425096963f5SLois Curfman McInnes } 426096963f5SLois Curfman McInnes return 0; 4278965ea79SLois Curfman McInnes } 4288965ea79SLois Curfman McInnes 42939ddd567SLois Curfman McInnes static int MatDestroy_MPIDense(PetscObject obj) 4308965ea79SLois Curfman McInnes { 4318965ea79SLois Curfman McInnes Mat mat = (Mat) obj; 4323501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4338965ea79SLois Curfman McInnes int ierr; 434ed3cc1f0SBarry Smith 4358965ea79SLois Curfman McInnes #if defined(PETSC_LOG) 4363501a2bdSLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N); 4378965ea79SLois Curfman McInnes #endif 4380452661fSBarry Smith PetscFree(mdn->rowners); 4393501a2bdSLois Curfman McInnes ierr = MatDestroy(mdn->A); CHKERRQ(ierr); 4403501a2bdSLois Curfman McInnes if (mdn->lvec) VecDestroy(mdn->lvec); 4413501a2bdSLois Curfman McInnes if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 442622d7880SLois Curfman McInnes if (mdn->factor) { 443622d7880SLois Curfman McInnes if (mdn->factor->temp) PetscFree(mdn->factor->temp); 444622d7880SLois Curfman McInnes if (mdn->factor->tag) PetscFree(mdn->factor->tag); 445622d7880SLois Curfman McInnes if (mdn->factor->pivots) PetscFree(mdn->factor->pivots); 446622d7880SLois Curfman McInnes PetscFree(mdn->factor); 447622d7880SLois Curfman McInnes } 4480452661fSBarry Smith PetscFree(mdn); 4498965ea79SLois Curfman McInnes PLogObjectDestroy(mat); 4500452661fSBarry Smith PetscHeaderDestroy(mat); 4518965ea79SLois Curfman McInnes return 0; 4528965ea79SLois Curfman McInnes } 45339ddd567SLois Curfman McInnes 4548965ea79SLois Curfman McInnes #include "pinclude/pviewer.h" 4558965ea79SLois Curfman McInnes 45639ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer) 4578965ea79SLois Curfman McInnes { 45839ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4598965ea79SLois Curfman McInnes int ierr; 46039ddd567SLois Curfman McInnes if (mdn->size == 1) { 46139ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 4628965ea79SLois Curfman McInnes } 46339ddd567SLois Curfman McInnes else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported"); 4648965ea79SLois Curfman McInnes return 0; 4658965ea79SLois Curfman McInnes } 4668965ea79SLois Curfman McInnes 46739ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer) 4688965ea79SLois Curfman McInnes { 46939ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 47039ddd567SLois Curfman McInnes int ierr, format; 4718965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) viewer; 4728965ea79SLois Curfman McInnes FILE *fd; 4738965ea79SLois Curfman McInnes 474227d817aSBarry Smith ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 4758965ea79SLois Curfman McInnes if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 4768965ea79SLois Curfman McInnes ierr = ViewerFileGetFormat_Private(viewer,&format); 4773501a2bdSLois Curfman McInnes if (format == FILE_FORMAT_INFO_DETAILED) { 478096963f5SLois Curfman McInnes int nz, nzalloc, mem, rank; 479096963f5SLois Curfman McInnes MPI_Comm_rank(mat->comm,&rank); 480096963f5SLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 481096963f5SLois Curfman McInnes MPIU_Seq_begin(mat->comm,1); 4823501a2bdSLois Curfman McInnes fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n", 483096963f5SLois Curfman McInnes rank,mdn->m,nz,nzalloc,mem); 484096963f5SLois Curfman McInnes fflush(fd); 485096963f5SLois Curfman McInnes MPIU_Seq_end(mat->comm,1); 4863501a2bdSLois Curfman McInnes ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr); 4873501a2bdSLois Curfman McInnes return 0; 4883501a2bdSLois Curfman McInnes } 4893501a2bdSLois Curfman McInnes else if (format == FILE_FORMAT_INFO) { 490096963f5SLois Curfman McInnes return 0; 4918965ea79SLois Curfman McInnes } 4928965ea79SLois Curfman McInnes } 4938965ea79SLois Curfman McInnes if (vobj->type == ASCII_FILE_VIEWER) { 4948965ea79SLois Curfman McInnes MPIU_Seq_begin(mat->comm,1); 49539ddd567SLois Curfman McInnes fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n", 49639ddd567SLois Curfman McInnes mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n); 49739ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 4988965ea79SLois Curfman McInnes fflush(fd); 4998965ea79SLois Curfman McInnes MPIU_Seq_end(mat->comm,1); 5008965ea79SLois Curfman McInnes } 5018965ea79SLois Curfman McInnes else { 50239ddd567SLois Curfman McInnes int size = mdn->size, rank = mdn->rank; 5038965ea79SLois Curfman McInnes if (size == 1) { 50439ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 5058965ea79SLois Curfman McInnes } 5068965ea79SLois Curfman McInnes else { 5078965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 5088965ea79SLois Curfman McInnes Mat A; 50939ddd567SLois Curfman McInnes int M = mdn->M, N = mdn->N,m,row,i, nz, *cols; 51039ddd567SLois Curfman McInnes Scalar *vals; 51139ddd567SLois Curfman McInnes Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data; 5128965ea79SLois Curfman McInnes 5138965ea79SLois Curfman McInnes if (!rank) { 514b4fd4287SBarry Smith ierr = MatCreateMPIDense(mat->comm,M,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr); 5158965ea79SLois Curfman McInnes } 5168965ea79SLois Curfman McInnes else { 517b4fd4287SBarry Smith ierr = MatCreateMPIDense(mat->comm,0,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr); 5188965ea79SLois Curfman McInnes } 5198965ea79SLois Curfman McInnes PLogObjectParent(mat,A); 5208965ea79SLois Curfman McInnes 52139ddd567SLois Curfman McInnes /* Copy the matrix ... This isn't the most efficient means, 52239ddd567SLois Curfman McInnes but it's quick for now */ 52339ddd567SLois Curfman McInnes row = mdn->rstart; m = Amdn->m; 5248965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 52539ddd567SLois Curfman McInnes ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 52639ddd567SLois Curfman McInnes ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr); 52739ddd567SLois Curfman McInnes ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 52839ddd567SLois Curfman McInnes row++; 5298965ea79SLois Curfman McInnes } 5308965ea79SLois Curfman McInnes 5318965ea79SLois Curfman McInnes ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 5328965ea79SLois Curfman McInnes ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 5338965ea79SLois Curfman McInnes if (!rank) { 53439ddd567SLois Curfman McInnes ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr); 5358965ea79SLois Curfman McInnes } 5368965ea79SLois Curfman McInnes ierr = MatDestroy(A); CHKERRQ(ierr); 5378965ea79SLois Curfman McInnes } 5388965ea79SLois Curfman McInnes } 5398965ea79SLois Curfman McInnes return 0; 5408965ea79SLois Curfman McInnes } 5418965ea79SLois Curfman McInnes 54239ddd567SLois Curfman McInnes static int MatView_MPIDense(PetscObject obj,Viewer viewer) 5438965ea79SLois Curfman McInnes { 5448965ea79SLois Curfman McInnes Mat mat = (Mat) obj; 5458965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) viewer; 54639ddd567SLois Curfman McInnes int ierr; 5478965ea79SLois Curfman McInnes 5488965ea79SLois Curfman McInnes if (!viewer) { 5498965ea79SLois Curfman McInnes viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 5508965ea79SLois Curfman McInnes } 55139ddd567SLois Curfman McInnes if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 55239ddd567SLois Curfman McInnes ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 5538965ea79SLois Curfman McInnes } 5548965ea79SLois Curfman McInnes else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 55539ddd567SLois Curfman McInnes ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 5568965ea79SLois Curfman McInnes } 5578965ea79SLois Curfman McInnes else if (vobj->type == BINARY_FILE_VIEWER) { 55839ddd567SLois Curfman McInnes return MatView_MPIDense_Binary(mat,viewer); 5598965ea79SLois Curfman McInnes } 5608965ea79SLois Curfman McInnes return 0; 5618965ea79SLois Curfman McInnes } 5628965ea79SLois Curfman McInnes 5633501a2bdSLois Curfman McInnes static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz, 5648965ea79SLois Curfman McInnes int *nzalloc,int *mem) 5658965ea79SLois Curfman McInnes { 5663501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 5673501a2bdSLois Curfman McInnes Mat mdn = mat->A; 56839ddd567SLois Curfman McInnes int ierr, isend[3], irecv[3]; 5698965ea79SLois Curfman McInnes 5703501a2bdSLois Curfman McInnes ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 5718965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 5728965ea79SLois Curfman McInnes *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 5738965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 5743501a2bdSLois Curfman McInnes MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm); 5758965ea79SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 5768965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 5773501a2bdSLois Curfman McInnes MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm); 5788965ea79SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 5798965ea79SLois Curfman McInnes } 5808965ea79SLois Curfman McInnes return 0; 5818965ea79SLois Curfman McInnes } 5828965ea79SLois Curfman McInnes 5838c469469SLois Curfman McInnes /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*); 5848aaee692SLois Curfman McInnes extern int MatLUFactorNumeric_MPIDense(Mat,Mat*); 5858aaee692SLois Curfman McInnes extern int MatLUFactor_MPIDense(Mat,IS,IS,double); 5868aaee692SLois Curfman McInnes extern int MatSolve_MPIDense(Mat,Vec,Vec); 5878c469469SLois Curfman McInnes extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec); 5888aaee692SLois Curfman McInnes extern int MatSolveTrans_MPIDense(Mat,Vec,Vec); 5898aaee692SLois Curfman McInnes extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */ 5908aaee692SLois Curfman McInnes 59139ddd567SLois Curfman McInnes static int MatSetOption_MPIDense(Mat A,MatOption op) 5928965ea79SLois Curfman McInnes { 59339ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 5948965ea79SLois Curfman McInnes 5958965ea79SLois Curfman McInnes if (op == NO_NEW_NONZERO_LOCATIONS || 5968965ea79SLois Curfman McInnes op == YES_NEW_NONZERO_LOCATIONS || 5978965ea79SLois Curfman McInnes op == COLUMNS_SORTED || 5988965ea79SLois Curfman McInnes op == ROW_ORIENTED) { 5998965ea79SLois Curfman McInnes MatSetOption(a->A,op); 6008965ea79SLois Curfman McInnes } 6018965ea79SLois Curfman McInnes else if (op == ROWS_SORTED || 6028965ea79SLois Curfman McInnes op == SYMMETRIC_MATRIX || 6038965ea79SLois Curfman McInnes op == STRUCTURALLY_SYMMETRIC_MATRIX || 6048965ea79SLois Curfman McInnes op == YES_NEW_DIAGONALS) 60539ddd567SLois Curfman McInnes PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n"); 6068965ea79SLois Curfman McInnes else if (op == COLUMN_ORIENTED) 60739b7565bSBarry Smith { a->roworiented = 0; MatSetOption(a->A,op);} 6088965ea79SLois Curfman McInnes else if (op == NO_NEW_DIAGONALS) 60939ddd567SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");} 6108965ea79SLois Curfman McInnes else 61139ddd567SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");} 6128965ea79SLois Curfman McInnes return 0; 6138965ea79SLois Curfman McInnes } 6148965ea79SLois Curfman McInnes 6153501a2bdSLois Curfman McInnes static int MatGetSize_MPIDense(Mat A,int *m,int *n) 6168965ea79SLois Curfman McInnes { 6173501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6188965ea79SLois Curfman McInnes *m = mat->M; *n = mat->N; 6198965ea79SLois Curfman McInnes return 0; 6208965ea79SLois Curfman McInnes } 6218965ea79SLois Curfman McInnes 6223501a2bdSLois Curfman McInnes static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n) 6238965ea79SLois Curfman McInnes { 6243501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6258965ea79SLois Curfman McInnes *m = mat->m; *n = mat->N; 6268965ea79SLois Curfman McInnes return 0; 6278965ea79SLois Curfman McInnes } 6288965ea79SLois Curfman McInnes 6293501a2bdSLois Curfman McInnes static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n) 6308965ea79SLois Curfman McInnes { 6313501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6328965ea79SLois Curfman McInnes *m = mat->rstart; *n = mat->rend; 6338965ea79SLois Curfman McInnes return 0; 6348965ea79SLois Curfman McInnes } 6358965ea79SLois Curfman McInnes 6363501a2bdSLois Curfman McInnes static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v) 6378965ea79SLois Curfman McInnes { 6383501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 63939ddd567SLois Curfman McInnes int lrow, rstart = mat->rstart, rend = mat->rend; 6408965ea79SLois Curfman McInnes 64139ddd567SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows") 6428965ea79SLois Curfman McInnes lrow = row - rstart; 64339ddd567SLois Curfman McInnes return MatGetRow(mat->A,lrow,nz,idx,v); 6448965ea79SLois Curfman McInnes } 6458965ea79SLois Curfman McInnes 64639ddd567SLois Curfman McInnes static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v) 6478965ea79SLois Curfman McInnes { 6480452661fSBarry Smith if (idx) PetscFree(*idx); 6490452661fSBarry Smith if (v) PetscFree(*v); 6508965ea79SLois Curfman McInnes return 0; 6518965ea79SLois Curfman McInnes } 6528965ea79SLois Curfman McInnes 653cddf8d76SBarry Smith static int MatNorm_MPIDense(Mat A,NormType type,double *norm) 654096963f5SLois Curfman McInnes { 6553501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) A->data; 6563501a2bdSLois Curfman McInnes Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data; 6573501a2bdSLois Curfman McInnes int ierr, i, j; 6583501a2bdSLois Curfman McInnes double sum = 0.0; 6593501a2bdSLois Curfman McInnes Scalar *v = mat->v; 6603501a2bdSLois Curfman McInnes 6613501a2bdSLois Curfman McInnes if (mdn->size == 1) { 6623501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr); 6633501a2bdSLois Curfman McInnes } else { 6643501a2bdSLois Curfman McInnes if (type == NORM_FROBENIUS) { 6653501a2bdSLois Curfman McInnes for (i=0; i<mat->n*mat->m; i++ ) { 6663501a2bdSLois Curfman McInnes #if defined(PETSC_COMPLEX) 6673501a2bdSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 6683501a2bdSLois Curfman McInnes #else 6693501a2bdSLois Curfman McInnes sum += (*v)*(*v); v++; 6703501a2bdSLois Curfman McInnes #endif 6713501a2bdSLois Curfman McInnes } 672*6d84be18SBarry Smith MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm); 6733501a2bdSLois Curfman McInnes *norm = sqrt(*norm); 6743501a2bdSLois Curfman McInnes PLogFlops(2*mat->n*mat->m); 6753501a2bdSLois Curfman McInnes } 6763501a2bdSLois Curfman McInnes else if (type == NORM_1) { 6773501a2bdSLois Curfman McInnes double *tmp, *tmp2; 6780452661fSBarry Smith tmp = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp); 6793501a2bdSLois Curfman McInnes tmp2 = tmp + mdn->N; 680cddf8d76SBarry Smith PetscMemzero(tmp,2*mdn->N*sizeof(double)); 681096963f5SLois Curfman McInnes *norm = 0.0; 6823501a2bdSLois Curfman McInnes v = mat->v; 6833501a2bdSLois Curfman McInnes for ( j=0; j<mat->n; j++ ) { 6843501a2bdSLois Curfman McInnes for ( i=0; i<mat->m; i++ ) { 68567e560aaSBarry Smith tmp[j] += PetscAbsScalar(*v); v++; 6863501a2bdSLois Curfman McInnes } 6873501a2bdSLois Curfman McInnes } 688*6d84be18SBarry Smith MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm); 6893501a2bdSLois Curfman McInnes for ( j=0; j<mdn->N; j++ ) { 6903501a2bdSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 6913501a2bdSLois Curfman McInnes } 6920452661fSBarry Smith PetscFree(tmp); 6933501a2bdSLois Curfman McInnes PLogFlops(mat->n*mat->m); 6943501a2bdSLois Curfman McInnes } 6953501a2bdSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 6963501a2bdSLois Curfman McInnes double ntemp; 6973501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr); 698*6d84be18SBarry Smith MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm); 6993501a2bdSLois Curfman McInnes } 7003501a2bdSLois Curfman McInnes else { 7013501a2bdSLois Curfman McInnes SETERRQ(1,"MatNorm_MPIDense:No support for two norm"); 7023501a2bdSLois Curfman McInnes } 7033501a2bdSLois Curfman McInnes } 7043501a2bdSLois Curfman McInnes return 0; 7053501a2bdSLois Curfman McInnes } 7063501a2bdSLois Curfman McInnes 7073501a2bdSLois Curfman McInnes static int MatTranspose_MPIDense(Mat A,Mat *matout) 7083501a2bdSLois Curfman McInnes { 7093501a2bdSLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 7103501a2bdSLois Curfman McInnes Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data; 7113501a2bdSLois Curfman McInnes Mat B; 7123501a2bdSLois Curfman McInnes int M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart; 7133501a2bdSLois Curfman McInnes int j, i, ierr; 7143501a2bdSLois Curfman McInnes Scalar *v; 7153501a2bdSLois Curfman McInnes 7163638b69dSLois Curfman McInnes if (matout == PETSC_NULL && M != N) 7173501a2bdSLois Curfman McInnes SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place"); 718b4fd4287SBarry Smith ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B); 719ed2daf61SLois Curfman McInnes CHKERRQ(ierr); 7203501a2bdSLois Curfman McInnes 7213501a2bdSLois Curfman McInnes m = Aloc->m; n = Aloc->n; v = Aloc->v; 7220452661fSBarry Smith rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork); 7233501a2bdSLois Curfman McInnes for ( j=0; j<n; j++ ) { 7243501a2bdSLois Curfman McInnes for (i=0; i<m; i++) rwork[i] = rstart + i; 7253501a2bdSLois Curfman McInnes ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr); 7263501a2bdSLois Curfman McInnes v += m; 7273501a2bdSLois Curfman McInnes } 7280452661fSBarry Smith PetscFree(rwork); 7293501a2bdSLois Curfman McInnes ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 7303501a2bdSLois Curfman McInnes ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 7313638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 7323501a2bdSLois Curfman McInnes *matout = B; 7333501a2bdSLois Curfman McInnes } else { 7343501a2bdSLois Curfman McInnes /* This isn't really an in-place transpose, but free data struct from a */ 7350452661fSBarry Smith PetscFree(a->rowners); 7363501a2bdSLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 7373501a2bdSLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 7383501a2bdSLois Curfman McInnes if (a->Mvctx) VecScatterDestroy(a->Mvctx); 7390452661fSBarry Smith PetscFree(a); 7403501a2bdSLois Curfman McInnes PetscMemcpy(A,B,sizeof(struct _Mat)); 7410452661fSBarry Smith PetscHeaderDestroy(B); 7423501a2bdSLois Curfman McInnes } 743096963f5SLois Curfman McInnes return 0; 744096963f5SLois Curfman McInnes } 745096963f5SLois Curfman McInnes 7463d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat,Mat *,int); 7478965ea79SLois Curfman McInnes 7488965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/ 74939ddd567SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIDense, 75039ddd567SLois Curfman McInnes MatGetRow_MPIDense,MatRestoreRow_MPIDense, 75139ddd567SLois Curfman McInnes MatMult_MPIDense,MatMultAdd_MPIDense, 752096963f5SLois Curfman McInnes MatMultTrans_MPIDense,MatMultTransAdd_MPIDense, 753e7ca0642SLois Curfman McInnes /* MatSolve_MPIDense,0, */ 754e7ca0642SLois Curfman McInnes 0,0, 75539ddd567SLois Curfman McInnes 0,0, 7568c469469SLois Curfman McInnes 0,0, 7578c469469SLois Curfman McInnes /* MatLUFactor_MPIDense,0, */ 7588aaee692SLois Curfman McInnes 0,MatTranspose_MPIDense, 75939ddd567SLois Curfman McInnes MatGetInfo_MPIDense,0, 760096963f5SLois Curfman McInnes MatGetDiagonal_MPIDense,0,MatNorm_MPIDense, 76139ddd567SLois Curfman McInnes MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense, 7628965ea79SLois Curfman McInnes 0, 76339ddd567SLois Curfman McInnes MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense, 7648c469469SLois Curfman McInnes 0,0,0, 7658c469469SLois Curfman McInnes /* 0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */ 7668aaee692SLois Curfman McInnes 0,0, 76739ddd567SLois Curfman McInnes MatGetSize_MPIDense,MatGetLocalSize_MPIDense, 76839ddd567SLois Curfman McInnes MatGetOwnershipRange_MPIDense, 76939ddd567SLois Curfman McInnes 0,0, 7703d1612f7SBarry Smith 0,0,0,0,0,MatConvertSameType_MPIDense, 771b49de8d1SLois Curfman McInnes 0,0,0,0,0, 772b49de8d1SLois Curfman McInnes 0,0,MatGetValues_MPIDense}; 7738965ea79SLois Curfman McInnes 7748965ea79SLois Curfman McInnes /*@C 77539ddd567SLois Curfman McInnes MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 7768965ea79SLois Curfman McInnes 7778965ea79SLois Curfman McInnes Input Parameters: 7788965ea79SLois Curfman McInnes . comm - MPI communicator 7798965ea79SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 7808965ea79SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 7818965ea79SLois Curfman McInnes if N is given) 7828965ea79SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 7838965ea79SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 7848965ea79SLois Curfman McInnes if n is given) 785b4fd4287SBarry Smith . data - optional location of matrix data. Set data=PETSC_NULL for PETSc 786dfc5480cSLois Curfman McInnes to control all matrix memory allocation. 7878965ea79SLois Curfman McInnes 7888965ea79SLois Curfman McInnes Output Parameter: 7898965ea79SLois Curfman McInnes . newmat - the matrix 7908965ea79SLois Curfman McInnes 7918965ea79SLois Curfman McInnes Notes: 79239ddd567SLois Curfman McInnes The dense format is fully compatible with standard Fortran 77 79339ddd567SLois Curfman McInnes storage by columns. 7948965ea79SLois Curfman McInnes 79518f449edSLois Curfman McInnes The data input variable is intended primarily for Fortran programmers 79618f449edSLois Curfman McInnes who wish to allocate their own matrix memory space. Most users should 797b4fd4287SBarry Smith set data=PETSC_NULL. 79818f449edSLois Curfman McInnes 7998965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 8008965ea79SLois Curfman McInnes (possibly both). 8018965ea79SLois Curfman McInnes 8023501a2bdSLois Curfman McInnes Currently, the only parallel dense matrix decomposition is by rows, 8033501a2bdSLois Curfman McInnes so that n=N and each submatrix owns all of the global columns. 8043501a2bdSLois Curfman McInnes 80539ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel 8068965ea79SLois Curfman McInnes 80739ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 8088965ea79SLois Curfman McInnes @*/ 80918f449edSLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *newmat) 8108965ea79SLois Curfman McInnes { 8118965ea79SLois Curfman McInnes Mat mat; 81239ddd567SLois Curfman McInnes Mat_MPIDense *a; 81325cdf11fSBarry Smith int ierr, i,flg; 8148965ea79SLois Curfman McInnes 815ed2daf61SLois Curfman McInnes /* Note: For now, when data is specified above, this assumes the user correctly 816ed2daf61SLois Curfman McInnes allocates the local dense storage space. We should add error checking. */ 81718f449edSLois Curfman McInnes 8188965ea79SLois Curfman McInnes *newmat = 0; 8190452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm); 8208965ea79SLois Curfman McInnes PLogObjectCreate(mat); 8210452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 8228965ea79SLois Curfman McInnes PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 82339ddd567SLois Curfman McInnes mat->destroy = MatDestroy_MPIDense; 82439ddd567SLois Curfman McInnes mat->view = MatView_MPIDense; 8258965ea79SLois Curfman McInnes mat->factor = 0; 8268965ea79SLois Curfman McInnes 827622d7880SLois Curfman McInnes a->factor = 0; 8288965ea79SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 8298965ea79SLois Curfman McInnes MPI_Comm_rank(comm,&a->rank); 8308965ea79SLois Curfman McInnes MPI_Comm_size(comm,&a->size); 8318965ea79SLois Curfman McInnes 83239ddd567SLois Curfman McInnes if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm); 8338965ea79SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 83439ddd567SLois Curfman McInnes 83539ddd567SLois Curfman McInnes /* each row stores all columns */ 83639ddd567SLois Curfman McInnes if (N == PETSC_DECIDE) N = n; 837d7e8b826SBarry Smith if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 838d7e8b826SBarry Smith /* if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */ 8398965ea79SLois Curfman McInnes a->N = N; 8408965ea79SLois Curfman McInnes a->M = M; 84139ddd567SLois Curfman McInnes a->m = m; 84239ddd567SLois Curfman McInnes a->n = n; 8438965ea79SLois Curfman McInnes 8448965ea79SLois Curfman McInnes /* build local table of row and column ownerships */ 845d7e8b826SBarry Smith a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 846d7e8b826SBarry Smith a->cowners = a->rowners + a->size + 1; 847d7e8b826SBarry Smith PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+ 84839ddd567SLois Curfman McInnes sizeof(Mat_MPIDense)); 8498965ea79SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 8508965ea79SLois Curfman McInnes a->rowners[0] = 0; 8518965ea79SLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 8528965ea79SLois Curfman McInnes a->rowners[i] += a->rowners[i-1]; 8538965ea79SLois Curfman McInnes } 8548965ea79SLois Curfman McInnes a->rstart = a->rowners[a->rank]; 8558965ea79SLois Curfman McInnes a->rend = a->rowners[a->rank+1]; 856d7e8b826SBarry Smith MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 857d7e8b826SBarry Smith a->cowners[0] = 0; 858d7e8b826SBarry Smith for ( i=2; i<=a->size; i++ ) { 859d7e8b826SBarry Smith a->cowners[i] += a->cowners[i-1]; 860d7e8b826SBarry Smith } 8618965ea79SLois Curfman McInnes 86218f449edSLois Curfman McInnes ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr); 8638965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 8648965ea79SLois Curfman McInnes 8658965ea79SLois Curfman McInnes /* build cache for off array entries formed */ 8668965ea79SLois Curfman McInnes ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 8678965ea79SLois Curfman McInnes 8688965ea79SLois Curfman McInnes /* stuff used for matrix vector multiply */ 8698965ea79SLois Curfman McInnes a->lvec = 0; 8708965ea79SLois Curfman McInnes a->Mvctx = 0; 87139b7565bSBarry Smith a->roworiented = 1; 8728965ea79SLois Curfman McInnes 8738965ea79SLois Curfman McInnes *newmat = mat; 87425cdf11fSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 87525cdf11fSBarry Smith if (flg) { 8768c469469SLois Curfman McInnes ierr = MatPrintHelp(mat); CHKERRQ(ierr); 8778c469469SLois Curfman McInnes } 8788965ea79SLois Curfman McInnes return 0; 8798965ea79SLois Curfman McInnes } 8808965ea79SLois Curfman McInnes 8813d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues) 8828965ea79SLois Curfman McInnes { 8838965ea79SLois Curfman McInnes Mat mat; 8843501a2bdSLois Curfman McInnes Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data; 88539ddd567SLois Curfman McInnes int ierr; 8862ba99913SLois Curfman McInnes FactorCtx *factor; 8878965ea79SLois Curfman McInnes 8888965ea79SLois Curfman McInnes *newmat = 0; 8890452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm); 8908965ea79SLois Curfman McInnes PLogObjectCreate(mat); 8910452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 8928965ea79SLois Curfman McInnes PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 89339ddd567SLois Curfman McInnes mat->destroy = MatDestroy_MPIDense; 89439ddd567SLois Curfman McInnes mat->view = MatView_MPIDense; 8953501a2bdSLois Curfman McInnes mat->factor = A->factor; 896c456f294SBarry Smith mat->assembled = PETSC_TRUE; 8978965ea79SLois Curfman McInnes 8988965ea79SLois Curfman McInnes a->m = oldmat->m; 8998965ea79SLois Curfman McInnes a->n = oldmat->n; 9008965ea79SLois Curfman McInnes a->M = oldmat->M; 9018965ea79SLois Curfman McInnes a->N = oldmat->N; 9022ba99913SLois Curfman McInnes if (oldmat->factor) { 9032ba99913SLois Curfman McInnes a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor); 9042ba99913SLois Curfman McInnes /* copy factor contents ... add this code! */ 9052ba99913SLois Curfman McInnes } else a->factor = 0; 9068965ea79SLois Curfman McInnes 9078965ea79SLois Curfman McInnes a->rstart = oldmat->rstart; 9088965ea79SLois Curfman McInnes a->rend = oldmat->rend; 9098965ea79SLois Curfman McInnes a->size = oldmat->size; 9108965ea79SLois Curfman McInnes a->rank = oldmat->rank; 9118965ea79SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 9128965ea79SLois Curfman McInnes 9130452661fSBarry Smith a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 91439ddd567SLois Curfman McInnes PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense)); 9158965ea79SLois Curfman McInnes PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 9168965ea79SLois Curfman McInnes ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 9178965ea79SLois Curfman McInnes 9188965ea79SLois Curfman McInnes ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 9198965ea79SLois Curfman McInnes PLogObjectParent(mat,a->lvec); 92055659b69SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 9218965ea79SLois Curfman McInnes PLogObjectParent(mat,a->Mvctx); 9228965ea79SLois Curfman McInnes ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 9238965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 9248965ea79SLois Curfman McInnes *newmat = mat; 9258965ea79SLois Curfman McInnes return 0; 9268965ea79SLois Curfman McInnes } 9278965ea79SLois Curfman McInnes 9288965ea79SLois Curfman McInnes #include "sysio.h" 9298965ea79SLois Curfman McInnes 93039ddd567SLois Curfman McInnes int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat) 9318965ea79SLois Curfman McInnes { 9328965ea79SLois Curfman McInnes Mat A; 9338965ea79SLois Curfman McInnes int i, nz, ierr, j,rstart, rend, fd; 9348965ea79SLois Curfman McInnes Scalar *vals,*svals; 9358965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) bview; 9368965ea79SLois Curfman McInnes MPI_Comm comm = vobj->comm; 9378965ea79SLois Curfman McInnes MPI_Status status; 9388965ea79SLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 9398965ea79SLois Curfman McInnes int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 9408965ea79SLois Curfman McInnes int tag = ((PetscObject)bview)->tag; 9418965ea79SLois Curfman McInnes 9428965ea79SLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 9438965ea79SLois Curfman McInnes if (!rank) { 9448965ea79SLois Curfman McInnes ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 9458965ea79SLois Curfman McInnes ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 94639ddd567SLois Curfman McInnes if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object"); 9478965ea79SLois Curfman McInnes } 9488965ea79SLois Curfman McInnes 9498965ea79SLois Curfman McInnes MPI_Bcast(header+1,3,MPI_INT,0,comm); 9508965ea79SLois Curfman McInnes M = header[1]; N = header[2]; 9518965ea79SLois Curfman McInnes /* determine ownership of all rows */ 9528965ea79SLois Curfman McInnes m = M/size + ((M % size) > rank); 9530452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 9548965ea79SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 9558965ea79SLois Curfman McInnes rowners[0] = 0; 9568965ea79SLois Curfman McInnes for ( i=2; i<=size; i++ ) { 9578965ea79SLois Curfman McInnes rowners[i] += rowners[i-1]; 9588965ea79SLois Curfman McInnes } 9598965ea79SLois Curfman McInnes rstart = rowners[rank]; 9608965ea79SLois Curfman McInnes rend = rowners[rank+1]; 9618965ea79SLois Curfman McInnes 9628965ea79SLois Curfman McInnes /* distribute row lengths to all processors */ 9630452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 9648965ea79SLois Curfman McInnes offlens = ourlens + (rend-rstart); 9658965ea79SLois Curfman McInnes if (!rank) { 9660452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 9678965ea79SLois Curfman McInnes ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 9680452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 9698965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 9708965ea79SLois Curfman McInnes MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 9710452661fSBarry Smith PetscFree(sndcounts); 9728965ea79SLois Curfman McInnes } 9738965ea79SLois Curfman McInnes else { 9748965ea79SLois Curfman McInnes MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 9758965ea79SLois Curfman McInnes } 9768965ea79SLois Curfman McInnes 9778965ea79SLois Curfman McInnes if (!rank) { 9788965ea79SLois Curfman McInnes /* calculate the number of nonzeros on each processor */ 9790452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 980cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 9818965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 9828965ea79SLois Curfman McInnes for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 9838965ea79SLois Curfman McInnes procsnz[i] += rowlengths[j]; 9848965ea79SLois Curfman McInnes } 9858965ea79SLois Curfman McInnes } 9860452661fSBarry Smith PetscFree(rowlengths); 9878965ea79SLois Curfman McInnes 9888965ea79SLois Curfman McInnes /* determine max buffer needed and allocate it */ 9898965ea79SLois Curfman McInnes maxnz = 0; 9908965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 9910452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 9928965ea79SLois Curfman McInnes } 9930452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 9948965ea79SLois Curfman McInnes 9958965ea79SLois Curfman McInnes /* read in my part of the matrix column indices */ 9968965ea79SLois Curfman McInnes nz = procsnz[0]; 9970452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 9988965ea79SLois Curfman McInnes ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 9998965ea79SLois Curfman McInnes 10008965ea79SLois Curfman McInnes /* read in every one elses and ship off */ 10018965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 10028965ea79SLois Curfman McInnes nz = procsnz[i]; 10038965ea79SLois Curfman McInnes ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 10048965ea79SLois Curfman McInnes MPI_Send(cols,nz,MPI_INT,i,tag,comm); 10058965ea79SLois Curfman McInnes } 10060452661fSBarry Smith PetscFree(cols); 10078965ea79SLois Curfman McInnes } 10088965ea79SLois Curfman McInnes else { 10098965ea79SLois Curfman McInnes /* determine buffer space needed for message */ 10108965ea79SLois Curfman McInnes nz = 0; 10118965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10128965ea79SLois Curfman McInnes nz += ourlens[i]; 10138965ea79SLois Curfman McInnes } 10140452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 10158965ea79SLois Curfman McInnes 10168965ea79SLois Curfman McInnes /* receive message of column indices*/ 10178965ea79SLois Curfman McInnes MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 10188965ea79SLois Curfman McInnes MPI_Get_count(&status,MPI_INT,&maxnz); 101939ddd567SLois Curfman McInnes if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file"); 10208965ea79SLois Curfman McInnes } 10218965ea79SLois Curfman McInnes 10228965ea79SLois Curfman McInnes /* loop over local rows, determining number of off diagonal entries */ 1023cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 10248965ea79SLois Curfman McInnes jj = 0; 10258965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10268965ea79SLois Curfman McInnes for ( j=0; j<ourlens[i]; j++ ) { 10278965ea79SLois Curfman McInnes if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 10288965ea79SLois Curfman McInnes jj++; 10298965ea79SLois Curfman McInnes } 10308965ea79SLois Curfman McInnes } 10318965ea79SLois Curfman McInnes 10328965ea79SLois Curfman McInnes /* create our matrix */ 10338965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10348965ea79SLois Curfman McInnes ourlens[i] -= offlens[i]; 10358965ea79SLois Curfman McInnes } 103639ddd567SLois Curfman McInnes if (type == MATMPIDENSE) { 1037b4fd4287SBarry Smith ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 10388965ea79SLois Curfman McInnes } 10398965ea79SLois Curfman McInnes A = *newmat; 10408965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10418965ea79SLois Curfman McInnes ourlens[i] += offlens[i]; 10428965ea79SLois Curfman McInnes } 10438965ea79SLois Curfman McInnes 10448965ea79SLois Curfman McInnes if (!rank) { 10450452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 10468965ea79SLois Curfman McInnes 10478965ea79SLois Curfman McInnes /* read in my part of the matrix numerical values */ 10488965ea79SLois Curfman McInnes nz = procsnz[0]; 10498965ea79SLois Curfman McInnes ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 10508965ea79SLois Curfman McInnes 10518965ea79SLois Curfman McInnes /* insert into matrix */ 10528965ea79SLois Curfman McInnes jj = rstart; 10538965ea79SLois Curfman McInnes smycols = mycols; 10548965ea79SLois Curfman McInnes svals = vals; 10558965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10568965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 10578965ea79SLois Curfman McInnes smycols += ourlens[i]; 10588965ea79SLois Curfman McInnes svals += ourlens[i]; 10598965ea79SLois Curfman McInnes jj++; 10608965ea79SLois Curfman McInnes } 10618965ea79SLois Curfman McInnes 10628965ea79SLois Curfman McInnes /* read in other processors and ship out */ 10638965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 10648965ea79SLois Curfman McInnes nz = procsnz[i]; 10658965ea79SLois Curfman McInnes ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 10668965ea79SLois Curfman McInnes MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 10678965ea79SLois Curfman McInnes } 10680452661fSBarry Smith PetscFree(procsnz); 10698965ea79SLois Curfman McInnes } 10708965ea79SLois Curfman McInnes else { 10718965ea79SLois Curfman McInnes /* receive numeric values */ 10720452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 10738965ea79SLois Curfman McInnes 10748965ea79SLois Curfman McInnes /* receive message of values*/ 10758965ea79SLois Curfman McInnes MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 10768965ea79SLois Curfman McInnes MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 107739ddd567SLois Curfman McInnes if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file"); 10788965ea79SLois Curfman McInnes 10798965ea79SLois Curfman McInnes /* insert into matrix */ 10808965ea79SLois Curfman McInnes jj = rstart; 10818965ea79SLois Curfman McInnes smycols = mycols; 10828965ea79SLois Curfman McInnes svals = vals; 10838965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10848965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 10858965ea79SLois Curfman McInnes smycols += ourlens[i]; 10868965ea79SLois Curfman McInnes svals += ourlens[i]; 10878965ea79SLois Curfman McInnes jj++; 10888965ea79SLois Curfman McInnes } 10898965ea79SLois Curfman McInnes } 10900452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 10918965ea79SLois Curfman McInnes 10928965ea79SLois Curfman McInnes ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 10938965ea79SLois Curfman McInnes ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 10948965ea79SLois Curfman McInnes return 0; 10958965ea79SLois Curfman McInnes } 1096