18965ea79SLois Curfman McInnes #ifndef lint 2*3638b69dSLois Curfman McInnes static char vcid[] = "$Id: mpidense.c,v 1.27 1996/01/26 04:33:42 bsmith Exp curfman $"; 38965ea79SLois Curfman McInnes #endif 48965ea79SLois Curfman McInnes 5ed3cc1f0SBarry Smith /* 6ed3cc1f0SBarry Smith Basic functions for basic parallel dense matrices. 7ed3cc1f0SBarry Smith */ 8ed3cc1f0SBarry Smith 98965ea79SLois Curfman McInnes #include "mpidense.h" 108965ea79SLois Curfman McInnes #include "vec/vecimpl.h" 118965ea79SLois Curfman McInnes 1239ddd567SLois Curfman McInnes static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n, 138965ea79SLois Curfman McInnes int *idxn,Scalar *v,InsertMode addv) 148965ea79SLois Curfman McInnes { 1539b7565bSBarry Smith Mat_MPIDense *A = (Mat_MPIDense *) mat->data; 1639b7565bSBarry Smith int ierr, i, j, rstart = A->rstart, rend = A->rend, row; 1739b7565bSBarry Smith int roworiented = A->roworiented; 188965ea79SLois Curfman McInnes 1939b7565bSBarry Smith if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) { 2039ddd567SLois Curfman McInnes SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds"); 218965ea79SLois Curfman McInnes } 2239b7565bSBarry Smith A->insertmode = addv; 238965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 2439ddd567SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row"); 2539b7565bSBarry Smith if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large"); 268965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 278965ea79SLois Curfman McInnes row = idxm[i] - rstart; 2839b7565bSBarry Smith if (roworiented) { 2939b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr); 3039b7565bSBarry Smith } 3139b7565bSBarry Smith else { 328965ea79SLois Curfman McInnes for ( j=0; j<n; j++ ) { 3339ddd567SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column"); 3439b7565bSBarry Smith if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large"); 3539b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr); 3639b7565bSBarry Smith } 378965ea79SLois Curfman McInnes } 388965ea79SLois Curfman McInnes } 398965ea79SLois Curfman McInnes else { 4039b7565bSBarry Smith if (roworiented) { 4139b7565bSBarry Smith ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr); 4239b7565bSBarry Smith } 4339b7565bSBarry Smith else { /* must stash each seperately */ 4439b7565bSBarry Smith row = idxm[i]; 4539b7565bSBarry Smith for ( j=0; j<n; j++ ) { 4639b7565bSBarry Smith ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv); 4739b7565bSBarry Smith CHKERRQ(ierr); 4839b7565bSBarry Smith } 4939b7565bSBarry Smith } 50b49de8d1SLois Curfman McInnes } 51b49de8d1SLois Curfman McInnes } 52b49de8d1SLois Curfman McInnes return 0; 53b49de8d1SLois Curfman McInnes } 54b49de8d1SLois Curfman McInnes 55b49de8d1SLois Curfman McInnes static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 56b49de8d1SLois Curfman McInnes { 57b49de8d1SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 58b49de8d1SLois Curfman McInnes int ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row; 59b49de8d1SLois Curfman McInnes 60b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 61b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row"); 62b49de8d1SLois Curfman McInnes if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large"); 63b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 64b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 65b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 66b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column"); 67b49de8d1SLois Curfman McInnes if (idxn[j] >= mdn->N) 68b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIDense:Column too large"); 69b49de8d1SLois Curfman McInnes ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr); 70b49de8d1SLois Curfman McInnes } 71b49de8d1SLois Curfman McInnes } 72b49de8d1SLois Curfman McInnes else { 73b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported"); 748965ea79SLois Curfman McInnes } 758965ea79SLois Curfman McInnes } 768965ea79SLois Curfman McInnes return 0; 778965ea79SLois Curfman McInnes } 788965ea79SLois Curfman McInnes 7939ddd567SLois Curfman McInnes static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 808965ea79SLois Curfman McInnes { 8139ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 828965ea79SLois Curfman McInnes MPI_Comm comm = mat->comm; 8339ddd567SLois Curfman McInnes int size = mdn->size, *owners = mdn->rowners, rank = mdn->rank; 848965ea79SLois Curfman McInnes int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 8539ddd567SLois Curfman McInnes int tag = mat->tag, *owner,*starts,count,ierr; 868965ea79SLois Curfman McInnes InsertMode addv; 8739ddd567SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 888965ea79SLois Curfman McInnes Scalar *rvalues,*svalues; 898965ea79SLois Curfman McInnes 908965ea79SLois Curfman McInnes /* make sure all processors are either in INSERTMODE or ADDMODE */ 9139ddd567SLois Curfman McInnes MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT, 9239ddd567SLois Curfman McInnes MPI_BOR,comm); 9339ddd567SLois Curfman McInnes if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1, 9439ddd567SLois Curfman McInnes "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs"); 958965ea79SLois Curfman McInnes } 9639ddd567SLois Curfman McInnes mdn->insertmode = addv; /* in case this processor had no cache */ 978965ea79SLois Curfman McInnes 988965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 990452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 100cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1010452661fSBarry Smith owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 10239ddd567SLois Curfman McInnes for ( i=0; i<mdn->stash.n; i++ ) { 10339ddd567SLois Curfman McInnes idx = mdn->stash.idx[i]; 1048965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 1058965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 1068965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1078965ea79SLois Curfman McInnes } 1088965ea79SLois Curfman McInnes } 1098965ea79SLois Curfman McInnes } 1108965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1118965ea79SLois Curfman McInnes 1128965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 1130452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 11439ddd567SLois Curfman McInnes MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm); 1158965ea79SLois Curfman McInnes nreceives = work[rank]; 11639ddd567SLois Curfman McInnes MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm); 1178965ea79SLois Curfman McInnes nmax = work[rank]; 1180452661fSBarry Smith PetscFree(work); 1198965ea79SLois Curfman McInnes 1208965ea79SLois Curfman McInnes /* post receives: 1218965ea79SLois Curfman McInnes 1) each message will consist of ordered pairs 1228965ea79SLois Curfman McInnes (global index,value) we store the global index as a double 1238965ea79SLois Curfman McInnes to simplify the message passing. 1248965ea79SLois Curfman McInnes 2) since we don't know how long each individual message is we 1258965ea79SLois Curfman McInnes allocate the largest needed buffer for each receive. Potentially 1268965ea79SLois Curfman McInnes this is a lot of wasted space. 1278965ea79SLois Curfman McInnes 1288965ea79SLois Curfman McInnes This could be done better. 1298965ea79SLois Curfman McInnes */ 1300452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 1318965ea79SLois Curfman McInnes CHKPTRQ(rvalues); 1320452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 1338965ea79SLois Curfman McInnes CHKPTRQ(recv_waits); 1348965ea79SLois Curfman McInnes for ( i=0; i<nreceives; i++ ) { 13539ddd567SLois Curfman McInnes MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1368965ea79SLois Curfman McInnes comm,recv_waits+i); 1378965ea79SLois Curfman McInnes } 1388965ea79SLois Curfman McInnes 1398965ea79SLois Curfman McInnes /* do sends: 1408965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 1418965ea79SLois Curfman McInnes the ith processor 1428965ea79SLois Curfman McInnes */ 1430452661fSBarry Smith svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) ); 14439ddd567SLois Curfman McInnes CHKPTRQ(svalues); 1450452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 1468965ea79SLois Curfman McInnes CHKPTRQ(send_waits); 1470452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 1488965ea79SLois Curfman McInnes starts[0] = 0; 1498965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 15039ddd567SLois Curfman McInnes for ( i=0; i<mdn->stash.n; i++ ) { 15139ddd567SLois Curfman McInnes svalues[3*starts[owner[i]]] = (Scalar) mdn->stash.idx[i]; 15239ddd567SLois Curfman McInnes svalues[3*starts[owner[i]]+1] = (Scalar) mdn->stash.idy[i]; 15339ddd567SLois Curfman McInnes svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i]; 1548965ea79SLois Curfman McInnes } 1550452661fSBarry Smith PetscFree(owner); 1568965ea79SLois Curfman McInnes starts[0] = 0; 1578965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1588965ea79SLois Curfman McInnes count = 0; 1598965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 1608965ea79SLois Curfman McInnes if (procs[i]) { 16139ddd567SLois Curfman McInnes MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag, 1628965ea79SLois Curfman McInnes comm,send_waits+count++); 1638965ea79SLois Curfman McInnes } 1648965ea79SLois Curfman McInnes } 1650452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 1668965ea79SLois Curfman McInnes 1678965ea79SLois Curfman McInnes /* Free cache space */ 16839ddd567SLois Curfman McInnes ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr); 1698965ea79SLois Curfman McInnes 17039ddd567SLois Curfman McInnes mdn->svalues = svalues; mdn->rvalues = rvalues; 17139ddd567SLois Curfman McInnes mdn->nsends = nsends; mdn->nrecvs = nreceives; 17239ddd567SLois Curfman McInnes mdn->send_waits = send_waits; mdn->recv_waits = recv_waits; 17339ddd567SLois Curfman McInnes mdn->rmax = nmax; 1748965ea79SLois Curfman McInnes 1758965ea79SLois Curfman McInnes return 0; 1768965ea79SLois Curfman McInnes } 17739ddd567SLois Curfman McInnes extern int MatSetUpMultiply_MPIDense(Mat); 1788965ea79SLois Curfman McInnes 17939ddd567SLois Curfman McInnes static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 1808965ea79SLois Curfman McInnes { 18139ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 1828965ea79SLois Curfman McInnes MPI_Status *send_status,recv_status; 18339ddd567SLois Curfman McInnes int imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col; 1848965ea79SLois Curfman McInnes Scalar *values,val; 18539ddd567SLois Curfman McInnes InsertMode addv = mdn->insertmode; 1868965ea79SLois Curfman McInnes 1878965ea79SLois Curfman McInnes /* wait on receives */ 1888965ea79SLois Curfman McInnes while (count) { 18939ddd567SLois Curfman McInnes MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status); 1908965ea79SLois Curfman McInnes /* unpack receives into our local space */ 19139ddd567SLois Curfman McInnes values = mdn->rvalues + 3*imdex*mdn->rmax; 1928965ea79SLois Curfman McInnes MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 1938965ea79SLois Curfman McInnes n = n/3; 1948965ea79SLois Curfman McInnes for ( i=0; i<n; i++ ) { 195227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - mdn->rstart; 196227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 1978965ea79SLois Curfman McInnes val = values[3*i+2]; 19839ddd567SLois Curfman McInnes if (col >= 0 && col < mdn->N) { 19939ddd567SLois Curfman McInnes MatSetValues(mdn->A,1,&row,1,&col,&val,addv); 2008965ea79SLois Curfman McInnes } 20139ddd567SLois Curfman McInnes else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");} 2028965ea79SLois Curfman McInnes } 2038965ea79SLois Curfman McInnes count--; 2048965ea79SLois Curfman McInnes } 2050452661fSBarry Smith PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues); 2068965ea79SLois Curfman McInnes 2078965ea79SLois Curfman McInnes /* wait on sends */ 20839ddd567SLois Curfman McInnes if (mdn->nsends) { 2090452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) ); 2108965ea79SLois Curfman McInnes CHKPTRQ(send_status); 21139ddd567SLois Curfman McInnes MPI_Waitall(mdn->nsends,mdn->send_waits,send_status); 2120452661fSBarry Smith PetscFree(send_status); 2138965ea79SLois Curfman McInnes } 2140452661fSBarry Smith PetscFree(mdn->send_waits); PetscFree(mdn->svalues); 2158965ea79SLois Curfman McInnes 21639ddd567SLois Curfman McInnes mdn->insertmode = NOT_SET_VALUES; 21739ddd567SLois Curfman McInnes ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr); 21839ddd567SLois Curfman McInnes ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr); 2198965ea79SLois Curfman McInnes 220227d817aSBarry Smith if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 22139ddd567SLois Curfman McInnes ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr); 2228965ea79SLois Curfman McInnes } 2238965ea79SLois Curfman McInnes return 0; 2248965ea79SLois Curfman McInnes } 2258965ea79SLois Curfman McInnes 22639ddd567SLois Curfman McInnes static int MatZeroEntries_MPIDense(Mat A) 2278965ea79SLois Curfman McInnes { 22839ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 22939ddd567SLois Curfman McInnes return MatZeroEntries(l->A); 2308965ea79SLois Curfman McInnes } 2318965ea79SLois Curfman McInnes 2328965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the 2338965ea79SLois Curfman McInnes matrix is square and the column and row owerships are identical. 2348965ea79SLois Curfman McInnes This is a BUG. The only way to fix it seems to be to access 2353501a2bdSLois Curfman McInnes mdn->A and mdn->B directly and not through the MatZeroRows() 2368965ea79SLois Curfman McInnes routine. 2378965ea79SLois Curfman McInnes */ 23839ddd567SLois Curfman McInnes static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag) 2398965ea79SLois Curfman McInnes { 24039ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 2418965ea79SLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 2428965ea79SLois Curfman McInnes int *procs,*nprocs,j,found,idx,nsends,*work; 2438965ea79SLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 2448965ea79SLois Curfman McInnes int *rvalues,tag = A->tag,count,base,slen,n,*source; 2458965ea79SLois Curfman McInnes int *lens,imdex,*lrows,*values; 2468965ea79SLois Curfman McInnes MPI_Comm comm = A->comm; 2478965ea79SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 2488965ea79SLois Curfman McInnes MPI_Status recv_status,*send_status; 2498965ea79SLois Curfman McInnes IS istmp; 2508965ea79SLois Curfman McInnes 2518965ea79SLois Curfman McInnes ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 2528965ea79SLois Curfman McInnes ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 2538965ea79SLois Curfman McInnes 2548965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 2550452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 256cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 2570452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 2588965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 2598965ea79SLois Curfman McInnes idx = rows[i]; 2608965ea79SLois Curfman McInnes found = 0; 2618965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 2628965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 2638965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 2648965ea79SLois Curfman McInnes } 2658965ea79SLois Curfman McInnes } 26639ddd567SLois Curfman McInnes if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range"); 2678965ea79SLois Curfman McInnes } 2688965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 2698965ea79SLois Curfman McInnes 2708965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 2710452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 2728965ea79SLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 2738965ea79SLois Curfman McInnes nrecvs = work[rank]; 2748965ea79SLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 2758965ea79SLois Curfman McInnes nmax = work[rank]; 2760452661fSBarry Smith PetscFree(work); 2778965ea79SLois Curfman McInnes 2788965ea79SLois Curfman McInnes /* post receives: */ 2790452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 2808965ea79SLois Curfman McInnes CHKPTRQ(rvalues); 2810452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 2828965ea79SLois Curfman McInnes CHKPTRQ(recv_waits); 2838965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 2848965ea79SLois Curfman McInnes MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 2858965ea79SLois Curfman McInnes } 2868965ea79SLois Curfman McInnes 2878965ea79SLois Curfman McInnes /* do sends: 2888965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 2898965ea79SLois Curfman McInnes the ith processor 2908965ea79SLois Curfman McInnes */ 2910452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 2920452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 2938965ea79SLois Curfman McInnes CHKPTRQ(send_waits); 2940452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 2958965ea79SLois Curfman McInnes starts[0] = 0; 2968965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2978965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 2988965ea79SLois Curfman McInnes svalues[starts[owner[i]]++] = rows[i]; 2998965ea79SLois Curfman McInnes } 3008965ea79SLois Curfman McInnes ISRestoreIndices(is,&rows); 3018965ea79SLois Curfman McInnes 3028965ea79SLois Curfman McInnes starts[0] = 0; 3038965ea79SLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3048965ea79SLois Curfman McInnes count = 0; 3058965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 3068965ea79SLois Curfman McInnes if (procs[i]) { 3078965ea79SLois Curfman McInnes MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 3088965ea79SLois Curfman McInnes } 3098965ea79SLois Curfman McInnes } 3100452661fSBarry Smith PetscFree(starts); 3118965ea79SLois Curfman McInnes 3128965ea79SLois Curfman McInnes base = owners[rank]; 3138965ea79SLois Curfman McInnes 3148965ea79SLois Curfman McInnes /* wait on receives */ 3150452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3168965ea79SLois Curfman McInnes source = lens + nrecvs; 3178965ea79SLois Curfman McInnes count = nrecvs; slen = 0; 3188965ea79SLois Curfman McInnes while (count) { 3198965ea79SLois Curfman McInnes MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3208965ea79SLois Curfman McInnes /* unpack receives into our local space */ 3218965ea79SLois Curfman McInnes MPI_Get_count(&recv_status,MPI_INT,&n); 3228965ea79SLois Curfman McInnes source[imdex] = recv_status.MPI_SOURCE; 3238965ea79SLois Curfman McInnes lens[imdex] = n; 3248965ea79SLois Curfman McInnes slen += n; 3258965ea79SLois Curfman McInnes count--; 3268965ea79SLois Curfman McInnes } 3270452661fSBarry Smith PetscFree(recv_waits); 3288965ea79SLois Curfman McInnes 3298965ea79SLois Curfman McInnes /* move the data into the send scatter */ 3300452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 3318965ea79SLois Curfman McInnes count = 0; 3328965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 3338965ea79SLois Curfman McInnes values = rvalues + i*nmax; 3348965ea79SLois Curfman McInnes for ( j=0; j<lens[i]; j++ ) { 3358965ea79SLois Curfman McInnes lrows[count++] = values[j] - base; 3368965ea79SLois Curfman McInnes } 3378965ea79SLois Curfman McInnes } 3380452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 3390452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 3408965ea79SLois Curfman McInnes 3418965ea79SLois Curfman McInnes /* actually zap the local rows */ 3428965ea79SLois Curfman McInnes ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 3438965ea79SLois Curfman McInnes PLogObjectParent(A,istmp); 3440452661fSBarry Smith PetscFree(lrows); 3458965ea79SLois Curfman McInnes ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 3468965ea79SLois Curfman McInnes ierr = ISDestroy(istmp); CHKERRQ(ierr); 3478965ea79SLois Curfman McInnes 3488965ea79SLois Curfman McInnes /* wait on sends */ 3498965ea79SLois Curfman McInnes if (nsends) { 3500452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 3518965ea79SLois Curfman McInnes CHKPTRQ(send_status); 3528965ea79SLois Curfman McInnes MPI_Waitall(nsends,send_waits,send_status); 3530452661fSBarry Smith PetscFree(send_status); 3548965ea79SLois Curfman McInnes } 3550452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 3568965ea79SLois Curfman McInnes 3578965ea79SLois Curfman McInnes return 0; 3588965ea79SLois Curfman McInnes } 3598965ea79SLois Curfman McInnes 36039ddd567SLois Curfman McInnes static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 3618965ea79SLois Curfman McInnes { 36239ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 3638965ea79SLois Curfman McInnes int ierr; 364c456f294SBarry Smith 36539ddd567SLois Curfman McInnes ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 36639ddd567SLois Curfman McInnes CHKERRQ(ierr); 36739ddd567SLois Curfman McInnes ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 36839ddd567SLois Curfman McInnes CHKERRQ(ierr); 36939ddd567SLois Curfman McInnes ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr); 3708965ea79SLois Curfman McInnes return 0; 3718965ea79SLois Curfman McInnes } 3728965ea79SLois Curfman McInnes 37339ddd567SLois Curfman McInnes static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 3748965ea79SLois Curfman McInnes { 37539ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 3768965ea79SLois Curfman McInnes int ierr; 377c456f294SBarry Smith 378c456f294SBarry Smith ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr); 379c456f294SBarry Smith ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr); 38039ddd567SLois Curfman McInnes ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr); 3818965ea79SLois Curfman McInnes return 0; 3828965ea79SLois Curfman McInnes } 3838965ea79SLois Curfman McInnes 384096963f5SLois Curfman McInnes static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy) 385096963f5SLois Curfman McInnes { 386096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 387096963f5SLois Curfman McInnes int ierr; 3883501a2bdSLois Curfman McInnes Scalar zero = 0.0; 389096963f5SLois Curfman McInnes 3903501a2bdSLois Curfman McInnes ierr = VecSet(&zero,yy); CHKERRQ(ierr); 391096963f5SLois Curfman McInnes ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr); 392096963f5SLois Curfman McInnes ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 393096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 394096963f5SLois Curfman McInnes ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 395096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 396096963f5SLois Curfman McInnes return 0; 397096963f5SLois Curfman McInnes } 398096963f5SLois Curfman McInnes 399096963f5SLois Curfman McInnes static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 400096963f5SLois Curfman McInnes { 401096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 402096963f5SLois Curfman McInnes int ierr; 403096963f5SLois Curfman McInnes 4043501a2bdSLois Curfman McInnes ierr = VecCopy(yy,zz); CHKERRQ(ierr); 4053501a2bdSLois Curfman McInnes ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr); 406096963f5SLois Curfman McInnes ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 407096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 408096963f5SLois Curfman McInnes ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 409096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 410096963f5SLois Curfman McInnes return 0; 411096963f5SLois Curfman McInnes } 412096963f5SLois Curfman McInnes 41339ddd567SLois Curfman McInnes static int MatGetDiagonal_MPIDense(Mat A,Vec v) 4148965ea79SLois Curfman McInnes { 41539ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 416096963f5SLois Curfman McInnes Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data; 417096963f5SLois Curfman McInnes int ierr, i, n, m = a->m, radd; 418096963f5SLois Curfman McInnes Scalar *x; 419ed3cc1f0SBarry Smith 420096963f5SLois Curfman McInnes ierr = VecGetArray(v,&x); CHKERRQ(ierr); 421096963f5SLois Curfman McInnes ierr = VecGetSize(v,&n); CHKERRQ(ierr); 422096963f5SLois Curfman McInnes if (n != a->M) SETERRQ(1,"MatGetDiagonal_SeqDense:Nonconforming mat and vec"); 423096963f5SLois Curfman McInnes radd = a->rstart*m*m; 424096963f5SLois Curfman McInnes for ( i=0; i<m; i++ ) { 425096963f5SLois Curfman McInnes x[i] = aloc->v[radd + i*m + i]; 426096963f5SLois Curfman McInnes } 427096963f5SLois Curfman McInnes return 0; 4288965ea79SLois Curfman McInnes } 4298965ea79SLois Curfman McInnes 43039ddd567SLois Curfman McInnes static int MatDestroy_MPIDense(PetscObject obj) 4318965ea79SLois Curfman McInnes { 4328965ea79SLois Curfman McInnes Mat mat = (Mat) obj; 4333501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4348965ea79SLois Curfman McInnes int ierr; 435ed3cc1f0SBarry Smith 4368965ea79SLois Curfman McInnes #if defined(PETSC_LOG) 4373501a2bdSLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N); 4388965ea79SLois Curfman McInnes #endif 4390452661fSBarry Smith PetscFree(mdn->rowners); 4403501a2bdSLois Curfman McInnes ierr = MatDestroy(mdn->A); CHKERRQ(ierr); 4413501a2bdSLois Curfman McInnes if (mdn->lvec) VecDestroy(mdn->lvec); 4423501a2bdSLois Curfman McInnes if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 443622d7880SLois Curfman McInnes if (mdn->factor) { 444622d7880SLois Curfman McInnes if (mdn->factor->temp) PetscFree(mdn->factor->temp); 445622d7880SLois Curfman McInnes if (mdn->factor->tag) PetscFree(mdn->factor->tag); 446622d7880SLois Curfman McInnes if (mdn->factor->pivots) PetscFree(mdn->factor->pivots); 447622d7880SLois Curfman McInnes PetscFree(mdn->factor); 448622d7880SLois Curfman McInnes } 4490452661fSBarry Smith PetscFree(mdn); 4508965ea79SLois Curfman McInnes PLogObjectDestroy(mat); 4510452661fSBarry Smith PetscHeaderDestroy(mat); 4528965ea79SLois Curfman McInnes return 0; 4538965ea79SLois Curfman McInnes } 45439ddd567SLois Curfman McInnes 4558965ea79SLois Curfman McInnes #include "pinclude/pviewer.h" 4568965ea79SLois Curfman McInnes 45739ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer) 4588965ea79SLois Curfman McInnes { 45939ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4608965ea79SLois Curfman McInnes int ierr; 46139ddd567SLois Curfman McInnes if (mdn->size == 1) { 46239ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 4638965ea79SLois Curfman McInnes } 46439ddd567SLois Curfman McInnes else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported"); 4658965ea79SLois Curfman McInnes return 0; 4668965ea79SLois Curfman McInnes } 4678965ea79SLois Curfman McInnes 46839ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer) 4698965ea79SLois Curfman McInnes { 47039ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 47139ddd567SLois Curfman McInnes int ierr, format; 4728965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) viewer; 4738965ea79SLois Curfman McInnes FILE *fd; 4748965ea79SLois Curfman McInnes 475227d817aSBarry Smith ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 4768965ea79SLois Curfman McInnes if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 4778965ea79SLois Curfman McInnes ierr = ViewerFileGetFormat_Private(viewer,&format); 4783501a2bdSLois Curfman McInnes if (format == FILE_FORMAT_INFO_DETAILED) { 479096963f5SLois Curfman McInnes int nz, nzalloc, mem, rank; 480096963f5SLois Curfman McInnes MPI_Comm_rank(mat->comm,&rank); 481096963f5SLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 482096963f5SLois Curfman McInnes MPIU_Seq_begin(mat->comm,1); 4833501a2bdSLois Curfman McInnes fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n", 484096963f5SLois Curfman McInnes rank,mdn->m,nz,nzalloc,mem); 485096963f5SLois Curfman McInnes fflush(fd); 486096963f5SLois Curfman McInnes MPIU_Seq_end(mat->comm,1); 4873501a2bdSLois Curfman McInnes ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr); 4883501a2bdSLois Curfman McInnes return 0; 4893501a2bdSLois Curfman McInnes } 4903501a2bdSLois Curfman McInnes else if (format == FILE_FORMAT_INFO) { 491096963f5SLois Curfman McInnes return 0; 4928965ea79SLois Curfman McInnes } 4938965ea79SLois Curfman McInnes } 4948965ea79SLois Curfman McInnes if (vobj->type == ASCII_FILE_VIEWER) { 4958965ea79SLois Curfman McInnes MPIU_Seq_begin(mat->comm,1); 49639ddd567SLois Curfman McInnes fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n", 49739ddd567SLois Curfman McInnes mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n); 49839ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 4998965ea79SLois Curfman McInnes fflush(fd); 5008965ea79SLois Curfman McInnes MPIU_Seq_end(mat->comm,1); 5018965ea79SLois Curfman McInnes } 5028965ea79SLois Curfman McInnes else { 50339ddd567SLois Curfman McInnes int size = mdn->size, rank = mdn->rank; 5048965ea79SLois Curfman McInnes if (size == 1) { 50539ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 5068965ea79SLois Curfman McInnes } 5078965ea79SLois Curfman McInnes else { 5088965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 5098965ea79SLois Curfman McInnes Mat A; 51039ddd567SLois Curfman McInnes int M = mdn->M, N = mdn->N,m,row,i, nz, *cols; 51139ddd567SLois Curfman McInnes Scalar *vals; 51239ddd567SLois Curfman McInnes Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data; 5138965ea79SLois Curfman McInnes 5148965ea79SLois Curfman McInnes if (!rank) { 515b4fd4287SBarry Smith ierr = MatCreateMPIDense(mat->comm,M,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr); 5168965ea79SLois Curfman McInnes } 5178965ea79SLois Curfman McInnes else { 518b4fd4287SBarry Smith ierr = MatCreateMPIDense(mat->comm,0,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr); 5198965ea79SLois Curfman McInnes } 5208965ea79SLois Curfman McInnes PLogObjectParent(mat,A); 5218965ea79SLois Curfman McInnes 52239ddd567SLois Curfman McInnes /* Copy the matrix ... This isn't the most efficient means, 52339ddd567SLois Curfman McInnes but it's quick for now */ 52439ddd567SLois Curfman McInnes row = mdn->rstart; m = Amdn->m; 5258965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 52639ddd567SLois Curfman McInnes ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 52739ddd567SLois Curfman McInnes ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr); 52839ddd567SLois Curfman McInnes ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 52939ddd567SLois Curfman McInnes row++; 5308965ea79SLois Curfman McInnes } 5318965ea79SLois Curfman McInnes 5328965ea79SLois Curfman McInnes ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 5338965ea79SLois Curfman McInnes ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 5348965ea79SLois Curfman McInnes if (!rank) { 53539ddd567SLois Curfman McInnes ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr); 5368965ea79SLois Curfman McInnes } 5378965ea79SLois Curfman McInnes ierr = MatDestroy(A); CHKERRQ(ierr); 5388965ea79SLois Curfman McInnes } 5398965ea79SLois Curfman McInnes } 5408965ea79SLois Curfman McInnes return 0; 5418965ea79SLois Curfman McInnes } 5428965ea79SLois Curfman McInnes 54339ddd567SLois Curfman McInnes static int MatView_MPIDense(PetscObject obj,Viewer viewer) 5448965ea79SLois Curfman McInnes { 5458965ea79SLois Curfman McInnes Mat mat = (Mat) obj; 5468965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) viewer; 54739ddd567SLois Curfman McInnes int ierr; 5488965ea79SLois Curfman McInnes 5498965ea79SLois Curfman McInnes if (!viewer) { 5508965ea79SLois Curfman McInnes viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 5518965ea79SLois Curfman McInnes } 55239ddd567SLois Curfman McInnes if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 55339ddd567SLois Curfman McInnes ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 5548965ea79SLois Curfman McInnes } 5558965ea79SLois Curfman McInnes else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 55639ddd567SLois Curfman McInnes ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 5578965ea79SLois Curfman McInnes } 5588965ea79SLois Curfman McInnes else if (vobj->type == BINARY_FILE_VIEWER) { 55939ddd567SLois Curfman McInnes return MatView_MPIDense_Binary(mat,viewer); 5608965ea79SLois Curfman McInnes } 5618965ea79SLois Curfman McInnes return 0; 5628965ea79SLois Curfman McInnes } 5638965ea79SLois Curfman McInnes 5643501a2bdSLois Curfman McInnes static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz, 5658965ea79SLois Curfman McInnes int *nzalloc,int *mem) 5668965ea79SLois Curfman McInnes { 5673501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 5683501a2bdSLois Curfman McInnes Mat mdn = mat->A; 56939ddd567SLois Curfman McInnes int ierr, isend[3], irecv[3]; 5708965ea79SLois Curfman McInnes 5713501a2bdSLois Curfman McInnes ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 5728965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 5738965ea79SLois Curfman McInnes *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 5748965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 5753501a2bdSLois Curfman McInnes MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm); 5768965ea79SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 5778965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 5783501a2bdSLois Curfman McInnes MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm); 5798965ea79SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 5808965ea79SLois Curfman McInnes } 5818965ea79SLois Curfman McInnes return 0; 5828965ea79SLois Curfman McInnes } 5838965ea79SLois Curfman McInnes 5848c469469SLois Curfman McInnes /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*); 5858aaee692SLois Curfman McInnes extern int MatLUFactorNumeric_MPIDense(Mat,Mat*); 5868aaee692SLois Curfman McInnes extern int MatLUFactor_MPIDense(Mat,IS,IS,double); 5878aaee692SLois Curfman McInnes extern int MatSolve_MPIDense(Mat,Vec,Vec); 5888c469469SLois Curfman McInnes extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec); 5898aaee692SLois Curfman McInnes extern int MatSolveTrans_MPIDense(Mat,Vec,Vec); 5908aaee692SLois Curfman McInnes extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */ 5918aaee692SLois Curfman McInnes 59239ddd567SLois Curfman McInnes static int MatSetOption_MPIDense(Mat A,MatOption op) 5938965ea79SLois Curfman McInnes { 59439ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 5958965ea79SLois Curfman McInnes 5968965ea79SLois Curfman McInnes if (op == NO_NEW_NONZERO_LOCATIONS || 5978965ea79SLois Curfman McInnes op == YES_NEW_NONZERO_LOCATIONS || 5988965ea79SLois Curfman McInnes op == COLUMNS_SORTED || 5998965ea79SLois Curfman McInnes op == ROW_ORIENTED) { 6008965ea79SLois Curfman McInnes MatSetOption(a->A,op); 6018965ea79SLois Curfman McInnes } 6028965ea79SLois Curfman McInnes else if (op == ROWS_SORTED || 6038965ea79SLois Curfman McInnes op == SYMMETRIC_MATRIX || 6048965ea79SLois Curfman McInnes op == STRUCTURALLY_SYMMETRIC_MATRIX || 6058965ea79SLois Curfman McInnes op == YES_NEW_DIAGONALS) 60639ddd567SLois Curfman McInnes PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n"); 6078965ea79SLois Curfman McInnes else if (op == COLUMN_ORIENTED) 60839b7565bSBarry Smith { a->roworiented = 0; MatSetOption(a->A,op);} 6098965ea79SLois Curfman McInnes else if (op == NO_NEW_DIAGONALS) 61039ddd567SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");} 6118965ea79SLois Curfman McInnes else 61239ddd567SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");} 6138965ea79SLois Curfman McInnes return 0; 6148965ea79SLois Curfman McInnes } 6158965ea79SLois Curfman McInnes 6163501a2bdSLois Curfman McInnes static int MatGetSize_MPIDense(Mat A,int *m,int *n) 6178965ea79SLois Curfman McInnes { 6183501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6198965ea79SLois Curfman McInnes *m = mat->M; *n = mat->N; 6208965ea79SLois Curfman McInnes return 0; 6218965ea79SLois Curfman McInnes } 6228965ea79SLois Curfman McInnes 6233501a2bdSLois Curfman McInnes static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n) 6248965ea79SLois Curfman McInnes { 6253501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6268965ea79SLois Curfman McInnes *m = mat->m; *n = mat->N; 6278965ea79SLois Curfman McInnes return 0; 6288965ea79SLois Curfman McInnes } 6298965ea79SLois Curfman McInnes 6303501a2bdSLois Curfman McInnes static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n) 6318965ea79SLois Curfman McInnes { 6323501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6338965ea79SLois Curfman McInnes *m = mat->rstart; *n = mat->rend; 6348965ea79SLois Curfman McInnes return 0; 6358965ea79SLois Curfman McInnes } 6368965ea79SLois Curfman McInnes 6373501a2bdSLois Curfman McInnes static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v) 6388965ea79SLois Curfman McInnes { 6393501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 64039ddd567SLois Curfman McInnes int lrow, rstart = mat->rstart, rend = mat->rend; 6418965ea79SLois Curfman McInnes 64239ddd567SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows") 6438965ea79SLois Curfman McInnes lrow = row - rstart; 64439ddd567SLois Curfman McInnes return MatGetRow(mat->A,lrow,nz,idx,v); 6458965ea79SLois Curfman McInnes } 6468965ea79SLois Curfman McInnes 64739ddd567SLois Curfman McInnes static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v) 6488965ea79SLois Curfman McInnes { 6490452661fSBarry Smith if (idx) PetscFree(*idx); 6500452661fSBarry Smith if (v) PetscFree(*v); 6518965ea79SLois Curfman McInnes return 0; 6528965ea79SLois Curfman McInnes } 6538965ea79SLois Curfman McInnes 654cddf8d76SBarry Smith static int MatNorm_MPIDense(Mat A,NormType type,double *norm) 655096963f5SLois Curfman McInnes { 6563501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) A->data; 6573501a2bdSLois Curfman McInnes Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data; 6583501a2bdSLois Curfman McInnes int ierr, i, j; 6593501a2bdSLois Curfman McInnes double sum = 0.0; 6603501a2bdSLois Curfman McInnes Scalar *v = mat->v; 6613501a2bdSLois Curfman McInnes 6623501a2bdSLois Curfman McInnes if (mdn->size == 1) { 6633501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr); 6643501a2bdSLois Curfman McInnes } else { 6653501a2bdSLois Curfman McInnes if (type == NORM_FROBENIUS) { 6663501a2bdSLois Curfman McInnes for (i=0; i<mat->n*mat->m; i++ ) { 6673501a2bdSLois Curfman McInnes #if defined(PETSC_COMPLEX) 6683501a2bdSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 6693501a2bdSLois Curfman McInnes #else 6703501a2bdSLois Curfman McInnes sum += (*v)*(*v); v++; 6713501a2bdSLois Curfman McInnes #endif 6723501a2bdSLois Curfman McInnes } 6733501a2bdSLois Curfman McInnes MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,A->comm); 6743501a2bdSLois Curfman McInnes *norm = sqrt(*norm); 6753501a2bdSLois Curfman McInnes PLogFlops(2*mat->n*mat->m); 6763501a2bdSLois Curfman McInnes } 6773501a2bdSLois Curfman McInnes else if (type == NORM_1) { 6783501a2bdSLois Curfman McInnes double *tmp, *tmp2; 6790452661fSBarry Smith tmp = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp); 6803501a2bdSLois Curfman McInnes tmp2 = tmp + mdn->N; 681cddf8d76SBarry Smith PetscMemzero(tmp,2*mdn->N*sizeof(double)); 682096963f5SLois Curfman McInnes *norm = 0.0; 6833501a2bdSLois Curfman McInnes v = mat->v; 6843501a2bdSLois Curfman McInnes for ( j=0; j<mat->n; j++ ) { 6853501a2bdSLois Curfman McInnes for ( i=0; i<mat->m; i++ ) { 68667e560aaSBarry Smith tmp[j] += PetscAbsScalar(*v); v++; 6873501a2bdSLois Curfman McInnes } 6883501a2bdSLois Curfman McInnes } 6893501a2bdSLois Curfman McInnes MPI_Allreduce((void*)tmp,(void*)tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm); 6903501a2bdSLois Curfman McInnes for ( j=0; j<mdn->N; j++ ) { 6913501a2bdSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 6923501a2bdSLois Curfman McInnes } 6930452661fSBarry Smith PetscFree(tmp); 6943501a2bdSLois Curfman McInnes PLogFlops(mat->n*mat->m); 6953501a2bdSLois Curfman McInnes } 6963501a2bdSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 6973501a2bdSLois Curfman McInnes double ntemp; 6983501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr); 6993501a2bdSLois Curfman McInnes MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,A->comm); 7003501a2bdSLois Curfman McInnes } 7013501a2bdSLois Curfman McInnes else { 7023501a2bdSLois Curfman McInnes SETERRQ(1,"MatNorm_MPIDense:No support for two norm"); 7033501a2bdSLois Curfman McInnes } 7043501a2bdSLois Curfman McInnes } 7053501a2bdSLois Curfman McInnes return 0; 7063501a2bdSLois Curfman McInnes } 7073501a2bdSLois Curfman McInnes 7083501a2bdSLois Curfman McInnes static int MatTranspose_MPIDense(Mat A,Mat *matout) 7093501a2bdSLois Curfman McInnes { 7103501a2bdSLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 7113501a2bdSLois Curfman McInnes Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data; 7123501a2bdSLois Curfman McInnes Mat B; 7133501a2bdSLois Curfman McInnes int M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart; 7143501a2bdSLois Curfman McInnes int j, i, ierr; 7153501a2bdSLois Curfman McInnes Scalar *v; 7163501a2bdSLois Curfman McInnes 717*3638b69dSLois Curfman McInnes if (matout == PETSC_NULL && M != N) 7183501a2bdSLois Curfman McInnes SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place"); 719b4fd4287SBarry Smith ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B); 720ed2daf61SLois Curfman McInnes CHKERRQ(ierr); 7213501a2bdSLois Curfman McInnes 7223501a2bdSLois Curfman McInnes m = Aloc->m; n = Aloc->n; v = Aloc->v; 7230452661fSBarry Smith rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork); 7243501a2bdSLois Curfman McInnes for ( j=0; j<n; j++ ) { 7253501a2bdSLois Curfman McInnes for (i=0; i<m; i++) rwork[i] = rstart + i; 7263501a2bdSLois Curfman McInnes ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr); 7273501a2bdSLois Curfman McInnes v += m; 7283501a2bdSLois Curfman McInnes } 7290452661fSBarry Smith PetscFree(rwork); 7303501a2bdSLois Curfman McInnes ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 7313501a2bdSLois Curfman McInnes ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 732*3638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 7333501a2bdSLois Curfman McInnes *matout = B; 7343501a2bdSLois Curfman McInnes } else { 7353501a2bdSLois Curfman McInnes /* This isn't really an in-place transpose, but free data struct from a */ 7360452661fSBarry Smith PetscFree(a->rowners); 7373501a2bdSLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 7383501a2bdSLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 7393501a2bdSLois Curfman McInnes if (a->Mvctx) VecScatterDestroy(a->Mvctx); 7400452661fSBarry Smith PetscFree(a); 7413501a2bdSLois Curfman McInnes PetscMemcpy(A,B,sizeof(struct _Mat)); 7420452661fSBarry Smith PetscHeaderDestroy(B); 7433501a2bdSLois Curfman McInnes } 744096963f5SLois Curfman McInnes return 0; 745096963f5SLois Curfman McInnes } 746096963f5SLois Curfman McInnes 7473d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat,Mat *,int); 7488965ea79SLois Curfman McInnes 7498965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/ 75039ddd567SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIDense, 75139ddd567SLois Curfman McInnes MatGetRow_MPIDense,MatRestoreRow_MPIDense, 75239ddd567SLois Curfman McInnes MatMult_MPIDense,MatMultAdd_MPIDense, 753096963f5SLois Curfman McInnes MatMultTrans_MPIDense,MatMultTransAdd_MPIDense, 754e7ca0642SLois Curfman McInnes /* MatSolve_MPIDense,0, */ 755e7ca0642SLois Curfman McInnes 0,0, 75639ddd567SLois Curfman McInnes 0,0, 7578c469469SLois Curfman McInnes 0,0, 7588c469469SLois Curfman McInnes /* MatLUFactor_MPIDense,0, */ 7598aaee692SLois Curfman McInnes 0,MatTranspose_MPIDense, 76039ddd567SLois Curfman McInnes MatGetInfo_MPIDense,0, 761096963f5SLois Curfman McInnes MatGetDiagonal_MPIDense,0,MatNorm_MPIDense, 76239ddd567SLois Curfman McInnes MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense, 7638965ea79SLois Curfman McInnes 0, 76439ddd567SLois Curfman McInnes MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense, 7658c469469SLois Curfman McInnes 0,0,0, 7668c469469SLois Curfman McInnes /* 0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */ 7678aaee692SLois Curfman McInnes 0,0, 76839ddd567SLois Curfman McInnes MatGetSize_MPIDense,MatGetLocalSize_MPIDense, 76939ddd567SLois Curfman McInnes MatGetOwnershipRange_MPIDense, 77039ddd567SLois Curfman McInnes 0,0, 7713d1612f7SBarry Smith 0,0,0,0,0,MatConvertSameType_MPIDense, 772b49de8d1SLois Curfman McInnes 0,0,0,0,0, 773b49de8d1SLois Curfman McInnes 0,0,MatGetValues_MPIDense}; 7748965ea79SLois Curfman McInnes 7758965ea79SLois Curfman McInnes /*@C 77639ddd567SLois Curfman McInnes MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 7778965ea79SLois Curfman McInnes 7788965ea79SLois Curfman McInnes Input Parameters: 7798965ea79SLois Curfman McInnes . comm - MPI communicator 7808965ea79SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 7818965ea79SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 7828965ea79SLois Curfman McInnes if N is given) 7838965ea79SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 7848965ea79SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 7858965ea79SLois Curfman McInnes if n is given) 786b4fd4287SBarry Smith . data - optional location of matrix data. Set data=PETSC_NULL for PETSc 787dfc5480cSLois Curfman McInnes to control all matrix memory allocation. 7888965ea79SLois Curfman McInnes 7898965ea79SLois Curfman McInnes Output Parameter: 7908965ea79SLois Curfman McInnes . newmat - the matrix 7918965ea79SLois Curfman McInnes 7928965ea79SLois Curfman McInnes Notes: 79339ddd567SLois Curfman McInnes The dense format is fully compatible with standard Fortran 77 79439ddd567SLois Curfman McInnes storage by columns. 7958965ea79SLois Curfman McInnes 79618f449edSLois Curfman McInnes The data input variable is intended primarily for Fortran programmers 79718f449edSLois Curfman McInnes who wish to allocate their own matrix memory space. Most users should 798b4fd4287SBarry Smith set data=PETSC_NULL. 79918f449edSLois Curfman McInnes 8008965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 8018965ea79SLois Curfman McInnes (possibly both). 8028965ea79SLois Curfman McInnes 8033501a2bdSLois Curfman McInnes Currently, the only parallel dense matrix decomposition is by rows, 8043501a2bdSLois Curfman McInnes so that n=N and each submatrix owns all of the global columns. 8053501a2bdSLois Curfman McInnes 80639ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel 8078965ea79SLois Curfman McInnes 80839ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 8098965ea79SLois Curfman McInnes @*/ 81018f449edSLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *newmat) 8118965ea79SLois Curfman McInnes { 8128965ea79SLois Curfman McInnes Mat mat; 81339ddd567SLois Curfman McInnes Mat_MPIDense *a; 81425cdf11fSBarry Smith int ierr, i,flg; 8158965ea79SLois Curfman McInnes 816ed2daf61SLois Curfman McInnes /* Note: For now, when data is specified above, this assumes the user correctly 817ed2daf61SLois Curfman McInnes allocates the local dense storage space. We should add error checking. */ 81818f449edSLois Curfman McInnes 8198965ea79SLois Curfman McInnes *newmat = 0; 8200452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm); 8218965ea79SLois Curfman McInnes PLogObjectCreate(mat); 8220452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 8238965ea79SLois Curfman McInnes PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 82439ddd567SLois Curfman McInnes mat->destroy = MatDestroy_MPIDense; 82539ddd567SLois Curfman McInnes mat->view = MatView_MPIDense; 8268965ea79SLois Curfman McInnes mat->factor = 0; 8278965ea79SLois Curfman McInnes 828622d7880SLois Curfman McInnes a->factor = 0; 8298965ea79SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 8308965ea79SLois Curfman McInnes MPI_Comm_rank(comm,&a->rank); 8318965ea79SLois Curfman McInnes MPI_Comm_size(comm,&a->size); 8328965ea79SLois Curfman McInnes 83339ddd567SLois Curfman McInnes if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm); 8348965ea79SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 83539ddd567SLois Curfman McInnes 83639ddd567SLois Curfman McInnes /* each row stores all columns */ 83739ddd567SLois Curfman McInnes if (N == PETSC_DECIDE) N = n; 838d7e8b826SBarry Smith if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 839d7e8b826SBarry Smith /* if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */ 8408965ea79SLois Curfman McInnes a->N = N; 8418965ea79SLois Curfman McInnes a->M = M; 84239ddd567SLois Curfman McInnes a->m = m; 84339ddd567SLois Curfman McInnes a->n = n; 8448965ea79SLois Curfman McInnes 8458965ea79SLois Curfman McInnes /* build local table of row and column ownerships */ 846d7e8b826SBarry Smith a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 847d7e8b826SBarry Smith a->cowners = a->rowners + a->size + 1; 848d7e8b826SBarry Smith PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+ 84939ddd567SLois Curfman McInnes sizeof(Mat_MPIDense)); 8508965ea79SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 8518965ea79SLois Curfman McInnes a->rowners[0] = 0; 8528965ea79SLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 8538965ea79SLois Curfman McInnes a->rowners[i] += a->rowners[i-1]; 8548965ea79SLois Curfman McInnes } 8558965ea79SLois Curfman McInnes a->rstart = a->rowners[a->rank]; 8568965ea79SLois Curfman McInnes a->rend = a->rowners[a->rank+1]; 857d7e8b826SBarry Smith MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 858d7e8b826SBarry Smith a->cowners[0] = 0; 859d7e8b826SBarry Smith for ( i=2; i<=a->size; i++ ) { 860d7e8b826SBarry Smith a->cowners[i] += a->cowners[i-1]; 861d7e8b826SBarry Smith } 8628965ea79SLois Curfman McInnes 86318f449edSLois Curfman McInnes ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr); 8648965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 8658965ea79SLois Curfman McInnes 8668965ea79SLois Curfman McInnes /* build cache for off array entries formed */ 8678965ea79SLois Curfman McInnes ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 8688965ea79SLois Curfman McInnes 8698965ea79SLois Curfman McInnes /* stuff used for matrix vector multiply */ 8708965ea79SLois Curfman McInnes a->lvec = 0; 8718965ea79SLois Curfman McInnes a->Mvctx = 0; 87239b7565bSBarry Smith a->roworiented = 1; 8738965ea79SLois Curfman McInnes 8748965ea79SLois Curfman McInnes *newmat = mat; 87525cdf11fSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 87625cdf11fSBarry Smith if (flg) { 8778c469469SLois Curfman McInnes ierr = MatPrintHelp(mat); CHKERRQ(ierr); 8788c469469SLois Curfman McInnes } 8798965ea79SLois Curfman McInnes return 0; 8808965ea79SLois Curfman McInnes } 8818965ea79SLois Curfman McInnes 8823d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues) 8838965ea79SLois Curfman McInnes { 8848965ea79SLois Curfman McInnes Mat mat; 8853501a2bdSLois Curfman McInnes Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data; 88639ddd567SLois Curfman McInnes int ierr; 8872ba99913SLois Curfman McInnes FactorCtx *factor; 8888965ea79SLois Curfman McInnes 8898965ea79SLois Curfman McInnes *newmat = 0; 8900452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm); 8918965ea79SLois Curfman McInnes PLogObjectCreate(mat); 8920452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 8938965ea79SLois Curfman McInnes PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 89439ddd567SLois Curfman McInnes mat->destroy = MatDestroy_MPIDense; 89539ddd567SLois Curfman McInnes mat->view = MatView_MPIDense; 8963501a2bdSLois Curfman McInnes mat->factor = A->factor; 897c456f294SBarry Smith mat->assembled = PETSC_TRUE; 8988965ea79SLois Curfman McInnes 8998965ea79SLois Curfman McInnes a->m = oldmat->m; 9008965ea79SLois Curfman McInnes a->n = oldmat->n; 9018965ea79SLois Curfman McInnes a->M = oldmat->M; 9028965ea79SLois Curfman McInnes a->N = oldmat->N; 9032ba99913SLois Curfman McInnes if (oldmat->factor) { 9042ba99913SLois Curfman McInnes a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor); 9052ba99913SLois Curfman McInnes /* copy factor contents ... add this code! */ 9062ba99913SLois Curfman McInnes } else a->factor = 0; 9078965ea79SLois Curfman McInnes 9088965ea79SLois Curfman McInnes a->rstart = oldmat->rstart; 9098965ea79SLois Curfman McInnes a->rend = oldmat->rend; 9108965ea79SLois Curfman McInnes a->size = oldmat->size; 9118965ea79SLois Curfman McInnes a->rank = oldmat->rank; 9128965ea79SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 9138965ea79SLois Curfman McInnes 9140452661fSBarry Smith a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 91539ddd567SLois Curfman McInnes PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense)); 9168965ea79SLois Curfman McInnes PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 9178965ea79SLois Curfman McInnes ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 9188965ea79SLois Curfman McInnes 9198965ea79SLois Curfman McInnes ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 9208965ea79SLois Curfman McInnes PLogObjectParent(mat,a->lvec); 92155659b69SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 9228965ea79SLois Curfman McInnes PLogObjectParent(mat,a->Mvctx); 9238965ea79SLois Curfman McInnes ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 9248965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 9258965ea79SLois Curfman McInnes *newmat = mat; 9268965ea79SLois Curfman McInnes return 0; 9278965ea79SLois Curfman McInnes } 9288965ea79SLois Curfman McInnes 9298965ea79SLois Curfman McInnes #include "sysio.h" 9308965ea79SLois Curfman McInnes 93139ddd567SLois Curfman McInnes int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat) 9328965ea79SLois Curfman McInnes { 9338965ea79SLois Curfman McInnes Mat A; 9348965ea79SLois Curfman McInnes int i, nz, ierr, j,rstart, rend, fd; 9358965ea79SLois Curfman McInnes Scalar *vals,*svals; 9368965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) bview; 9378965ea79SLois Curfman McInnes MPI_Comm comm = vobj->comm; 9388965ea79SLois Curfman McInnes MPI_Status status; 9398965ea79SLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 9408965ea79SLois Curfman McInnes int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 9418965ea79SLois Curfman McInnes int tag = ((PetscObject)bview)->tag; 9428965ea79SLois Curfman McInnes 9438965ea79SLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 9448965ea79SLois Curfman McInnes if (!rank) { 9458965ea79SLois Curfman McInnes ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 9468965ea79SLois Curfman McInnes ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 94739ddd567SLois Curfman McInnes if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object"); 9488965ea79SLois Curfman McInnes } 9498965ea79SLois Curfman McInnes 9508965ea79SLois Curfman McInnes MPI_Bcast(header+1,3,MPI_INT,0,comm); 9518965ea79SLois Curfman McInnes M = header[1]; N = header[2]; 9528965ea79SLois Curfman McInnes /* determine ownership of all rows */ 9538965ea79SLois Curfman McInnes m = M/size + ((M % size) > rank); 9540452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 9558965ea79SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 9568965ea79SLois Curfman McInnes rowners[0] = 0; 9578965ea79SLois Curfman McInnes for ( i=2; i<=size; i++ ) { 9588965ea79SLois Curfman McInnes rowners[i] += rowners[i-1]; 9598965ea79SLois Curfman McInnes } 9608965ea79SLois Curfman McInnes rstart = rowners[rank]; 9618965ea79SLois Curfman McInnes rend = rowners[rank+1]; 9628965ea79SLois Curfman McInnes 9638965ea79SLois Curfman McInnes /* distribute row lengths to all processors */ 9640452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 9658965ea79SLois Curfman McInnes offlens = ourlens + (rend-rstart); 9668965ea79SLois Curfman McInnes if (!rank) { 9670452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 9688965ea79SLois Curfman McInnes ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 9690452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 9708965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 9718965ea79SLois Curfman McInnes MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 9720452661fSBarry Smith PetscFree(sndcounts); 9738965ea79SLois Curfman McInnes } 9748965ea79SLois Curfman McInnes else { 9758965ea79SLois Curfman McInnes MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 9768965ea79SLois Curfman McInnes } 9778965ea79SLois Curfman McInnes 9788965ea79SLois Curfman McInnes if (!rank) { 9798965ea79SLois Curfman McInnes /* calculate the number of nonzeros on each processor */ 9800452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 981cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 9828965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 9838965ea79SLois Curfman McInnes for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 9848965ea79SLois Curfman McInnes procsnz[i] += rowlengths[j]; 9858965ea79SLois Curfman McInnes } 9868965ea79SLois Curfman McInnes } 9870452661fSBarry Smith PetscFree(rowlengths); 9888965ea79SLois Curfman McInnes 9898965ea79SLois Curfman McInnes /* determine max buffer needed and allocate it */ 9908965ea79SLois Curfman McInnes maxnz = 0; 9918965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 9920452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 9938965ea79SLois Curfman McInnes } 9940452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 9958965ea79SLois Curfman McInnes 9968965ea79SLois Curfman McInnes /* read in my part of the matrix column indices */ 9978965ea79SLois Curfman McInnes nz = procsnz[0]; 9980452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 9998965ea79SLois Curfman McInnes ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 10008965ea79SLois Curfman McInnes 10018965ea79SLois Curfman McInnes /* read in every one elses and ship off */ 10028965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 10038965ea79SLois Curfman McInnes nz = procsnz[i]; 10048965ea79SLois Curfman McInnes ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 10058965ea79SLois Curfman McInnes MPI_Send(cols,nz,MPI_INT,i,tag,comm); 10068965ea79SLois Curfman McInnes } 10070452661fSBarry Smith PetscFree(cols); 10088965ea79SLois Curfman McInnes } 10098965ea79SLois Curfman McInnes else { 10108965ea79SLois Curfman McInnes /* determine buffer space needed for message */ 10118965ea79SLois Curfman McInnes nz = 0; 10128965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10138965ea79SLois Curfman McInnes nz += ourlens[i]; 10148965ea79SLois Curfman McInnes } 10150452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 10168965ea79SLois Curfman McInnes 10178965ea79SLois Curfman McInnes /* receive message of column indices*/ 10188965ea79SLois Curfman McInnes MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 10198965ea79SLois Curfman McInnes MPI_Get_count(&status,MPI_INT,&maxnz); 102039ddd567SLois Curfman McInnes if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file"); 10218965ea79SLois Curfman McInnes } 10228965ea79SLois Curfman McInnes 10238965ea79SLois Curfman McInnes /* loop over local rows, determining number of off diagonal entries */ 1024cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 10258965ea79SLois Curfman McInnes jj = 0; 10268965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10278965ea79SLois Curfman McInnes for ( j=0; j<ourlens[i]; j++ ) { 10288965ea79SLois Curfman McInnes if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 10298965ea79SLois Curfman McInnes jj++; 10308965ea79SLois Curfman McInnes } 10318965ea79SLois Curfman McInnes } 10328965ea79SLois Curfman McInnes 10338965ea79SLois Curfman McInnes /* create our matrix */ 10348965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10358965ea79SLois Curfman McInnes ourlens[i] -= offlens[i]; 10368965ea79SLois Curfman McInnes } 103739ddd567SLois Curfman McInnes if (type == MATMPIDENSE) { 1038b4fd4287SBarry Smith ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 10398965ea79SLois Curfman McInnes } 10408965ea79SLois Curfman McInnes A = *newmat; 10418965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10428965ea79SLois Curfman McInnes ourlens[i] += offlens[i]; 10438965ea79SLois Curfman McInnes } 10448965ea79SLois Curfman McInnes 10458965ea79SLois Curfman McInnes if (!rank) { 10460452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 10478965ea79SLois Curfman McInnes 10488965ea79SLois Curfman McInnes /* read in my part of the matrix numerical values */ 10498965ea79SLois Curfman McInnes nz = procsnz[0]; 10508965ea79SLois Curfman McInnes ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 10518965ea79SLois Curfman McInnes 10528965ea79SLois Curfman McInnes /* insert into matrix */ 10538965ea79SLois Curfman McInnes jj = rstart; 10548965ea79SLois Curfman McInnes smycols = mycols; 10558965ea79SLois Curfman McInnes svals = vals; 10568965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10578965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 10588965ea79SLois Curfman McInnes smycols += ourlens[i]; 10598965ea79SLois Curfman McInnes svals += ourlens[i]; 10608965ea79SLois Curfman McInnes jj++; 10618965ea79SLois Curfman McInnes } 10628965ea79SLois Curfman McInnes 10638965ea79SLois Curfman McInnes /* read in other processors and ship out */ 10648965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 10658965ea79SLois Curfman McInnes nz = procsnz[i]; 10668965ea79SLois Curfman McInnes ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 10678965ea79SLois Curfman McInnes MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 10688965ea79SLois Curfman McInnes } 10690452661fSBarry Smith PetscFree(procsnz); 10708965ea79SLois Curfman McInnes } 10718965ea79SLois Curfman McInnes else { 10728965ea79SLois Curfman McInnes /* receive numeric values */ 10730452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 10748965ea79SLois Curfman McInnes 10758965ea79SLois Curfman McInnes /* receive message of values*/ 10768965ea79SLois Curfman McInnes MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 10778965ea79SLois Curfman McInnes MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 107839ddd567SLois Curfman McInnes if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file"); 10798965ea79SLois Curfman McInnes 10808965ea79SLois Curfman McInnes /* insert into matrix */ 10818965ea79SLois Curfman McInnes jj = rstart; 10828965ea79SLois Curfman McInnes smycols = mycols; 10838965ea79SLois Curfman McInnes svals = vals; 10848965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 10858965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 10868965ea79SLois Curfman McInnes smycols += ourlens[i]; 10878965ea79SLois Curfman McInnes svals += ourlens[i]; 10888965ea79SLois Curfman McInnes jj++; 10898965ea79SLois Curfman McInnes } 10908965ea79SLois Curfman McInnes } 10910452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 10928965ea79SLois Curfman McInnes 10938965ea79SLois Curfman McInnes ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 10948965ea79SLois Curfman McInnes ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 10958965ea79SLois Curfman McInnes return 0; 10968965ea79SLois Curfman McInnes } 1097