18965ea79SLois Curfman McInnes #ifndef lint 2*94a424c1SBarry Smith static char vcid[] = "$Id: mpidense.c,v 1.35 1996/03/19 21:25:49 bsmith Exp bsmith $"; 38965ea79SLois Curfman McInnes #endif 48965ea79SLois Curfman McInnes 5ed3cc1f0SBarry Smith /* 6ed3cc1f0SBarry Smith Basic functions for basic parallel dense matrices. 7ed3cc1f0SBarry Smith */ 8ed3cc1f0SBarry Smith 98965ea79SLois Curfman McInnes #include "mpidense.h" 108965ea79SLois Curfman McInnes #include "vec/vecimpl.h" 118965ea79SLois Curfman McInnes 1239ddd567SLois Curfman McInnes static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n, 138965ea79SLois Curfman McInnes int *idxn,Scalar *v,InsertMode addv) 148965ea79SLois Curfman McInnes { 1539b7565bSBarry Smith Mat_MPIDense *A = (Mat_MPIDense *) mat->data; 1639b7565bSBarry Smith int ierr, i, j, rstart = A->rstart, rend = A->rend, row; 1739b7565bSBarry Smith int roworiented = A->roworiented; 188965ea79SLois Curfman McInnes 1939b7565bSBarry Smith if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) { 2039ddd567SLois Curfman McInnes SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds"); 218965ea79SLois Curfman McInnes } 2239b7565bSBarry Smith A->insertmode = addv; 238965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 2439ddd567SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row"); 2539b7565bSBarry Smith if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large"); 268965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 278965ea79SLois Curfman McInnes row = idxm[i] - rstart; 2839b7565bSBarry Smith if (roworiented) { 2939b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr); 3039b7565bSBarry Smith } 3139b7565bSBarry Smith else { 328965ea79SLois Curfman McInnes for ( j=0; j<n; j++ ) { 3339ddd567SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column"); 3439b7565bSBarry Smith if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large"); 3539b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr); 3639b7565bSBarry Smith } 378965ea79SLois Curfman McInnes } 388965ea79SLois Curfman McInnes } 398965ea79SLois Curfman McInnes else { 4039b7565bSBarry Smith if (roworiented) { 4139b7565bSBarry Smith ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr); 4239b7565bSBarry Smith } 4339b7565bSBarry Smith else { /* must stash each seperately */ 4439b7565bSBarry Smith row = idxm[i]; 4539b7565bSBarry Smith for ( j=0; j<n; j++ ) { 4639b7565bSBarry Smith ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv); 4739b7565bSBarry Smith CHKERRQ(ierr); 4839b7565bSBarry Smith } 4939b7565bSBarry Smith } 50b49de8d1SLois Curfman McInnes } 51b49de8d1SLois Curfman McInnes } 52b49de8d1SLois Curfman McInnes return 0; 53b49de8d1SLois Curfman McInnes } 54b49de8d1SLois Curfman McInnes 55b49de8d1SLois Curfman McInnes static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 56b49de8d1SLois Curfman McInnes { 57b49de8d1SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 58b49de8d1SLois Curfman McInnes int ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row; 59b49de8d1SLois Curfman McInnes 60b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 61b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row"); 62b49de8d1SLois Curfman McInnes if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large"); 63b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 64b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 65b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 66b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column"); 67b49de8d1SLois Curfman McInnes if (idxn[j] >= mdn->N) 68b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIDense:Column too large"); 69b49de8d1SLois Curfman McInnes ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr); 70b49de8d1SLois Curfman McInnes } 71b49de8d1SLois Curfman McInnes } 72b49de8d1SLois Curfman McInnes else { 73b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported"); 748965ea79SLois Curfman McInnes } 758965ea79SLois Curfman McInnes } 768965ea79SLois Curfman McInnes return 0; 778965ea79SLois Curfman McInnes } 788965ea79SLois Curfman McInnes 79ff14e315SSatish Balay static int MatGetArray_MPIDense(Mat A,Scalar **array) 80ff14e315SSatish Balay { 81ff14e315SSatish Balay Mat_MPIDense *a = (Mat_MPIDense *) A->data; 82ff14e315SSatish Balay int ierr; 83ff14e315SSatish Balay 84ff14e315SSatish Balay ierr = MatGetArray(a->A,array); CHKERRQ(ierr); 85ff14e315SSatish Balay return 0; 86ff14e315SSatish Balay } 87ff14e315SSatish Balay 88ff14e315SSatish Balay static int MatRestoreArray_MPIDense(Mat A,Scalar **array) 89ff14e315SSatish Balay { 90ff14e315SSatish Balay return 0; 91ff14e315SSatish Balay } 92ff14e315SSatish Balay 9339ddd567SLois Curfman McInnes static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 948965ea79SLois Curfman McInnes { 9539ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 968965ea79SLois Curfman McInnes MPI_Comm comm = mat->comm; 9739ddd567SLois Curfman McInnes int size = mdn->size, *owners = mdn->rowners, rank = mdn->rank; 988965ea79SLois Curfman McInnes int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 9939ddd567SLois Curfman McInnes int tag = mat->tag, *owner,*starts,count,ierr; 1008965ea79SLois Curfman McInnes InsertMode addv; 10139ddd567SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 1028965ea79SLois Curfman McInnes Scalar *rvalues,*svalues; 1038965ea79SLois Curfman McInnes 1048965ea79SLois Curfman McInnes /* make sure all processors are either in INSERTMODE or ADDMODE */ 1056d84be18SBarry Smith MPI_Allreduce(&mdn->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 10639ddd567SLois Curfman McInnes if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1, 10739ddd567SLois Curfman McInnes "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs"); 1088965ea79SLois Curfman McInnes } 10939ddd567SLois Curfman McInnes mdn->insertmode = addv; /* in case this processor had no cache */ 1108965ea79SLois Curfman McInnes 1118965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 1120452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 113cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1140452661fSBarry Smith owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 11539ddd567SLois Curfman McInnes for ( i=0; i<mdn->stash.n; i++ ) { 11639ddd567SLois Curfman McInnes idx = mdn->stash.idx[i]; 1178965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 1188965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 1198965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1208965ea79SLois Curfman McInnes } 1218965ea79SLois Curfman McInnes } 1228965ea79SLois Curfman McInnes } 1238965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1248965ea79SLois Curfman McInnes 1258965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 1260452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1276d84be18SBarry Smith MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm); 1288965ea79SLois Curfman McInnes nreceives = work[rank]; 1296d84be18SBarry Smith MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm); 1308965ea79SLois Curfman McInnes nmax = work[rank]; 1310452661fSBarry Smith PetscFree(work); 1328965ea79SLois Curfman McInnes 1338965ea79SLois Curfman McInnes /* post receives: 1348965ea79SLois Curfman McInnes 1) each message will consist of ordered pairs 1358965ea79SLois Curfman McInnes (global index,value) we store the global index as a double 1368965ea79SLois Curfman McInnes to simplify the message passing. 1378965ea79SLois Curfman McInnes 2) since we don't know how long each individual message is we 1388965ea79SLois Curfman McInnes allocate the largest needed buffer for each receive. Potentially 1398965ea79SLois Curfman McInnes this is a lot of wasted space. 1408965ea79SLois Curfman McInnes 1418965ea79SLois Curfman McInnes This could be done better. 1428965ea79SLois Curfman McInnes */ 1430452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 1448965ea79SLois Curfman McInnes CHKPTRQ(rvalues); 1450452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 1468965ea79SLois Curfman McInnes CHKPTRQ(recv_waits); 1478965ea79SLois Curfman McInnes for ( i=0; i<nreceives; i++ ) { 1486d84be18SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1498965ea79SLois Curfman McInnes comm,recv_waits+i); 1508965ea79SLois Curfman McInnes } 1518965ea79SLois Curfman McInnes 1528965ea79SLois Curfman McInnes /* do sends: 1538965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 1548965ea79SLois Curfman McInnes the ith processor 1558965ea79SLois Curfman McInnes */ 1560452661fSBarry Smith svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) ); 15739ddd567SLois Curfman McInnes CHKPTRQ(svalues); 1580452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 1598965ea79SLois Curfman McInnes CHKPTRQ(send_waits); 1600452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 1618965ea79SLois Curfman McInnes starts[0] = 0; 1628965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 16339ddd567SLois Curfman McInnes for ( i=0; i<mdn->stash.n; i++ ) { 16439ddd567SLois Curfman McInnes svalues[3*starts[owner[i]]] = (Scalar) mdn->stash.idx[i]; 16539ddd567SLois Curfman McInnes svalues[3*starts[owner[i]]+1] = (Scalar) mdn->stash.idy[i]; 16639ddd567SLois Curfman McInnes svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i]; 1678965ea79SLois Curfman McInnes } 1680452661fSBarry Smith PetscFree(owner); 1698965ea79SLois Curfman McInnes starts[0] = 0; 1708965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1718965ea79SLois Curfman McInnes count = 0; 1728965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 1738965ea79SLois Curfman McInnes if (procs[i]) { 1746d84be18SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 1758965ea79SLois Curfman McInnes comm,send_waits+count++); 1768965ea79SLois Curfman McInnes } 1778965ea79SLois Curfman McInnes } 1780452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 1798965ea79SLois Curfman McInnes 1808965ea79SLois Curfman McInnes /* Free cache space */ 18139ddd567SLois Curfman McInnes ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr); 1828965ea79SLois Curfman McInnes 18339ddd567SLois Curfman McInnes mdn->svalues = svalues; mdn->rvalues = rvalues; 18439ddd567SLois Curfman McInnes mdn->nsends = nsends; mdn->nrecvs = nreceives; 18539ddd567SLois Curfman McInnes mdn->send_waits = send_waits; mdn->recv_waits = recv_waits; 18639ddd567SLois Curfman McInnes mdn->rmax = nmax; 1878965ea79SLois Curfman McInnes 1888965ea79SLois Curfman McInnes return 0; 1898965ea79SLois Curfman McInnes } 19039ddd567SLois Curfman McInnes extern int MatSetUpMultiply_MPIDense(Mat); 1918965ea79SLois Curfman McInnes 19239ddd567SLois Curfman McInnes static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 1938965ea79SLois Curfman McInnes { 19439ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 1958965ea79SLois Curfman McInnes MPI_Status *send_status,recv_status; 19639ddd567SLois Curfman McInnes int imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col; 1978965ea79SLois Curfman McInnes Scalar *values,val; 19839ddd567SLois Curfman McInnes InsertMode addv = mdn->insertmode; 1998965ea79SLois Curfman McInnes 2008965ea79SLois Curfman McInnes /* wait on receives */ 2018965ea79SLois Curfman McInnes while (count) { 20239ddd567SLois Curfman McInnes MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status); 2038965ea79SLois Curfman McInnes /* unpack receives into our local space */ 20439ddd567SLois Curfman McInnes values = mdn->rvalues + 3*imdex*mdn->rmax; 2058965ea79SLois Curfman McInnes MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2068965ea79SLois Curfman McInnes n = n/3; 2078965ea79SLois Curfman McInnes for ( i=0; i<n; i++ ) { 208227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - mdn->rstart; 209227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 2108965ea79SLois Curfman McInnes val = values[3*i+2]; 21139ddd567SLois Curfman McInnes if (col >= 0 && col < mdn->N) { 21239ddd567SLois Curfman McInnes MatSetValues(mdn->A,1,&row,1,&col,&val,addv); 2138965ea79SLois Curfman McInnes } 21439ddd567SLois Curfman McInnes else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");} 2158965ea79SLois Curfman McInnes } 2168965ea79SLois Curfman McInnes count--; 2178965ea79SLois Curfman McInnes } 2180452661fSBarry Smith PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues); 2198965ea79SLois Curfman McInnes 2208965ea79SLois Curfman McInnes /* wait on sends */ 22139ddd567SLois Curfman McInnes if (mdn->nsends) { 2220452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) ); 2238965ea79SLois Curfman McInnes CHKPTRQ(send_status); 22439ddd567SLois Curfman McInnes MPI_Waitall(mdn->nsends,mdn->send_waits,send_status); 2250452661fSBarry Smith PetscFree(send_status); 2268965ea79SLois Curfman McInnes } 2270452661fSBarry Smith PetscFree(mdn->send_waits); PetscFree(mdn->svalues); 2288965ea79SLois Curfman McInnes 22939ddd567SLois Curfman McInnes mdn->insertmode = NOT_SET_VALUES; 23039ddd567SLois Curfman McInnes ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr); 23139ddd567SLois Curfman McInnes ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr); 2328965ea79SLois Curfman McInnes 233227d817aSBarry Smith if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 23439ddd567SLois Curfman McInnes ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr); 2358965ea79SLois Curfman McInnes } 2368965ea79SLois Curfman McInnes return 0; 2378965ea79SLois Curfman McInnes } 2388965ea79SLois Curfman McInnes 23939ddd567SLois Curfman McInnes static int MatZeroEntries_MPIDense(Mat A) 2408965ea79SLois Curfman McInnes { 24139ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 24239ddd567SLois Curfman McInnes return MatZeroEntries(l->A); 2438965ea79SLois Curfman McInnes } 2448965ea79SLois Curfman McInnes 2458965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the 2468965ea79SLois Curfman McInnes matrix is square and the column and row owerships are identical. 2478965ea79SLois Curfman McInnes This is a BUG. The only way to fix it seems to be to access 2483501a2bdSLois Curfman McInnes mdn->A and mdn->B directly and not through the MatZeroRows() 2498965ea79SLois Curfman McInnes routine. 2508965ea79SLois Curfman McInnes */ 25139ddd567SLois Curfman McInnes static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag) 2528965ea79SLois Curfman McInnes { 25339ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 2548965ea79SLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 2558965ea79SLois Curfman McInnes int *procs,*nprocs,j,found,idx,nsends,*work; 2568965ea79SLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 2578965ea79SLois Curfman McInnes int *rvalues,tag = A->tag,count,base,slen,n,*source; 2588965ea79SLois Curfman McInnes int *lens,imdex,*lrows,*values; 2598965ea79SLois Curfman McInnes MPI_Comm comm = A->comm; 2608965ea79SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 2618965ea79SLois Curfman McInnes MPI_Status recv_status,*send_status; 2628965ea79SLois Curfman McInnes IS istmp; 2638965ea79SLois Curfman McInnes 26477c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 2658965ea79SLois Curfman McInnes ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 2668965ea79SLois Curfman McInnes 2678965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 2680452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 269cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 2700452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 2718965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 2728965ea79SLois Curfman McInnes idx = rows[i]; 2738965ea79SLois Curfman McInnes found = 0; 2748965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 2758965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 2768965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 2778965ea79SLois Curfman McInnes } 2788965ea79SLois Curfman McInnes } 27939ddd567SLois Curfman McInnes if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range"); 2808965ea79SLois Curfman McInnes } 2818965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 2828965ea79SLois Curfman McInnes 2838965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 2840452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 2858965ea79SLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 2868965ea79SLois Curfman McInnes nrecvs = work[rank]; 2878965ea79SLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 2888965ea79SLois Curfman McInnes nmax = work[rank]; 2890452661fSBarry Smith PetscFree(work); 2908965ea79SLois Curfman McInnes 2918965ea79SLois Curfman McInnes /* post receives: */ 2920452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 2938965ea79SLois Curfman McInnes CHKPTRQ(rvalues); 2940452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 2958965ea79SLois Curfman McInnes CHKPTRQ(recv_waits); 2968965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 2978965ea79SLois Curfman McInnes MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 2988965ea79SLois Curfman McInnes } 2998965ea79SLois Curfman McInnes 3008965ea79SLois Curfman McInnes /* do sends: 3018965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 3028965ea79SLois Curfman McInnes the ith processor 3038965ea79SLois Curfman McInnes */ 3040452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 3050452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 3068965ea79SLois Curfman McInnes CHKPTRQ(send_waits); 3070452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 3088965ea79SLois Curfman McInnes starts[0] = 0; 3098965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3108965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 3118965ea79SLois Curfman McInnes svalues[starts[owner[i]]++] = rows[i]; 3128965ea79SLois Curfman McInnes } 3138965ea79SLois Curfman McInnes ISRestoreIndices(is,&rows); 3148965ea79SLois Curfman McInnes 3158965ea79SLois Curfman McInnes starts[0] = 0; 3168965ea79SLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3178965ea79SLois Curfman McInnes count = 0; 3188965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 3198965ea79SLois Curfman McInnes if (procs[i]) { 3208965ea79SLois Curfman McInnes MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 3218965ea79SLois Curfman McInnes } 3228965ea79SLois Curfman McInnes } 3230452661fSBarry Smith PetscFree(starts); 3248965ea79SLois Curfman McInnes 3258965ea79SLois Curfman McInnes base = owners[rank]; 3268965ea79SLois Curfman McInnes 3278965ea79SLois Curfman McInnes /* wait on receives */ 3280452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3298965ea79SLois Curfman McInnes source = lens + nrecvs; 3308965ea79SLois Curfman McInnes count = nrecvs; slen = 0; 3318965ea79SLois Curfman McInnes while (count) { 3328965ea79SLois Curfman McInnes MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3338965ea79SLois Curfman McInnes /* unpack receives into our local space */ 3348965ea79SLois Curfman McInnes MPI_Get_count(&recv_status,MPI_INT,&n); 3358965ea79SLois Curfman McInnes source[imdex] = recv_status.MPI_SOURCE; 3368965ea79SLois Curfman McInnes lens[imdex] = n; 3378965ea79SLois Curfman McInnes slen += n; 3388965ea79SLois Curfman McInnes count--; 3398965ea79SLois Curfman McInnes } 3400452661fSBarry Smith PetscFree(recv_waits); 3418965ea79SLois Curfman McInnes 3428965ea79SLois Curfman McInnes /* move the data into the send scatter */ 3430452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 3448965ea79SLois Curfman McInnes count = 0; 3458965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 3468965ea79SLois Curfman McInnes values = rvalues + i*nmax; 3478965ea79SLois Curfman McInnes for ( j=0; j<lens[i]; j++ ) { 3488965ea79SLois Curfman McInnes lrows[count++] = values[j] - base; 3498965ea79SLois Curfman McInnes } 3508965ea79SLois Curfman McInnes } 3510452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 3520452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 3538965ea79SLois Curfman McInnes 3548965ea79SLois Curfman McInnes /* actually zap the local rows */ 3558965ea79SLois Curfman McInnes ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 3568965ea79SLois Curfman McInnes PLogObjectParent(A,istmp); 3570452661fSBarry Smith PetscFree(lrows); 3588965ea79SLois Curfman McInnes ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 3598965ea79SLois Curfman McInnes ierr = ISDestroy(istmp); CHKERRQ(ierr); 3608965ea79SLois Curfman McInnes 3618965ea79SLois Curfman McInnes /* wait on sends */ 3628965ea79SLois Curfman McInnes if (nsends) { 3630452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 3648965ea79SLois Curfman McInnes CHKPTRQ(send_status); 3658965ea79SLois Curfman McInnes MPI_Waitall(nsends,send_waits,send_status); 3660452661fSBarry Smith PetscFree(send_status); 3678965ea79SLois Curfman McInnes } 3680452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 3698965ea79SLois Curfman McInnes 3708965ea79SLois Curfman McInnes return 0; 3718965ea79SLois Curfman McInnes } 3728965ea79SLois Curfman McInnes 37339ddd567SLois Curfman McInnes static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 3748965ea79SLois Curfman McInnes { 37539ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 3768965ea79SLois Curfman McInnes int ierr; 377c456f294SBarry Smith 37839ddd567SLois Curfman McInnes ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 37939ddd567SLois Curfman McInnes CHKERRQ(ierr); 38039ddd567SLois Curfman McInnes ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 38139ddd567SLois Curfman McInnes CHKERRQ(ierr); 38239ddd567SLois Curfman McInnes ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr); 3838965ea79SLois Curfman McInnes return 0; 3848965ea79SLois Curfman McInnes } 3858965ea79SLois Curfman McInnes 38639ddd567SLois Curfman McInnes static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 3878965ea79SLois Curfman McInnes { 38839ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 3898965ea79SLois Curfman McInnes int ierr; 390c456f294SBarry Smith 391c456f294SBarry Smith ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr); 392c456f294SBarry Smith ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);CHKERRQ(ierr); 39339ddd567SLois Curfman McInnes ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr); 3948965ea79SLois Curfman McInnes return 0; 3958965ea79SLois Curfman McInnes } 3968965ea79SLois Curfman McInnes 397096963f5SLois Curfman McInnes static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy) 398096963f5SLois Curfman McInnes { 399096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 400096963f5SLois Curfman McInnes int ierr; 4013501a2bdSLois Curfman McInnes Scalar zero = 0.0; 402096963f5SLois Curfman McInnes 4033501a2bdSLois Curfman McInnes ierr = VecSet(&zero,yy); CHKERRQ(ierr); 404096963f5SLois Curfman McInnes ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr); 405096963f5SLois Curfman McInnes ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 406096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 407096963f5SLois Curfman McInnes ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 408096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 409096963f5SLois Curfman McInnes return 0; 410096963f5SLois Curfman McInnes } 411096963f5SLois Curfman McInnes 412096963f5SLois Curfman McInnes static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 413096963f5SLois Curfman McInnes { 414096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 415096963f5SLois Curfman McInnes int ierr; 416096963f5SLois Curfman McInnes 4173501a2bdSLois Curfman McInnes ierr = VecCopy(yy,zz); CHKERRQ(ierr); 4183501a2bdSLois Curfman McInnes ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr); 419096963f5SLois Curfman McInnes ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 420096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 421096963f5SLois Curfman McInnes ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 422096963f5SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 423096963f5SLois Curfman McInnes return 0; 424096963f5SLois Curfman McInnes } 425096963f5SLois Curfman McInnes 42639ddd567SLois Curfman McInnes static int MatGetDiagonal_MPIDense(Mat A,Vec v) 4278965ea79SLois Curfman McInnes { 42839ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 429096963f5SLois Curfman McInnes Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data; 430096963f5SLois Curfman McInnes int ierr, i, n, m = a->m, radd; 431096963f5SLois Curfman McInnes Scalar *x; 432ed3cc1f0SBarry Smith 433096963f5SLois Curfman McInnes ierr = VecGetArray(v,&x); CHKERRQ(ierr); 434096963f5SLois Curfman McInnes ierr = VecGetSize(v,&n); CHKERRQ(ierr); 435096963f5SLois Curfman McInnes if (n != a->M) SETERRQ(1,"MatGetDiagonal_SeqDense:Nonconforming mat and vec"); 436096963f5SLois Curfman McInnes radd = a->rstart*m*m; 437096963f5SLois Curfman McInnes for ( i=0; i<m; i++ ) { 438096963f5SLois Curfman McInnes x[i] = aloc->v[radd + i*m + i]; 439096963f5SLois Curfman McInnes } 440096963f5SLois Curfman McInnes return 0; 4418965ea79SLois Curfman McInnes } 4428965ea79SLois Curfman McInnes 44339ddd567SLois Curfman McInnes static int MatDestroy_MPIDense(PetscObject obj) 4448965ea79SLois Curfman McInnes { 4458965ea79SLois Curfman McInnes Mat mat = (Mat) obj; 4463501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4478965ea79SLois Curfman McInnes int ierr; 448ed3cc1f0SBarry Smith 4498965ea79SLois Curfman McInnes #if defined(PETSC_LOG) 4503501a2bdSLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N); 4518965ea79SLois Curfman McInnes #endif 4520452661fSBarry Smith PetscFree(mdn->rowners); 4533501a2bdSLois Curfman McInnes ierr = MatDestroy(mdn->A); CHKERRQ(ierr); 4543501a2bdSLois Curfman McInnes if (mdn->lvec) VecDestroy(mdn->lvec); 4553501a2bdSLois Curfman McInnes if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 456622d7880SLois Curfman McInnes if (mdn->factor) { 457622d7880SLois Curfman McInnes if (mdn->factor->temp) PetscFree(mdn->factor->temp); 458622d7880SLois Curfman McInnes if (mdn->factor->tag) PetscFree(mdn->factor->tag); 459622d7880SLois Curfman McInnes if (mdn->factor->pivots) PetscFree(mdn->factor->pivots); 460622d7880SLois Curfman McInnes PetscFree(mdn->factor); 461622d7880SLois Curfman McInnes } 4620452661fSBarry Smith PetscFree(mdn); 4638965ea79SLois Curfman McInnes PLogObjectDestroy(mat); 4640452661fSBarry Smith PetscHeaderDestroy(mat); 4658965ea79SLois Curfman McInnes return 0; 4668965ea79SLois Curfman McInnes } 46739ddd567SLois Curfman McInnes 4688965ea79SLois Curfman McInnes #include "pinclude/pviewer.h" 4698965ea79SLois Curfman McInnes 47039ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer) 4718965ea79SLois Curfman McInnes { 47239ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4738965ea79SLois Curfman McInnes int ierr; 47439ddd567SLois Curfman McInnes if (mdn->size == 1) { 47539ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 4768965ea79SLois Curfman McInnes } 47739ddd567SLois Curfman McInnes else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported"); 4788965ea79SLois Curfman McInnes return 0; 4798965ea79SLois Curfman McInnes } 4808965ea79SLois Curfman McInnes 48139ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer) 4828965ea79SLois Curfman McInnes { 48339ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 48439ddd567SLois Curfman McInnes int ierr, format; 4858965ea79SLois Curfman McInnes FILE *fd; 48619bcc07fSBarry Smith ViewerType vtype; 4878965ea79SLois Curfman McInnes 48819bcc07fSBarry Smith ViewerGetType(viewer,&vtype); 48990ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 49090ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 49190ace30eSBarry Smith if (format == ASCII_FORMAT_INFO_DETAILED) { 492096963f5SLois Curfman McInnes int nz, nzalloc, mem, rank; 493096963f5SLois Curfman McInnes MPI_Comm_rank(mat->comm,&rank); 494096963f5SLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 49577c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 4963501a2bdSLois Curfman McInnes fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n", 497096963f5SLois Curfman McInnes rank,mdn->m,nz,nzalloc,mem); 498096963f5SLois Curfman McInnes fflush(fd); 49977c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 5003501a2bdSLois Curfman McInnes ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr); 5013501a2bdSLois Curfman McInnes return 0; 5023501a2bdSLois Curfman McInnes } 50390ace30eSBarry Smith else if (format == ASCII_FORMAT_INFO) { 504096963f5SLois Curfman McInnes return 0; 5058965ea79SLois Curfman McInnes } 50619bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 50777c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 50839ddd567SLois Curfman McInnes fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n", 50939ddd567SLois Curfman McInnes mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n); 51039ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 5118965ea79SLois Curfman McInnes fflush(fd); 51277c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 5138965ea79SLois Curfman McInnes } 5148965ea79SLois Curfman McInnes else { 51539ddd567SLois Curfman McInnes int size = mdn->size, rank = mdn->rank; 5168965ea79SLois Curfman McInnes if (size == 1) { 51739ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 5188965ea79SLois Curfman McInnes } 5198965ea79SLois Curfman McInnes else { 5208965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 5218965ea79SLois Curfman McInnes Mat A; 52239ddd567SLois Curfman McInnes int M = mdn->M, N = mdn->N,m,row,i, nz, *cols; 52339ddd567SLois Curfman McInnes Scalar *vals; 52439ddd567SLois Curfman McInnes Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data; 5258965ea79SLois Curfman McInnes 5268965ea79SLois Curfman McInnes if (!rank) { 527b4fd4287SBarry Smith ierr = MatCreateMPIDense(mat->comm,M,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr); 5288965ea79SLois Curfman McInnes } 5298965ea79SLois Curfman McInnes else { 530b4fd4287SBarry Smith ierr = MatCreateMPIDense(mat->comm,0,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr); 5318965ea79SLois Curfman McInnes } 5328965ea79SLois Curfman McInnes PLogObjectParent(mat,A); 5338965ea79SLois Curfman McInnes 53439ddd567SLois Curfman McInnes /* Copy the matrix ... This isn't the most efficient means, 53539ddd567SLois Curfman McInnes but it's quick for now */ 53639ddd567SLois Curfman McInnes row = mdn->rstart; m = Amdn->m; 5378965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 53839ddd567SLois Curfman McInnes ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 53939ddd567SLois Curfman McInnes ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr); 54039ddd567SLois Curfman McInnes ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 54139ddd567SLois Curfman McInnes row++; 5428965ea79SLois Curfman McInnes } 5438965ea79SLois Curfman McInnes 5448965ea79SLois Curfman McInnes ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 5458965ea79SLois Curfman McInnes ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 5468965ea79SLois Curfman McInnes if (!rank) { 54739ddd567SLois Curfman McInnes ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr); 5488965ea79SLois Curfman McInnes } 5498965ea79SLois Curfman McInnes ierr = MatDestroy(A); CHKERRQ(ierr); 5508965ea79SLois Curfman McInnes } 5518965ea79SLois Curfman McInnes } 5528965ea79SLois Curfman McInnes return 0; 5538965ea79SLois Curfman McInnes } 5548965ea79SLois Curfman McInnes 55539ddd567SLois Curfman McInnes static int MatView_MPIDense(PetscObject obj,Viewer viewer) 5568965ea79SLois Curfman McInnes { 5578965ea79SLois Curfman McInnes Mat mat = (Mat) obj; 55839ddd567SLois Curfman McInnes int ierr; 559bcd2baecSBarry Smith ViewerType vtype; 5608965ea79SLois Curfman McInnes 5618965ea79SLois Curfman McInnes if (!viewer) { 56219bcc07fSBarry Smith viewer = STDOUT_VIEWER_SELF; 5638965ea79SLois Curfman McInnes } 564bcd2baecSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 565bcd2baecSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 56639ddd567SLois Curfman McInnes ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 5678965ea79SLois Curfman McInnes } 568bcd2baecSBarry Smith else if (vtype == ASCII_FILES_VIEWER) { 56939ddd567SLois Curfman McInnes ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 5708965ea79SLois Curfman McInnes } 571bcd2baecSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 57239ddd567SLois Curfman McInnes return MatView_MPIDense_Binary(mat,viewer); 5738965ea79SLois Curfman McInnes } 5748965ea79SLois Curfman McInnes return 0; 5758965ea79SLois Curfman McInnes } 5768965ea79SLois Curfman McInnes 5773501a2bdSLois Curfman McInnes static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz, 5788965ea79SLois Curfman McInnes int *nzalloc,int *mem) 5798965ea79SLois Curfman McInnes { 5803501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 5813501a2bdSLois Curfman McInnes Mat mdn = mat->A; 58239ddd567SLois Curfman McInnes int ierr, isend[3], irecv[3]; 5838965ea79SLois Curfman McInnes 5843501a2bdSLois Curfman McInnes ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 5858965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 586bcd2baecSBarry Smith if (nz) *nz = isend[0]; 587bcd2baecSBarry Smith if (nzalloc) *nzalloc = isend[1]; 588bcd2baecSBarry Smith if (mem) *mem = isend[2]; 5898965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 5903501a2bdSLois Curfman McInnes MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm); 591bcd2baecSBarry Smith if (nz) *nz = irecv[0]; 592bcd2baecSBarry Smith if (nzalloc) *nzalloc = irecv[1]; 593bcd2baecSBarry Smith if (mem) *mem = irecv[2]; 5948965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 5953501a2bdSLois Curfman McInnes MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm); 596bcd2baecSBarry Smith if (nz) *nz = irecv[0]; 597bcd2baecSBarry Smith if (nzalloc) *nzalloc = irecv[1]; 598bcd2baecSBarry Smith if (mem) *mem = irecv[2]; 5998965ea79SLois Curfman McInnes } 6008965ea79SLois Curfman McInnes return 0; 6018965ea79SLois Curfman McInnes } 6028965ea79SLois Curfman McInnes 6038c469469SLois Curfman McInnes /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*); 6048aaee692SLois Curfman McInnes extern int MatLUFactorNumeric_MPIDense(Mat,Mat*); 6058aaee692SLois Curfman McInnes extern int MatLUFactor_MPIDense(Mat,IS,IS,double); 6068aaee692SLois Curfman McInnes extern int MatSolve_MPIDense(Mat,Vec,Vec); 6078c469469SLois Curfman McInnes extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec); 6088aaee692SLois Curfman McInnes extern int MatSolveTrans_MPIDense(Mat,Vec,Vec); 6098aaee692SLois Curfman McInnes extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */ 6108aaee692SLois Curfman McInnes 61139ddd567SLois Curfman McInnes static int MatSetOption_MPIDense(Mat A,MatOption op) 6128965ea79SLois Curfman McInnes { 61339ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 6148965ea79SLois Curfman McInnes 6158965ea79SLois Curfman McInnes if (op == NO_NEW_NONZERO_LOCATIONS || 6168965ea79SLois Curfman McInnes op == YES_NEW_NONZERO_LOCATIONS || 6178965ea79SLois Curfman McInnes op == COLUMNS_SORTED || 6188965ea79SLois Curfman McInnes op == ROW_ORIENTED) { 6198965ea79SLois Curfman McInnes MatSetOption(a->A,op); 6208965ea79SLois Curfman McInnes } 6218965ea79SLois Curfman McInnes else if (op == ROWS_SORTED || 6228965ea79SLois Curfman McInnes op == SYMMETRIC_MATRIX || 6238965ea79SLois Curfman McInnes op == STRUCTURALLY_SYMMETRIC_MATRIX || 6248965ea79SLois Curfman McInnes op == YES_NEW_DIAGONALS) 625*94a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_MPIDense:Option ignored\n"); 6268965ea79SLois Curfman McInnes else if (op == COLUMN_ORIENTED) 62739b7565bSBarry Smith { a->roworiented = 0; MatSetOption(a->A,op);} 6288965ea79SLois Curfman McInnes else if (op == NO_NEW_DIAGONALS) 62939ddd567SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");} 6308965ea79SLois Curfman McInnes else 63139ddd567SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");} 6328965ea79SLois Curfman McInnes return 0; 6338965ea79SLois Curfman McInnes } 6348965ea79SLois Curfman McInnes 6353501a2bdSLois Curfman McInnes static int MatGetSize_MPIDense(Mat A,int *m,int *n) 6368965ea79SLois Curfman McInnes { 6373501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6388965ea79SLois Curfman McInnes *m = mat->M; *n = mat->N; 6398965ea79SLois Curfman McInnes return 0; 6408965ea79SLois Curfman McInnes } 6418965ea79SLois Curfman McInnes 6423501a2bdSLois Curfman McInnes static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n) 6438965ea79SLois Curfman McInnes { 6443501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6458965ea79SLois Curfman McInnes *m = mat->m; *n = mat->N; 6468965ea79SLois Curfman McInnes return 0; 6478965ea79SLois Curfman McInnes } 6488965ea79SLois Curfman McInnes 6493501a2bdSLois Curfman McInnes static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n) 6508965ea79SLois Curfman McInnes { 6513501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6528965ea79SLois Curfman McInnes *m = mat->rstart; *n = mat->rend; 6538965ea79SLois Curfman McInnes return 0; 6548965ea79SLois Curfman McInnes } 6558965ea79SLois Curfman McInnes 6563501a2bdSLois Curfman McInnes static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v) 6578965ea79SLois Curfman McInnes { 6583501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 65939ddd567SLois Curfman McInnes int lrow, rstart = mat->rstart, rend = mat->rend; 6608965ea79SLois Curfman McInnes 66139ddd567SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows") 6628965ea79SLois Curfman McInnes lrow = row - rstart; 66339ddd567SLois Curfman McInnes return MatGetRow(mat->A,lrow,nz,idx,v); 6648965ea79SLois Curfman McInnes } 6658965ea79SLois Curfman McInnes 66639ddd567SLois Curfman McInnes static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v) 6678965ea79SLois Curfman McInnes { 6680452661fSBarry Smith if (idx) PetscFree(*idx); 6690452661fSBarry Smith if (v) PetscFree(*v); 6708965ea79SLois Curfman McInnes return 0; 6718965ea79SLois Curfman McInnes } 6728965ea79SLois Curfman McInnes 673cddf8d76SBarry Smith static int MatNorm_MPIDense(Mat A,NormType type,double *norm) 674096963f5SLois Curfman McInnes { 6753501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) A->data; 6763501a2bdSLois Curfman McInnes Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data; 6773501a2bdSLois Curfman McInnes int ierr, i, j; 6783501a2bdSLois Curfman McInnes double sum = 0.0; 6793501a2bdSLois Curfman McInnes Scalar *v = mat->v; 6803501a2bdSLois Curfman McInnes 6813501a2bdSLois Curfman McInnes if (mdn->size == 1) { 6823501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr); 6833501a2bdSLois Curfman McInnes } else { 6843501a2bdSLois Curfman McInnes if (type == NORM_FROBENIUS) { 6853501a2bdSLois Curfman McInnes for (i=0; i<mat->n*mat->m; i++ ) { 6863501a2bdSLois Curfman McInnes #if defined(PETSC_COMPLEX) 6873501a2bdSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 6883501a2bdSLois Curfman McInnes #else 6893501a2bdSLois Curfman McInnes sum += (*v)*(*v); v++; 6903501a2bdSLois Curfman McInnes #endif 6913501a2bdSLois Curfman McInnes } 6926d84be18SBarry Smith MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm); 6933501a2bdSLois Curfman McInnes *norm = sqrt(*norm); 6943501a2bdSLois Curfman McInnes PLogFlops(2*mat->n*mat->m); 6953501a2bdSLois Curfman McInnes } 6963501a2bdSLois Curfman McInnes else if (type == NORM_1) { 6973501a2bdSLois Curfman McInnes double *tmp, *tmp2; 6980452661fSBarry Smith tmp = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp); 6993501a2bdSLois Curfman McInnes tmp2 = tmp + mdn->N; 700cddf8d76SBarry Smith PetscMemzero(tmp,2*mdn->N*sizeof(double)); 701096963f5SLois Curfman McInnes *norm = 0.0; 7023501a2bdSLois Curfman McInnes v = mat->v; 7033501a2bdSLois Curfman McInnes for ( j=0; j<mat->n; j++ ) { 7043501a2bdSLois Curfman McInnes for ( i=0; i<mat->m; i++ ) { 70567e560aaSBarry Smith tmp[j] += PetscAbsScalar(*v); v++; 7063501a2bdSLois Curfman McInnes } 7073501a2bdSLois Curfman McInnes } 7086d84be18SBarry Smith MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm); 7093501a2bdSLois Curfman McInnes for ( j=0; j<mdn->N; j++ ) { 7103501a2bdSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 7113501a2bdSLois Curfman McInnes } 7120452661fSBarry Smith PetscFree(tmp); 7133501a2bdSLois Curfman McInnes PLogFlops(mat->n*mat->m); 7143501a2bdSLois Curfman McInnes } 7153501a2bdSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 7163501a2bdSLois Curfman McInnes double ntemp; 7173501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr); 7186d84be18SBarry Smith MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm); 7193501a2bdSLois Curfman McInnes } 7203501a2bdSLois Curfman McInnes else { 7213501a2bdSLois Curfman McInnes SETERRQ(1,"MatNorm_MPIDense:No support for two norm"); 7223501a2bdSLois Curfman McInnes } 7233501a2bdSLois Curfman McInnes } 7243501a2bdSLois Curfman McInnes return 0; 7253501a2bdSLois Curfman McInnes } 7263501a2bdSLois Curfman McInnes 7273501a2bdSLois Curfman McInnes static int MatTranspose_MPIDense(Mat A,Mat *matout) 7283501a2bdSLois Curfman McInnes { 7293501a2bdSLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 7303501a2bdSLois Curfman McInnes Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data; 7313501a2bdSLois Curfman McInnes Mat B; 7323501a2bdSLois Curfman McInnes int M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart; 7333501a2bdSLois Curfman McInnes int j, i, ierr; 7343501a2bdSLois Curfman McInnes Scalar *v; 7353501a2bdSLois Curfman McInnes 7363638b69dSLois Curfman McInnes if (matout == PETSC_NULL && M != N) 7373501a2bdSLois Curfman McInnes SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place"); 738b4fd4287SBarry Smith ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B); 739ed2daf61SLois Curfman McInnes CHKERRQ(ierr); 7403501a2bdSLois Curfman McInnes 7413501a2bdSLois Curfman McInnes m = Aloc->m; n = Aloc->n; v = Aloc->v; 7420452661fSBarry Smith rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork); 7433501a2bdSLois Curfman McInnes for ( j=0; j<n; j++ ) { 7443501a2bdSLois Curfman McInnes for (i=0; i<m; i++) rwork[i] = rstart + i; 7453501a2bdSLois Curfman McInnes ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr); 7463501a2bdSLois Curfman McInnes v += m; 7473501a2bdSLois Curfman McInnes } 7480452661fSBarry Smith PetscFree(rwork); 7493501a2bdSLois Curfman McInnes ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 7503501a2bdSLois Curfman McInnes ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 7513638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 7523501a2bdSLois Curfman McInnes *matout = B; 7533501a2bdSLois Curfman McInnes } else { 7543501a2bdSLois Curfman McInnes /* This isn't really an in-place transpose, but free data struct from a */ 7550452661fSBarry Smith PetscFree(a->rowners); 7563501a2bdSLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 7573501a2bdSLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 7583501a2bdSLois Curfman McInnes if (a->Mvctx) VecScatterDestroy(a->Mvctx); 7590452661fSBarry Smith PetscFree(a); 7603501a2bdSLois Curfman McInnes PetscMemcpy(A,B,sizeof(struct _Mat)); 7610452661fSBarry Smith PetscHeaderDestroy(B); 7623501a2bdSLois Curfman McInnes } 763096963f5SLois Curfman McInnes return 0; 764096963f5SLois Curfman McInnes } 765096963f5SLois Curfman McInnes 7663d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat,Mat *,int); 7678965ea79SLois Curfman McInnes 7688965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/ 76939ddd567SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIDense, 77039ddd567SLois Curfman McInnes MatGetRow_MPIDense,MatRestoreRow_MPIDense, 77139ddd567SLois Curfman McInnes MatMult_MPIDense,MatMultAdd_MPIDense, 772096963f5SLois Curfman McInnes MatMultTrans_MPIDense,MatMultTransAdd_MPIDense, 773e7ca0642SLois Curfman McInnes /* MatSolve_MPIDense,0, */ 774e7ca0642SLois Curfman McInnes 0,0, 77539ddd567SLois Curfman McInnes 0,0, 7768c469469SLois Curfman McInnes 0,0, 7778c469469SLois Curfman McInnes /* MatLUFactor_MPIDense,0, */ 7788aaee692SLois Curfman McInnes 0,MatTranspose_MPIDense, 77939ddd567SLois Curfman McInnes MatGetInfo_MPIDense,0, 780096963f5SLois Curfman McInnes MatGetDiagonal_MPIDense,0,MatNorm_MPIDense, 78139ddd567SLois Curfman McInnes MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense, 7828965ea79SLois Curfman McInnes 0, 78339ddd567SLois Curfman McInnes MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense, 7848c469469SLois Curfman McInnes 0,0,0, 7858c469469SLois Curfman McInnes /* 0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */ 7868aaee692SLois Curfman McInnes 0,0, 78739ddd567SLois Curfman McInnes MatGetSize_MPIDense,MatGetLocalSize_MPIDense, 78839ddd567SLois Curfman McInnes MatGetOwnershipRange_MPIDense, 789ff14e315SSatish Balay 0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense, 790ff14e315SSatish Balay 0,0,0,MatConvertSameType_MPIDense, 791b49de8d1SLois Curfman McInnes 0,0,0,0,0, 792b49de8d1SLois Curfman McInnes 0,0,MatGetValues_MPIDense}; 7938965ea79SLois Curfman McInnes 7948965ea79SLois Curfman McInnes /*@C 79539ddd567SLois Curfman McInnes MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 7968965ea79SLois Curfman McInnes 7978965ea79SLois Curfman McInnes Input Parameters: 7988965ea79SLois Curfman McInnes . comm - MPI communicator 7998965ea79SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 8008965ea79SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 8018965ea79SLois Curfman McInnes if N is given) 8028965ea79SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 8038965ea79SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 8048965ea79SLois Curfman McInnes if n is given) 805b4fd4287SBarry Smith . data - optional location of matrix data. Set data=PETSC_NULL for PETSc 806dfc5480cSLois Curfman McInnes to control all matrix memory allocation. 8078965ea79SLois Curfman McInnes 8088965ea79SLois Curfman McInnes Output Parameter: 8098965ea79SLois Curfman McInnes . newmat - the matrix 8108965ea79SLois Curfman McInnes 8118965ea79SLois Curfman McInnes Notes: 81239ddd567SLois Curfman McInnes The dense format is fully compatible with standard Fortran 77 81339ddd567SLois Curfman McInnes storage by columns. 8148965ea79SLois Curfman McInnes 81518f449edSLois Curfman McInnes The data input variable is intended primarily for Fortran programmers 81618f449edSLois Curfman McInnes who wish to allocate their own matrix memory space. Most users should 817b4fd4287SBarry Smith set data=PETSC_NULL. 81818f449edSLois Curfman McInnes 8198965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 8208965ea79SLois Curfman McInnes (possibly both). 8218965ea79SLois Curfman McInnes 8223501a2bdSLois Curfman McInnes Currently, the only parallel dense matrix decomposition is by rows, 8233501a2bdSLois Curfman McInnes so that n=N and each submatrix owns all of the global columns. 8243501a2bdSLois Curfman McInnes 82539ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel 8268965ea79SLois Curfman McInnes 82739ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 8288965ea79SLois Curfman McInnes @*/ 82918f449edSLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *newmat) 8308965ea79SLois Curfman McInnes { 8318965ea79SLois Curfman McInnes Mat mat; 83239ddd567SLois Curfman McInnes Mat_MPIDense *a; 83325cdf11fSBarry Smith int ierr, i,flg; 8348965ea79SLois Curfman McInnes 835ed2daf61SLois Curfman McInnes /* Note: For now, when data is specified above, this assumes the user correctly 836ed2daf61SLois Curfman McInnes allocates the local dense storage space. We should add error checking. */ 83718f449edSLois Curfman McInnes 8388965ea79SLois Curfman McInnes *newmat = 0; 8390452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm); 8408965ea79SLois Curfman McInnes PLogObjectCreate(mat); 8410452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 8428965ea79SLois Curfman McInnes PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 84339ddd567SLois Curfman McInnes mat->destroy = MatDestroy_MPIDense; 84439ddd567SLois Curfman McInnes mat->view = MatView_MPIDense; 8458965ea79SLois Curfman McInnes mat->factor = 0; 8468965ea79SLois Curfman McInnes 847622d7880SLois Curfman McInnes a->factor = 0; 8488965ea79SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 8498965ea79SLois Curfman McInnes MPI_Comm_rank(comm,&a->rank); 8508965ea79SLois Curfman McInnes MPI_Comm_size(comm,&a->size); 8518965ea79SLois Curfman McInnes 85239ddd567SLois Curfman McInnes if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm); 8538965ea79SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 85439ddd567SLois Curfman McInnes 85539ddd567SLois Curfman McInnes /* each row stores all columns */ 85639ddd567SLois Curfman McInnes if (N == PETSC_DECIDE) N = n; 857d7e8b826SBarry Smith if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 858d7e8b826SBarry Smith /* if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */ 8598965ea79SLois Curfman McInnes a->N = N; 8608965ea79SLois Curfman McInnes a->M = M; 86139ddd567SLois Curfman McInnes a->m = m; 86239ddd567SLois Curfman McInnes a->n = n; 8638965ea79SLois Curfman McInnes 8648965ea79SLois Curfman McInnes /* build local table of row and column ownerships */ 865d7e8b826SBarry Smith a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 866d7e8b826SBarry Smith a->cowners = a->rowners + a->size + 1; 867d7e8b826SBarry Smith PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+ 86839ddd567SLois Curfman McInnes sizeof(Mat_MPIDense)); 8698965ea79SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 8708965ea79SLois Curfman McInnes a->rowners[0] = 0; 8718965ea79SLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 8728965ea79SLois Curfman McInnes a->rowners[i] += a->rowners[i-1]; 8738965ea79SLois Curfman McInnes } 8748965ea79SLois Curfman McInnes a->rstart = a->rowners[a->rank]; 8758965ea79SLois Curfman McInnes a->rend = a->rowners[a->rank+1]; 876d7e8b826SBarry Smith MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 877d7e8b826SBarry Smith a->cowners[0] = 0; 878d7e8b826SBarry Smith for ( i=2; i<=a->size; i++ ) { 879d7e8b826SBarry Smith a->cowners[i] += a->cowners[i-1]; 880d7e8b826SBarry Smith } 8818965ea79SLois Curfman McInnes 88218f449edSLois Curfman McInnes ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr); 8838965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 8848965ea79SLois Curfman McInnes 8858965ea79SLois Curfman McInnes /* build cache for off array entries formed */ 8868965ea79SLois Curfman McInnes ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 8878965ea79SLois Curfman McInnes 8888965ea79SLois Curfman McInnes /* stuff used for matrix vector multiply */ 8898965ea79SLois Curfman McInnes a->lvec = 0; 8908965ea79SLois Curfman McInnes a->Mvctx = 0; 89139b7565bSBarry Smith a->roworiented = 1; 8928965ea79SLois Curfman McInnes 8938965ea79SLois Curfman McInnes *newmat = mat; 89425cdf11fSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 89525cdf11fSBarry Smith if (flg) { 8968c469469SLois Curfman McInnes ierr = MatPrintHelp(mat); CHKERRQ(ierr); 8978c469469SLois Curfman McInnes } 8988965ea79SLois Curfman McInnes return 0; 8998965ea79SLois Curfman McInnes } 9008965ea79SLois Curfman McInnes 9013d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues) 9028965ea79SLois Curfman McInnes { 9038965ea79SLois Curfman McInnes Mat mat; 9043501a2bdSLois Curfman McInnes Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data; 90539ddd567SLois Curfman McInnes int ierr; 9062ba99913SLois Curfman McInnes FactorCtx *factor; 9078965ea79SLois Curfman McInnes 9088965ea79SLois Curfman McInnes *newmat = 0; 9090452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm); 9108965ea79SLois Curfman McInnes PLogObjectCreate(mat); 9110452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 9128965ea79SLois Curfman McInnes PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 91339ddd567SLois Curfman McInnes mat->destroy = MatDestroy_MPIDense; 91439ddd567SLois Curfman McInnes mat->view = MatView_MPIDense; 9153501a2bdSLois Curfman McInnes mat->factor = A->factor; 916c456f294SBarry Smith mat->assembled = PETSC_TRUE; 9178965ea79SLois Curfman McInnes 9188965ea79SLois Curfman McInnes a->m = oldmat->m; 9198965ea79SLois Curfman McInnes a->n = oldmat->n; 9208965ea79SLois Curfman McInnes a->M = oldmat->M; 9218965ea79SLois Curfman McInnes a->N = oldmat->N; 9222ba99913SLois Curfman McInnes if (oldmat->factor) { 9232ba99913SLois Curfman McInnes a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor); 9242ba99913SLois Curfman McInnes /* copy factor contents ... add this code! */ 9252ba99913SLois Curfman McInnes } else a->factor = 0; 9268965ea79SLois Curfman McInnes 9278965ea79SLois Curfman McInnes a->rstart = oldmat->rstart; 9288965ea79SLois Curfman McInnes a->rend = oldmat->rend; 9298965ea79SLois Curfman McInnes a->size = oldmat->size; 9308965ea79SLois Curfman McInnes a->rank = oldmat->rank; 9318965ea79SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 9328965ea79SLois Curfman McInnes 9330452661fSBarry Smith a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 93439ddd567SLois Curfman McInnes PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense)); 9358965ea79SLois Curfman McInnes PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 9368965ea79SLois Curfman McInnes ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 9378965ea79SLois Curfman McInnes 9388965ea79SLois Curfman McInnes ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 9398965ea79SLois Curfman McInnes PLogObjectParent(mat,a->lvec); 94055659b69SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 9418965ea79SLois Curfman McInnes PLogObjectParent(mat,a->Mvctx); 9428965ea79SLois Curfman McInnes ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 9438965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 9448965ea79SLois Curfman McInnes *newmat = mat; 9458965ea79SLois Curfman McInnes return 0; 9468965ea79SLois Curfman McInnes } 9478965ea79SLois Curfman McInnes 94877c4ece6SBarry Smith #include "sys.h" 9498965ea79SLois Curfman McInnes 95090ace30eSBarry Smith int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat) 95190ace30eSBarry Smith { 95290ace30eSBarry Smith int *rowners, i,size,rank,m,rstart,rend,ierr,nz,j; 95390ace30eSBarry Smith Scalar *array,*vals,*vals_ptr; 95490ace30eSBarry Smith MPI_Status status; 95590ace30eSBarry Smith 95690ace30eSBarry Smith MPI_Comm_rank(comm,&rank); 95790ace30eSBarry Smith MPI_Comm_size(comm,&size); 95890ace30eSBarry Smith 95990ace30eSBarry Smith /* determine ownership of all rows */ 96090ace30eSBarry Smith m = M/size + ((M % size) > rank); 96190ace30eSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 96290ace30eSBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 96390ace30eSBarry Smith rowners[0] = 0; 96490ace30eSBarry Smith for ( i=2; i<=size; i++ ) { 96590ace30eSBarry Smith rowners[i] += rowners[i-1]; 96690ace30eSBarry Smith } 96790ace30eSBarry Smith rstart = rowners[rank]; 96890ace30eSBarry Smith rend = rowners[rank+1]; 96990ace30eSBarry Smith 97090ace30eSBarry Smith ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 97190ace30eSBarry Smith ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr); 97290ace30eSBarry Smith 97390ace30eSBarry Smith if (!rank) { 97490ace30eSBarry Smith vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals); 97590ace30eSBarry Smith 97690ace30eSBarry Smith /* read in my part of the matrix numerical values */ 97777c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr); 97890ace30eSBarry Smith 97990ace30eSBarry Smith /* insert into matrix-by row (this is why cannot directly read into array */ 98090ace30eSBarry Smith vals_ptr = vals; 98190ace30eSBarry Smith for ( i=0; i<m; i++ ) { 98290ace30eSBarry Smith for ( j=0; j<N; j++ ) { 98390ace30eSBarry Smith array[i + j*m] = *vals_ptr++; 98490ace30eSBarry Smith } 98590ace30eSBarry Smith } 98690ace30eSBarry Smith 98790ace30eSBarry Smith /* read in other processors and ship out */ 98890ace30eSBarry Smith for ( i=1; i<size; i++ ) { 98990ace30eSBarry Smith nz = (rowners[i+1] - rowners[i])*N; 99077c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 99190ace30eSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm); 99290ace30eSBarry Smith } 99390ace30eSBarry Smith } 99490ace30eSBarry Smith else { 99590ace30eSBarry Smith /* receive numeric values */ 99690ace30eSBarry Smith vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals); 99790ace30eSBarry Smith 99890ace30eSBarry Smith /* receive message of values*/ 99990ace30eSBarry Smith MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status); 100090ace30eSBarry Smith 100190ace30eSBarry Smith /* insert into matrix-by row (this is why cannot directly read into array */ 100290ace30eSBarry Smith vals_ptr = vals; 100390ace30eSBarry Smith for ( i=0; i<m; i++ ) { 100490ace30eSBarry Smith for ( j=0; j<N; j++ ) { 100590ace30eSBarry Smith array[i + j*m] = *vals_ptr++; 100690ace30eSBarry Smith } 100790ace30eSBarry Smith } 100890ace30eSBarry Smith } 100990ace30eSBarry Smith PetscFree(rowners); 101090ace30eSBarry Smith PetscFree(vals); 101190ace30eSBarry Smith ierr = MatAssemblyBegin(*newmat,FINAL_ASSEMBLY); CHKERRQ(ierr); 101290ace30eSBarry Smith ierr = MatAssemblyEnd(*newmat,FINAL_ASSEMBLY); CHKERRQ(ierr); 101390ace30eSBarry Smith return 0; 101490ace30eSBarry Smith } 101590ace30eSBarry Smith 101690ace30eSBarry Smith 101719bcc07fSBarry Smith int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat) 10188965ea79SLois Curfman McInnes { 10198965ea79SLois Curfman McInnes Mat A; 10208965ea79SLois Curfman McInnes int i, nz, ierr, j,rstart, rend, fd; 10218965ea79SLois Curfman McInnes Scalar *vals,*svals; 102219bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 10238965ea79SLois Curfman McInnes MPI_Status status; 10248965ea79SLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 10258965ea79SLois Curfman McInnes int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 102619bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 10278965ea79SLois Curfman McInnes 10288965ea79SLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 10298965ea79SLois Curfman McInnes if (!rank) { 103090ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 103177c4ece6SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 103239ddd567SLois Curfman McInnes if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object"); 10338965ea79SLois Curfman McInnes } 10348965ea79SLois Curfman McInnes 10358965ea79SLois Curfman McInnes MPI_Bcast(header+1,3,MPI_INT,0,comm); 103690ace30eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 103790ace30eSBarry Smith 103890ace30eSBarry Smith /* 103990ace30eSBarry Smith Handle case where matrix is stored on disk as a dense matrix 104090ace30eSBarry Smith */ 104190ace30eSBarry Smith if (nz == MATRIX_BINARY_FORMAT_DENSE) { 104290ace30eSBarry Smith return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat); 104390ace30eSBarry Smith } 104490ace30eSBarry Smith 10458965ea79SLois Curfman McInnes /* determine ownership of all rows */ 10468965ea79SLois Curfman McInnes m = M/size + ((M % size) > rank); 10470452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 10488965ea79SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 10498965ea79SLois Curfman McInnes rowners[0] = 0; 10508965ea79SLois Curfman McInnes for ( i=2; i<=size; i++ ) { 10518965ea79SLois Curfman McInnes rowners[i] += rowners[i-1]; 10528965ea79SLois Curfman McInnes } 10538965ea79SLois Curfman McInnes rstart = rowners[rank]; 10548965ea79SLois Curfman McInnes rend = rowners[rank+1]; 10558965ea79SLois Curfman McInnes 10568965ea79SLois Curfman McInnes /* distribute row lengths to all processors */ 10570452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 10588965ea79SLois Curfman McInnes offlens = ourlens + (rend-rstart); 10598965ea79SLois Curfman McInnes if (!rank) { 10600452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 106177c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 10620452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 10638965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 10648965ea79SLois Curfman McInnes MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 10650452661fSBarry Smith PetscFree(sndcounts); 10668965ea79SLois Curfman McInnes } 10678965ea79SLois Curfman McInnes else { 10688965ea79SLois Curfman McInnes MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 10698965ea79SLois Curfman McInnes } 10708965ea79SLois Curfman McInnes 10718965ea79SLois Curfman McInnes if (!rank) { 10728965ea79SLois Curfman McInnes /* calculate the number of nonzeros on each processor */ 10730452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1074cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 10758965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 10768965ea79SLois Curfman McInnes for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 10778965ea79SLois Curfman McInnes procsnz[i] += rowlengths[j]; 10788965ea79SLois Curfman McInnes } 10798965ea79SLois Curfman McInnes } 10800452661fSBarry Smith PetscFree(rowlengths); 10818965ea79SLois Curfman McInnes 10828965ea79SLois Curfman McInnes /* determine max buffer needed and allocate it */ 10838965ea79SLois Curfman McInnes maxnz = 0; 10848965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 10850452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 10868965ea79SLois Curfman McInnes } 10870452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 10888965ea79SLois Curfman McInnes 10898965ea79SLois Curfman McInnes /* read in my part of the matrix column indices */ 10908965ea79SLois Curfman McInnes nz = procsnz[0]; 10910452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 109277c4ece6SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 10938965ea79SLois Curfman McInnes 10948965ea79SLois Curfman McInnes /* read in every one elses and ship off */ 10958965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 10968965ea79SLois Curfman McInnes nz = procsnz[i]; 109777c4ece6SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 10988965ea79SLois Curfman McInnes MPI_Send(cols,nz,MPI_INT,i,tag,comm); 10998965ea79SLois Curfman McInnes } 11000452661fSBarry Smith PetscFree(cols); 11018965ea79SLois Curfman McInnes } 11028965ea79SLois Curfman McInnes else { 11038965ea79SLois Curfman McInnes /* determine buffer space needed for message */ 11048965ea79SLois Curfman McInnes nz = 0; 11058965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 11068965ea79SLois Curfman McInnes nz += ourlens[i]; 11078965ea79SLois Curfman McInnes } 11080452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 11098965ea79SLois Curfman McInnes 11108965ea79SLois Curfman McInnes /* receive message of column indices*/ 11118965ea79SLois Curfman McInnes MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 11128965ea79SLois Curfman McInnes MPI_Get_count(&status,MPI_INT,&maxnz); 111339ddd567SLois Curfman McInnes if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file"); 11148965ea79SLois Curfman McInnes } 11158965ea79SLois Curfman McInnes 11168965ea79SLois Curfman McInnes /* loop over local rows, determining number of off diagonal entries */ 1117cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 11188965ea79SLois Curfman McInnes jj = 0; 11198965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 11208965ea79SLois Curfman McInnes for ( j=0; j<ourlens[i]; j++ ) { 11218965ea79SLois Curfman McInnes if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 11228965ea79SLois Curfman McInnes jj++; 11238965ea79SLois Curfman McInnes } 11248965ea79SLois Curfman McInnes } 11258965ea79SLois Curfman McInnes 11268965ea79SLois Curfman McInnes /* create our matrix */ 11278965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 11288965ea79SLois Curfman McInnes ourlens[i] -= offlens[i]; 11298965ea79SLois Curfman McInnes } 1130b4fd4287SBarry Smith ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 11318965ea79SLois Curfman McInnes A = *newmat; 11328965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 11338965ea79SLois Curfman McInnes ourlens[i] += offlens[i]; 11348965ea79SLois Curfman McInnes } 11358965ea79SLois Curfman McInnes 11368965ea79SLois Curfman McInnes if (!rank) { 11370452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 11388965ea79SLois Curfman McInnes 11398965ea79SLois Curfman McInnes /* read in my part of the matrix numerical values */ 11408965ea79SLois Curfman McInnes nz = procsnz[0]; 114177c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 11428965ea79SLois Curfman McInnes 11438965ea79SLois Curfman McInnes /* insert into matrix */ 11448965ea79SLois Curfman McInnes jj = rstart; 11458965ea79SLois Curfman McInnes smycols = mycols; 11468965ea79SLois Curfman McInnes svals = vals; 11478965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 11488965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 11498965ea79SLois Curfman McInnes smycols += ourlens[i]; 11508965ea79SLois Curfman McInnes svals += ourlens[i]; 11518965ea79SLois Curfman McInnes jj++; 11528965ea79SLois Curfman McInnes } 11538965ea79SLois Curfman McInnes 11548965ea79SLois Curfman McInnes /* read in other processors and ship out */ 11558965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 11568965ea79SLois Curfman McInnes nz = procsnz[i]; 115777c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 11588965ea79SLois Curfman McInnes MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 11598965ea79SLois Curfman McInnes } 11600452661fSBarry Smith PetscFree(procsnz); 11618965ea79SLois Curfman McInnes } 11628965ea79SLois Curfman McInnes else { 11638965ea79SLois Curfman McInnes /* receive numeric values */ 11640452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 11658965ea79SLois Curfman McInnes 11668965ea79SLois Curfman McInnes /* receive message of values*/ 11678965ea79SLois Curfman McInnes MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 11688965ea79SLois Curfman McInnes MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 116939ddd567SLois Curfman McInnes if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file"); 11708965ea79SLois Curfman McInnes 11718965ea79SLois Curfman McInnes /* insert into matrix */ 11728965ea79SLois Curfman McInnes jj = rstart; 11738965ea79SLois Curfman McInnes smycols = mycols; 11748965ea79SLois Curfman McInnes svals = vals; 11758965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 11768965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 11778965ea79SLois Curfman McInnes smycols += ourlens[i]; 11788965ea79SLois Curfman McInnes svals += ourlens[i]; 11798965ea79SLois Curfman McInnes jj++; 11808965ea79SLois Curfman McInnes } 11818965ea79SLois Curfman McInnes } 11820452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 11838965ea79SLois Curfman McInnes 11848965ea79SLois Curfman McInnes ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 11858965ea79SLois Curfman McInnes ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 11868965ea79SLois Curfman McInnes return 0; 11878965ea79SLois Curfman McInnes } 118890ace30eSBarry Smith 118990ace30eSBarry Smith 119090ace30eSBarry Smith 119190ace30eSBarry Smith 119290ace30eSBarry Smith 1193