18965ea79SLois Curfman McInnes #ifndef lint 2*0513a670SBarry Smith static char vcid[] = "$Id: mpidense.c,v 1.62 1997/01/06 20:24:18 balay Exp bsmith $"; 38965ea79SLois Curfman McInnes #endif 48965ea79SLois Curfman McInnes 5ed3cc1f0SBarry Smith /* 6ed3cc1f0SBarry Smith Basic functions for basic parallel dense matrices. 7ed3cc1f0SBarry Smith */ 8ed3cc1f0SBarry Smith 970f55243SBarry Smith #include "src/mat/impls/dense/mpi/mpidense.h" 10f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 118965ea79SLois Curfman McInnes 125615d1e5SSatish Balay #undef __FUNC__ 135615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIDense" 147056b6fcSBarry Smith static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv) 158965ea79SLois Curfman McInnes { 1639b7565bSBarry Smith Mat_MPIDense *A = (Mat_MPIDense *) mat->data; 1739b7565bSBarry Smith int ierr, i, j, rstart = A->rstart, rend = A->rend, row; 1839b7565bSBarry Smith int roworiented = A->roworiented; 198965ea79SLois Curfman McInnes 2039b7565bSBarry Smith if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) { 21e3372554SBarry Smith SETERRQ(1,0,"Cannot mix inserts and adds"); 228965ea79SLois Curfman McInnes } 2339b7565bSBarry Smith A->insertmode = addv; 248965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 25e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 26e3372554SBarry Smith if (idxm[i] >= A->M) SETERRQ(1,0,"Row too large"); 278965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 288965ea79SLois Curfman McInnes row = idxm[i] - rstart; 2939b7565bSBarry Smith if (roworiented) { 3039b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr); 3139b7565bSBarry Smith } 3239b7565bSBarry Smith else { 338965ea79SLois Curfman McInnes for ( j=0; j<n; j++ ) { 34e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 35e3372554SBarry Smith if (idxn[j] >= A->N) SETERRQ(1,0,"Column too large"); 3639b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr); 3739b7565bSBarry Smith } 388965ea79SLois Curfman McInnes } 398965ea79SLois Curfman McInnes } 408965ea79SLois Curfman McInnes else { 4139b7565bSBarry Smith if (roworiented) { 4239b7565bSBarry Smith ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr); 4339b7565bSBarry Smith } 4439b7565bSBarry Smith else { /* must stash each seperately */ 4539b7565bSBarry Smith row = idxm[i]; 4639b7565bSBarry Smith for ( j=0; j<n; j++ ) { 477056b6fcSBarry Smith ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 4839b7565bSBarry Smith } 4939b7565bSBarry Smith } 50b49de8d1SLois Curfman McInnes } 51b49de8d1SLois Curfman McInnes } 52b49de8d1SLois Curfman McInnes return 0; 53b49de8d1SLois Curfman McInnes } 54b49de8d1SLois Curfman McInnes 555615d1e5SSatish Balay #undef __FUNC__ 565615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIDense" 57b49de8d1SLois Curfman McInnes static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 58b49de8d1SLois Curfman McInnes { 59b49de8d1SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 60b49de8d1SLois Curfman McInnes int ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row; 61b49de8d1SLois Curfman McInnes 62b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 63e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 64e3372554SBarry Smith if (idxm[i] >= mdn->M) SETERRQ(1,0,"Row too large"); 65b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 66b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 67b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 68e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 69b49de8d1SLois Curfman McInnes if (idxn[j] >= mdn->N) 70e3372554SBarry Smith SETERRQ(1,0,"Column too large"); 71b49de8d1SLois Curfman McInnes ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr); 72b49de8d1SLois Curfman McInnes } 73b49de8d1SLois Curfman McInnes } 74b49de8d1SLois Curfman McInnes else { 75e3372554SBarry Smith SETERRQ(1,0,"Only local values currently supported"); 768965ea79SLois Curfman McInnes } 778965ea79SLois Curfman McInnes } 788965ea79SLois Curfman McInnes return 0; 798965ea79SLois Curfman McInnes } 808965ea79SLois Curfman McInnes 815615d1e5SSatish Balay #undef __FUNC__ 825615d1e5SSatish Balay #define __FUNC__ "MatGetArray_MPIDense" 83ff14e315SSatish Balay static int MatGetArray_MPIDense(Mat A,Scalar **array) 84ff14e315SSatish Balay { 85ff14e315SSatish Balay Mat_MPIDense *a = (Mat_MPIDense *) A->data; 86ff14e315SSatish Balay int ierr; 87ff14e315SSatish Balay 88ff14e315SSatish Balay ierr = MatGetArray(a->A,array); CHKERRQ(ierr); 89ff14e315SSatish Balay return 0; 90ff14e315SSatish Balay } 91ff14e315SSatish Balay 925615d1e5SSatish Balay #undef __FUNC__ 935615d1e5SSatish Balay #define __FUNC__ "MatRestoreArray_MPIDense" 94ff14e315SSatish Balay static int MatRestoreArray_MPIDense(Mat A,Scalar **array) 95ff14e315SSatish Balay { 96ff14e315SSatish Balay return 0; 97ff14e315SSatish Balay } 98ff14e315SSatish Balay 995615d1e5SSatish Balay #undef __FUNC__ 1005615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIDense" 10139ddd567SLois Curfman McInnes static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 1028965ea79SLois Curfman McInnes { 10339ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 1048965ea79SLois Curfman McInnes MPI_Comm comm = mat->comm; 10539ddd567SLois Curfman McInnes int size = mdn->size, *owners = mdn->rowners, rank = mdn->rank; 1068965ea79SLois Curfman McInnes int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 10739ddd567SLois Curfman McInnes int tag = mat->tag, *owner,*starts,count,ierr; 1088965ea79SLois Curfman McInnes InsertMode addv; 10939ddd567SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 1108965ea79SLois Curfman McInnes Scalar *rvalues,*svalues; 1118965ea79SLois Curfman McInnes 1128965ea79SLois Curfman McInnes /* make sure all processors are either in INSERTMODE or ADDMODE */ 1136d84be18SBarry Smith MPI_Allreduce(&mdn->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 1147056b6fcSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 115e3372554SBarry Smith SETERRQ(1,0,"Cannot mix adds/inserts on different procs"); 1168965ea79SLois Curfman McInnes } 11739ddd567SLois Curfman McInnes mdn->insertmode = addv; /* in case this processor had no cache */ 1188965ea79SLois Curfman McInnes 1198965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 1200452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 121cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1220452661fSBarry Smith owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 12339ddd567SLois Curfman McInnes for ( i=0; i<mdn->stash.n; i++ ) { 12439ddd567SLois Curfman McInnes idx = mdn->stash.idx[i]; 1258965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 1268965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 1278965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1288965ea79SLois Curfman McInnes } 1298965ea79SLois Curfman McInnes } 1308965ea79SLois Curfman McInnes } 1318965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1328965ea79SLois Curfman McInnes 1338965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 1340452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1356d84be18SBarry Smith MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm); 1368965ea79SLois Curfman McInnes nreceives = work[rank]; 137e3372554SBarry Smith if (nreceives > size) SETERRQ(1,0,"Internal PETSc error"); 1386d84be18SBarry Smith MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm); 1398965ea79SLois Curfman McInnes nmax = work[rank]; 1400452661fSBarry Smith PetscFree(work); 1418965ea79SLois Curfman McInnes 1428965ea79SLois Curfman McInnes /* post receives: 1438965ea79SLois Curfman McInnes 1) each message will consist of ordered pairs 1448965ea79SLois Curfman McInnes (global index,value) we store the global index as a double 1458965ea79SLois Curfman McInnes to simplify the message passing. 1468965ea79SLois Curfman McInnes 2) since we don't know how long each individual message is we 1478965ea79SLois Curfman McInnes allocate the largest needed buffer for each receive. Potentially 1488965ea79SLois Curfman McInnes this is a lot of wasted space. 1498965ea79SLois Curfman McInnes 1508965ea79SLois Curfman McInnes This could be done better. 1518965ea79SLois Curfman McInnes */ 1523b2fbd54SBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 1533b2fbd54SBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1548965ea79SLois Curfman McInnes for ( i=0; i<nreceives; i++ ) { 1557056b6fcSBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 1568965ea79SLois Curfman McInnes } 1578965ea79SLois Curfman McInnes 1588965ea79SLois Curfman McInnes /* do sends: 1598965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 1608965ea79SLois Curfman McInnes the ith processor 1618965ea79SLois Curfman McInnes */ 1623b2fbd54SBarry Smith svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 1633b2fbd54SBarry Smith send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1640452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 1658965ea79SLois Curfman McInnes starts[0] = 0; 1668965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 16739ddd567SLois Curfman McInnes for ( i=0; i<mdn->stash.n; i++ ) { 16839ddd567SLois Curfman McInnes svalues[3*starts[owner[i]]] = (Scalar) mdn->stash.idx[i]; 16939ddd567SLois Curfman McInnes svalues[3*starts[owner[i]]+1] = (Scalar) mdn->stash.idy[i]; 17039ddd567SLois Curfman McInnes svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i]; 1718965ea79SLois Curfman McInnes } 1720452661fSBarry Smith PetscFree(owner); 1738965ea79SLois Curfman McInnes starts[0] = 0; 1748965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1758965ea79SLois Curfman McInnes count = 0; 1768965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 1778965ea79SLois Curfman McInnes if (procs[i]) { 1787056b6fcSBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++); 1798965ea79SLois Curfman McInnes } 1808965ea79SLois Curfman McInnes } 1810452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 1828965ea79SLois Curfman McInnes 1838965ea79SLois Curfman McInnes /* Free cache space */ 184d2dc9b81SLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIDense:Number of off-processor values %d\n",mdn->stash.n); 18539ddd567SLois Curfman McInnes ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr); 1868965ea79SLois Curfman McInnes 18739ddd567SLois Curfman McInnes mdn->svalues = svalues; mdn->rvalues = rvalues; 18839ddd567SLois Curfman McInnes mdn->nsends = nsends; mdn->nrecvs = nreceives; 18939ddd567SLois Curfman McInnes mdn->send_waits = send_waits; mdn->recv_waits = recv_waits; 19039ddd567SLois Curfman McInnes mdn->rmax = nmax; 1918965ea79SLois Curfman McInnes 1928965ea79SLois Curfman McInnes return 0; 1938965ea79SLois Curfman McInnes } 19439ddd567SLois Curfman McInnes extern int MatSetUpMultiply_MPIDense(Mat); 1958965ea79SLois Curfman McInnes 1965615d1e5SSatish Balay #undef __FUNC__ 1975615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIDense" 19839ddd567SLois Curfman McInnes static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 1998965ea79SLois Curfman McInnes { 20039ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 2018965ea79SLois Curfman McInnes MPI_Status *send_status,recv_status; 20239ddd567SLois Curfman McInnes int imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col; 2038965ea79SLois Curfman McInnes Scalar *values,val; 20439ddd567SLois Curfman McInnes InsertMode addv = mdn->insertmode; 2058965ea79SLois Curfman McInnes 2068965ea79SLois Curfman McInnes /* wait on receives */ 2078965ea79SLois Curfman McInnes while (count) { 20839ddd567SLois Curfman McInnes MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status); 2098965ea79SLois Curfman McInnes /* unpack receives into our local space */ 21039ddd567SLois Curfman McInnes values = mdn->rvalues + 3*imdex*mdn->rmax; 2118965ea79SLois Curfman McInnes MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2128965ea79SLois Curfman McInnes n = n/3; 2138965ea79SLois Curfman McInnes for ( i=0; i<n; i++ ) { 214227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - mdn->rstart; 215227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 2168965ea79SLois Curfman McInnes val = values[3*i+2]; 21739ddd567SLois Curfman McInnes if (col >= 0 && col < mdn->N) { 21839ddd567SLois Curfman McInnes MatSetValues(mdn->A,1,&row,1,&col,&val,addv); 2198965ea79SLois Curfman McInnes } 220e3372554SBarry Smith else {SETERRQ(1,0,"Invalid column");} 2218965ea79SLois Curfman McInnes } 2228965ea79SLois Curfman McInnes count--; 2238965ea79SLois Curfman McInnes } 2240452661fSBarry Smith PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues); 2258965ea79SLois Curfman McInnes 2268965ea79SLois Curfman McInnes /* wait on sends */ 22739ddd567SLois Curfman McInnes if (mdn->nsends) { 2287056b6fcSBarry Smith send_status = (MPI_Status *) PetscMalloc(mdn->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 22939ddd567SLois Curfman McInnes MPI_Waitall(mdn->nsends,mdn->send_waits,send_status); 2300452661fSBarry Smith PetscFree(send_status); 2318965ea79SLois Curfman McInnes } 2320452661fSBarry Smith PetscFree(mdn->send_waits); PetscFree(mdn->svalues); 2338965ea79SLois Curfman McInnes 23439ddd567SLois Curfman McInnes mdn->insertmode = NOT_SET_VALUES; 23539ddd567SLois Curfman McInnes ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr); 23639ddd567SLois Curfman McInnes ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr); 2378965ea79SLois Curfman McInnes 2386d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 23939ddd567SLois Curfman McInnes ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr); 2408965ea79SLois Curfman McInnes } 2418965ea79SLois Curfman McInnes return 0; 2428965ea79SLois Curfman McInnes } 2438965ea79SLois Curfman McInnes 2445615d1e5SSatish Balay #undef __FUNC__ 2455615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIDense" 24639ddd567SLois Curfman McInnes static int MatZeroEntries_MPIDense(Mat A) 2478965ea79SLois Curfman McInnes { 24839ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 24939ddd567SLois Curfman McInnes return MatZeroEntries(l->A); 2508965ea79SLois Curfman McInnes } 2518965ea79SLois Curfman McInnes 2525615d1e5SSatish Balay #undef __FUNC__ 2535615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIDense" 2544e220ebcSLois Curfman McInnes static int MatGetBlockSize_MPIDense(Mat A,int *bs) 2554e220ebcSLois Curfman McInnes { 2564e220ebcSLois Curfman McInnes *bs = 1; 2574e220ebcSLois Curfman McInnes return 0; 2584e220ebcSLois Curfman McInnes } 2594e220ebcSLois Curfman McInnes 2608965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the 2618965ea79SLois Curfman McInnes matrix is square and the column and row owerships are identical. 2628965ea79SLois Curfman McInnes This is a BUG. The only way to fix it seems to be to access 2633501a2bdSLois Curfman McInnes mdn->A and mdn->B directly and not through the MatZeroRows() 2648965ea79SLois Curfman McInnes routine. 2658965ea79SLois Curfman McInnes */ 2665615d1e5SSatish Balay #undef __FUNC__ 2675615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIDense" 26839ddd567SLois Curfman McInnes static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag) 2698965ea79SLois Curfman McInnes { 27039ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *) A->data; 2718965ea79SLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 2728965ea79SLois Curfman McInnes int *procs,*nprocs,j,found,idx,nsends,*work; 2738965ea79SLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 2748965ea79SLois Curfman McInnes int *rvalues,tag = A->tag,count,base,slen,n,*source; 2758965ea79SLois Curfman McInnes int *lens,imdex,*lrows,*values; 2768965ea79SLois Curfman McInnes MPI_Comm comm = A->comm; 2778965ea79SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 2788965ea79SLois Curfman McInnes MPI_Status recv_status,*send_status; 2798965ea79SLois Curfman McInnes IS istmp; 2808965ea79SLois Curfman McInnes 28177c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 2828965ea79SLois Curfman McInnes ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 2838965ea79SLois Curfman McInnes 2848965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 2850452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 286cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 2870452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 2888965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 2898965ea79SLois Curfman McInnes idx = rows[i]; 2908965ea79SLois Curfman McInnes found = 0; 2918965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 2928965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 2938965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 2948965ea79SLois Curfman McInnes } 2958965ea79SLois Curfman McInnes } 296e3372554SBarry Smith if (!found) SETERRQ(1,0,"Index out of range"); 2978965ea79SLois Curfman McInnes } 2988965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 2998965ea79SLois Curfman McInnes 3008965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 3010452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 3028965ea79SLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 3038965ea79SLois Curfman McInnes nrecvs = work[rank]; 3048965ea79SLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 3058965ea79SLois Curfman McInnes nmax = work[rank]; 3060452661fSBarry Smith PetscFree(work); 3078965ea79SLois Curfman McInnes 3088965ea79SLois Curfman McInnes /* post receives: */ 3090452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 3108965ea79SLois Curfman McInnes CHKPTRQ(rvalues); 3110452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 3128965ea79SLois Curfman McInnes CHKPTRQ(recv_waits); 3138965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 3148965ea79SLois Curfman McInnes MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 3158965ea79SLois Curfman McInnes } 3168965ea79SLois Curfman McInnes 3178965ea79SLois Curfman McInnes /* do sends: 3188965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 3198965ea79SLois Curfman McInnes the ith processor 3208965ea79SLois Curfman McInnes */ 3210452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 3227056b6fcSBarry Smith send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 3230452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 3248965ea79SLois Curfman McInnes starts[0] = 0; 3258965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3268965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 3278965ea79SLois Curfman McInnes svalues[starts[owner[i]]++] = rows[i]; 3288965ea79SLois Curfman McInnes } 3298965ea79SLois Curfman McInnes ISRestoreIndices(is,&rows); 3308965ea79SLois Curfman McInnes 3318965ea79SLois Curfman McInnes starts[0] = 0; 3328965ea79SLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3338965ea79SLois Curfman McInnes count = 0; 3348965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 3358965ea79SLois Curfman McInnes if (procs[i]) { 3368965ea79SLois Curfman McInnes MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 3378965ea79SLois Curfman McInnes } 3388965ea79SLois Curfman McInnes } 3390452661fSBarry Smith PetscFree(starts); 3408965ea79SLois Curfman McInnes 3418965ea79SLois Curfman McInnes base = owners[rank]; 3428965ea79SLois Curfman McInnes 3438965ea79SLois Curfman McInnes /* wait on receives */ 3440452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3458965ea79SLois Curfman McInnes source = lens + nrecvs; 3468965ea79SLois Curfman McInnes count = nrecvs; slen = 0; 3478965ea79SLois Curfman McInnes while (count) { 3488965ea79SLois Curfman McInnes MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3498965ea79SLois Curfman McInnes /* unpack receives into our local space */ 3508965ea79SLois Curfman McInnes MPI_Get_count(&recv_status,MPI_INT,&n); 3518965ea79SLois Curfman McInnes source[imdex] = recv_status.MPI_SOURCE; 3528965ea79SLois Curfman McInnes lens[imdex] = n; 3538965ea79SLois Curfman McInnes slen += n; 3548965ea79SLois Curfman McInnes count--; 3558965ea79SLois Curfman McInnes } 3560452661fSBarry Smith PetscFree(recv_waits); 3578965ea79SLois Curfman McInnes 3588965ea79SLois Curfman McInnes /* move the data into the send scatter */ 3590452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 3608965ea79SLois Curfman McInnes count = 0; 3618965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 3628965ea79SLois Curfman McInnes values = rvalues + i*nmax; 3638965ea79SLois Curfman McInnes for ( j=0; j<lens[i]; j++ ) { 3648965ea79SLois Curfman McInnes lrows[count++] = values[j] - base; 3658965ea79SLois Curfman McInnes } 3668965ea79SLois Curfman McInnes } 3670452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 3680452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 3698965ea79SLois Curfman McInnes 3708965ea79SLois Curfman McInnes /* actually zap the local rows */ 371537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 3728965ea79SLois Curfman McInnes PLogObjectParent(A,istmp); 3730452661fSBarry Smith PetscFree(lrows); 3748965ea79SLois Curfman McInnes ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 3758965ea79SLois Curfman McInnes ierr = ISDestroy(istmp); CHKERRQ(ierr); 3768965ea79SLois Curfman McInnes 3778965ea79SLois Curfman McInnes /* wait on sends */ 3788965ea79SLois Curfman McInnes if (nsends) { 3797056b6fcSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 3808965ea79SLois Curfman McInnes MPI_Waitall(nsends,send_waits,send_status); 3810452661fSBarry Smith PetscFree(send_status); 3828965ea79SLois Curfman McInnes } 3830452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 3848965ea79SLois Curfman McInnes 3858965ea79SLois Curfman McInnes return 0; 3868965ea79SLois Curfman McInnes } 3878965ea79SLois Curfman McInnes 3885615d1e5SSatish Balay #undef __FUNC__ 3895615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIDense" 39039ddd567SLois Curfman McInnes static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 3918965ea79SLois Curfman McInnes { 39239ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 3938965ea79SLois Curfman McInnes int ierr; 394c456f294SBarry Smith 39543a90d84SBarry Smith ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 39643a90d84SBarry Smith ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 39744cd7ae7SLois Curfman McInnes ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr); 3988965ea79SLois Curfman McInnes return 0; 3998965ea79SLois Curfman McInnes } 4008965ea79SLois Curfman McInnes 4015615d1e5SSatish Balay #undef __FUNC__ 4025615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIDense" 40339ddd567SLois Curfman McInnes static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 4048965ea79SLois Curfman McInnes { 40539ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4068965ea79SLois Curfman McInnes int ierr; 407c456f294SBarry Smith 40843a90d84SBarry Smith ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 40943a90d84SBarry Smith ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 41044cd7ae7SLois Curfman McInnes ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr); 4118965ea79SLois Curfman McInnes return 0; 4128965ea79SLois Curfman McInnes } 4138965ea79SLois Curfman McInnes 4145615d1e5SSatish Balay #undef __FUNC__ 4155615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIDense" 416096963f5SLois Curfman McInnes static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy) 417096963f5SLois Curfman McInnes { 418096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 419096963f5SLois Curfman McInnes int ierr; 4203501a2bdSLois Curfman McInnes Scalar zero = 0.0; 421096963f5SLois Curfman McInnes 4223501a2bdSLois Curfman McInnes ierr = VecSet(&zero,yy); CHKERRQ(ierr); 42344cd7ae7SLois Curfman McInnes ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr); 424537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 425537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 426096963f5SLois Curfman McInnes return 0; 427096963f5SLois Curfman McInnes } 428096963f5SLois Curfman McInnes 4295615d1e5SSatish Balay #undef __FUNC__ 4305615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIDense" 431096963f5SLois Curfman McInnes static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 432096963f5SLois Curfman McInnes { 433096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 434096963f5SLois Curfman McInnes int ierr; 435096963f5SLois Curfman McInnes 4363501a2bdSLois Curfman McInnes ierr = VecCopy(yy,zz); CHKERRQ(ierr); 43744cd7ae7SLois Curfman McInnes ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr); 438537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 439537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 440096963f5SLois Curfman McInnes return 0; 441096963f5SLois Curfman McInnes } 442096963f5SLois Curfman McInnes 4435615d1e5SSatish Balay #undef __FUNC__ 4445615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIDense" 44539ddd567SLois Curfman McInnes static int MatGetDiagonal_MPIDense(Mat A,Vec v) 4468965ea79SLois Curfman McInnes { 44739ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 448096963f5SLois Curfman McInnes Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data; 44944cd7ae7SLois Curfman McInnes int ierr, len, i, n, m = a->m, radd; 45044cd7ae7SLois Curfman McInnes Scalar *x, zero = 0.0; 451ed3cc1f0SBarry Smith 45244cd7ae7SLois Curfman McInnes VecSet(&zero,v); 453096963f5SLois Curfman McInnes ierr = VecGetArray(v,&x); CHKERRQ(ierr); 454096963f5SLois Curfman McInnes ierr = VecGetSize(v,&n); CHKERRQ(ierr); 455e3372554SBarry Smith if (n != a->M) SETERRQ(1,0,"Nonconforming mat and vec"); 45644cd7ae7SLois Curfman McInnes len = PetscMin(aloc->m,aloc->n); 4577ddc982cSLois Curfman McInnes radd = a->rstart*m; 45844cd7ae7SLois Curfman McInnes for ( i=0; i<len; i++ ) { 459096963f5SLois Curfman McInnes x[i] = aloc->v[radd + i*m + i]; 460096963f5SLois Curfman McInnes } 461096963f5SLois Curfman McInnes return 0; 4628965ea79SLois Curfman McInnes } 4638965ea79SLois Curfman McInnes 4645615d1e5SSatish Balay #undef __FUNC__ 4655615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIDense" 46639ddd567SLois Curfman McInnes static int MatDestroy_MPIDense(PetscObject obj) 4678965ea79SLois Curfman McInnes { 4688965ea79SLois Curfman McInnes Mat mat = (Mat) obj; 4693501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 4708965ea79SLois Curfman McInnes int ierr; 471ed3cc1f0SBarry Smith 4728965ea79SLois Curfman McInnes #if defined(PETSC_LOG) 4733501a2bdSLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N); 4748965ea79SLois Curfman McInnes #endif 4750452661fSBarry Smith PetscFree(mdn->rowners); 4763501a2bdSLois Curfman McInnes ierr = MatDestroy(mdn->A); CHKERRQ(ierr); 4773501a2bdSLois Curfman McInnes if (mdn->lvec) VecDestroy(mdn->lvec); 4783501a2bdSLois Curfman McInnes if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 479622d7880SLois Curfman McInnes if (mdn->factor) { 480622d7880SLois Curfman McInnes if (mdn->factor->temp) PetscFree(mdn->factor->temp); 481622d7880SLois Curfman McInnes if (mdn->factor->tag) PetscFree(mdn->factor->tag); 482622d7880SLois Curfman McInnes if (mdn->factor->pivots) PetscFree(mdn->factor->pivots); 483622d7880SLois Curfman McInnes PetscFree(mdn->factor); 484622d7880SLois Curfman McInnes } 4850452661fSBarry Smith PetscFree(mdn); 48690f02eecSBarry Smith if (mat->mapping) { 48790f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 48890f02eecSBarry Smith } 4898965ea79SLois Curfman McInnes PLogObjectDestroy(mat); 4900452661fSBarry Smith PetscHeaderDestroy(mat); 4918965ea79SLois Curfman McInnes return 0; 4928965ea79SLois Curfman McInnes } 49339ddd567SLois Curfman McInnes 4948965ea79SLois Curfman McInnes #include "pinclude/pviewer.h" 4958965ea79SLois Curfman McInnes 4965615d1e5SSatish Balay #undef __FUNC__ 4975615d1e5SSatish Balay #define __FUNC__ "MatView_MPIDense_Binary" 49839ddd567SLois Curfman McInnes static int MatView_MPIDense_Binary(Mat mat,Viewer viewer) 4998965ea79SLois Curfman McInnes { 50039ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 5018965ea79SLois Curfman McInnes int ierr; 5027056b6fcSBarry Smith 50339ddd567SLois Curfman McInnes if (mdn->size == 1) { 50439ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 5058965ea79SLois Curfman McInnes } 506e3372554SBarry Smith else SETERRQ(1,0,"Only uniprocessor output supported"); 5078965ea79SLois Curfman McInnes return 0; 5088965ea79SLois Curfman McInnes } 5098965ea79SLois Curfman McInnes 5105615d1e5SSatish Balay #undef __FUNC__ 5115615d1e5SSatish Balay #define __FUNC__ "MatView_MPIDense_ASCII" 51239ddd567SLois Curfman McInnes static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer) 5138965ea79SLois Curfman McInnes { 51439ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 51539ddd567SLois Curfman McInnes int ierr, format; 5168965ea79SLois Curfman McInnes FILE *fd; 51719bcc07fSBarry Smith ViewerType vtype; 5188965ea79SLois Curfman McInnes 51919bcc07fSBarry Smith ViewerGetType(viewer,&vtype); 52090ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 52190ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 522639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 5234e220ebcSLois Curfman McInnes int rank; 5244e220ebcSLois Curfman McInnes MatInfo info; 525096963f5SLois Curfman McInnes MPI_Comm_rank(mat->comm,&rank); 5264e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 52777c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 5284e220ebcSLois Curfman McInnes fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m, 5294e220ebcSLois Curfman McInnes (int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 530096963f5SLois Curfman McInnes fflush(fd); 53177c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 5323501a2bdSLois Curfman McInnes ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr); 5333501a2bdSLois Curfman McInnes return 0; 5343501a2bdSLois Curfman McInnes } 53502cad45dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 536096963f5SLois Curfman McInnes return 0; 5378965ea79SLois Curfman McInnes } 53819bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 53977c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 54039ddd567SLois Curfman McInnes fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n", 54139ddd567SLois Curfman McInnes mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n); 54239ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 5438965ea79SLois Curfman McInnes fflush(fd); 54477c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 5458965ea79SLois Curfman McInnes } 5468965ea79SLois Curfman McInnes else { 54739ddd567SLois Curfman McInnes int size = mdn->size, rank = mdn->rank; 5488965ea79SLois Curfman McInnes if (size == 1) { 54939ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 5508965ea79SLois Curfman McInnes } 5518965ea79SLois Curfman McInnes else { 5528965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 5538965ea79SLois Curfman McInnes Mat A; 55439ddd567SLois Curfman McInnes int M = mdn->M, N = mdn->N,m,row,i, nz, *cols; 55539ddd567SLois Curfman McInnes Scalar *vals; 55639ddd567SLois Curfman McInnes Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data; 5578965ea79SLois Curfman McInnes 5588965ea79SLois Curfman McInnes if (!rank) { 559*0513a670SBarry Smith ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr); 5608965ea79SLois Curfman McInnes } 5618965ea79SLois Curfman McInnes else { 562*0513a670SBarry Smith ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A); CHKERRQ(ierr); 5638965ea79SLois Curfman McInnes } 5648965ea79SLois Curfman McInnes PLogObjectParent(mat,A); 5658965ea79SLois Curfman McInnes 56639ddd567SLois Curfman McInnes /* Copy the matrix ... This isn't the most efficient means, 56739ddd567SLois Curfman McInnes but it's quick for now */ 56839ddd567SLois Curfman McInnes row = mdn->rstart; m = Amdn->m; 5698965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 57039ddd567SLois Curfman McInnes ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 57139ddd567SLois Curfman McInnes ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr); 57239ddd567SLois Curfman McInnes ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 57339ddd567SLois Curfman McInnes row++; 5748965ea79SLois Curfman McInnes } 5758965ea79SLois Curfman McInnes 5766d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 5776d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 5788965ea79SLois Curfman McInnes if (!rank) { 57939ddd567SLois Curfman McInnes ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr); 5808965ea79SLois Curfman McInnes } 5818965ea79SLois Curfman McInnes ierr = MatDestroy(A); CHKERRQ(ierr); 5828965ea79SLois Curfman McInnes } 5838965ea79SLois Curfman McInnes } 5848965ea79SLois Curfman McInnes return 0; 5858965ea79SLois Curfman McInnes } 5868965ea79SLois Curfman McInnes 5875615d1e5SSatish Balay #undef __FUNC__ 5885615d1e5SSatish Balay #define __FUNC__ "MatView_MPIDense" 58939ddd567SLois Curfman McInnes static int MatView_MPIDense(PetscObject obj,Viewer viewer) 5908965ea79SLois Curfman McInnes { 5918965ea79SLois Curfman McInnes Mat mat = (Mat) obj; 59239ddd567SLois Curfman McInnes int ierr; 593bcd2baecSBarry Smith ViewerType vtype; 5948965ea79SLois Curfman McInnes 595bcd2baecSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 596bcd2baecSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 59739ddd567SLois Curfman McInnes ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 5988965ea79SLois Curfman McInnes } 599bcd2baecSBarry Smith else if (vtype == ASCII_FILES_VIEWER) { 60039ddd567SLois Curfman McInnes ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 6018965ea79SLois Curfman McInnes } 602bcd2baecSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 60339ddd567SLois Curfman McInnes return MatView_MPIDense_Binary(mat,viewer); 6048965ea79SLois Curfman McInnes } 6058965ea79SLois Curfman McInnes return 0; 6068965ea79SLois Curfman McInnes } 6078965ea79SLois Curfman McInnes 6085615d1e5SSatish Balay #undef __FUNC__ 6095615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIDense" 6104e220ebcSLois Curfman McInnes static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 6118965ea79SLois Curfman McInnes { 6123501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6133501a2bdSLois Curfman McInnes Mat mdn = mat->A; 6144e220ebcSLois Curfman McInnes int ierr; 6154e220ebcSLois Curfman McInnes double isend[5], irecv[5]; 6168965ea79SLois Curfman McInnes 6174e220ebcSLois Curfman McInnes info->rows_global = (double)mat->M; 6184e220ebcSLois Curfman McInnes info->columns_global = (double)mat->N; 6194e220ebcSLois Curfman McInnes info->rows_local = (double)mat->m; 6204e220ebcSLois Curfman McInnes info->columns_local = (double)mat->N; 6214e220ebcSLois Curfman McInnes info->block_size = 1.0; 6224e220ebcSLois Curfman McInnes ierr = MatGetInfo(mdn,MAT_LOCAL,info); CHKERRQ(ierr); 6234e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 6244e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 6258965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 6264e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 6274e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 6284e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 6294e220ebcSLois Curfman McInnes info->memory = isend[3]; 6304e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 6318965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 6323501a2bdSLois Curfman McInnes MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm); 6334e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 6344e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 6354e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 6364e220ebcSLois Curfman McInnes info->memory = irecv[3]; 6374e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 6388965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 6393501a2bdSLois Curfman McInnes MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm); 6404e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 6414e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 6424e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 6434e220ebcSLois Curfman McInnes info->memory = irecv[3]; 6444e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 6458965ea79SLois Curfman McInnes } 6464e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 6474e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 6484e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 6498965ea79SLois Curfman McInnes return 0; 6508965ea79SLois Curfman McInnes } 6518965ea79SLois Curfman McInnes 6528c469469SLois Curfman McInnes /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*); 6538aaee692SLois Curfman McInnes extern int MatLUFactorNumeric_MPIDense(Mat,Mat*); 6548aaee692SLois Curfman McInnes extern int MatLUFactor_MPIDense(Mat,IS,IS,double); 6558aaee692SLois Curfman McInnes extern int MatSolve_MPIDense(Mat,Vec,Vec); 6568c469469SLois Curfman McInnes extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec); 6578aaee692SLois Curfman McInnes extern int MatSolveTrans_MPIDense(Mat,Vec,Vec); 6588aaee692SLois Curfman McInnes extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */ 6598aaee692SLois Curfman McInnes 6605615d1e5SSatish Balay #undef __FUNC__ 6615615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIDense" 66239ddd567SLois Curfman McInnes static int MatSetOption_MPIDense(Mat A,MatOption op) 6638965ea79SLois Curfman McInnes { 66439ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 6658965ea79SLois Curfman McInnes 6666d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 6676d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 668219d9a1aSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 669219d9a1aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED) { 670b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 671b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 672aeafbbfcSLois Curfman McInnes a->roworiented = 1; 6738965ea79SLois Curfman McInnes MatSetOption(a->A,op); 674b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 675219d9a1aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 6766d4a8577SBarry Smith op == MAT_SYMMETRIC || 6776d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 6786d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 67994a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_MPIDense:Option ignored\n"); 6806d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) 68139b7565bSBarry Smith {a->roworiented = 0; MatSetOption(a->A,op);} 6826d4a8577SBarry Smith else if (op == MAT_NO_NEW_DIAGONALS) 683e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 6848965ea79SLois Curfman McInnes else 685e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 6868965ea79SLois Curfman McInnes return 0; 6878965ea79SLois Curfman McInnes } 6888965ea79SLois Curfman McInnes 6895615d1e5SSatish Balay #undef __FUNC__ 6905615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIDense" 6913501a2bdSLois Curfman McInnes static int MatGetSize_MPIDense(Mat A,int *m,int *n) 6928965ea79SLois Curfman McInnes { 6933501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 6948965ea79SLois Curfman McInnes *m = mat->M; *n = mat->N; 6958965ea79SLois Curfman McInnes return 0; 6968965ea79SLois Curfman McInnes } 6978965ea79SLois Curfman McInnes 6985615d1e5SSatish Balay #undef __FUNC__ 6995615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIDense" 7003501a2bdSLois Curfman McInnes static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n) 7018965ea79SLois Curfman McInnes { 7023501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 7038965ea79SLois Curfman McInnes *m = mat->m; *n = mat->N; 7048965ea79SLois Curfman McInnes return 0; 7058965ea79SLois Curfman McInnes } 7068965ea79SLois Curfman McInnes 7075615d1e5SSatish Balay #undef __FUNC__ 7085615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIDense" 7093501a2bdSLois Curfman McInnes static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n) 7108965ea79SLois Curfman McInnes { 7113501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 7128965ea79SLois Curfman McInnes *m = mat->rstart; *n = mat->rend; 7138965ea79SLois Curfman McInnes return 0; 7148965ea79SLois Curfman McInnes } 7158965ea79SLois Curfman McInnes 7165615d1e5SSatish Balay #undef __FUNC__ 7175615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIDense" 7183501a2bdSLois Curfman McInnes static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v) 7198965ea79SLois Curfman McInnes { 7203501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 72139ddd567SLois Curfman McInnes int lrow, rstart = mat->rstart, rend = mat->rend; 7228965ea79SLois Curfman McInnes 723e3372554SBarry Smith if (row < rstart || row >= rend) SETERRQ(1,0,"only local rows") 7248965ea79SLois Curfman McInnes lrow = row - rstart; 72539ddd567SLois Curfman McInnes return MatGetRow(mat->A,lrow,nz,idx,v); 7268965ea79SLois Curfman McInnes } 7278965ea79SLois Curfman McInnes 7285615d1e5SSatish Balay #undef __FUNC__ 7295615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIDense" 73039ddd567SLois Curfman McInnes static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v) 7318965ea79SLois Curfman McInnes { 7320452661fSBarry Smith if (idx) PetscFree(*idx); 7330452661fSBarry Smith if (v) PetscFree(*v); 7348965ea79SLois Curfman McInnes return 0; 7358965ea79SLois Curfman McInnes } 7368965ea79SLois Curfman McInnes 7375615d1e5SSatish Balay #undef __FUNC__ 7385615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIDense" 739cddf8d76SBarry Smith static int MatNorm_MPIDense(Mat A,NormType type,double *norm) 740096963f5SLois Curfman McInnes { 7413501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *) A->data; 7423501a2bdSLois Curfman McInnes Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data; 7433501a2bdSLois Curfman McInnes int ierr, i, j; 7443501a2bdSLois Curfman McInnes double sum = 0.0; 7453501a2bdSLois Curfman McInnes Scalar *v = mat->v; 7463501a2bdSLois Curfman McInnes 7473501a2bdSLois Curfman McInnes if (mdn->size == 1) { 7483501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr); 7493501a2bdSLois Curfman McInnes } else { 7503501a2bdSLois Curfman McInnes if (type == NORM_FROBENIUS) { 7513501a2bdSLois Curfman McInnes for (i=0; i<mat->n*mat->m; i++ ) { 7523501a2bdSLois Curfman McInnes #if defined(PETSC_COMPLEX) 7533501a2bdSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 7543501a2bdSLois Curfman McInnes #else 7553501a2bdSLois Curfman McInnes sum += (*v)*(*v); v++; 7563501a2bdSLois Curfman McInnes #endif 7573501a2bdSLois Curfman McInnes } 7586d84be18SBarry Smith MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm); 7593501a2bdSLois Curfman McInnes *norm = sqrt(*norm); 7603501a2bdSLois Curfman McInnes PLogFlops(2*mat->n*mat->m); 7613501a2bdSLois Curfman McInnes } 7623501a2bdSLois Curfman McInnes else if (type == NORM_1) { 7633501a2bdSLois Curfman McInnes double *tmp, *tmp2; 7640452661fSBarry Smith tmp = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp); 7653501a2bdSLois Curfman McInnes tmp2 = tmp + mdn->N; 766cddf8d76SBarry Smith PetscMemzero(tmp,2*mdn->N*sizeof(double)); 767096963f5SLois Curfman McInnes *norm = 0.0; 7683501a2bdSLois Curfman McInnes v = mat->v; 7693501a2bdSLois Curfman McInnes for ( j=0; j<mat->n; j++ ) { 7703501a2bdSLois Curfman McInnes for ( i=0; i<mat->m; i++ ) { 77167e560aaSBarry Smith tmp[j] += PetscAbsScalar(*v); v++; 7723501a2bdSLois Curfman McInnes } 7733501a2bdSLois Curfman McInnes } 7746d84be18SBarry Smith MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm); 7753501a2bdSLois Curfman McInnes for ( j=0; j<mdn->N; j++ ) { 7763501a2bdSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 7773501a2bdSLois Curfman McInnes } 7780452661fSBarry Smith PetscFree(tmp); 7793501a2bdSLois Curfman McInnes PLogFlops(mat->n*mat->m); 7803501a2bdSLois Curfman McInnes } 7813501a2bdSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 7823501a2bdSLois Curfman McInnes double ntemp; 7833501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr); 7846d84be18SBarry Smith MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm); 7853501a2bdSLois Curfman McInnes } 7863501a2bdSLois Curfman McInnes else { 787e3372554SBarry Smith SETERRQ(1,0,"No support for two norm"); 7883501a2bdSLois Curfman McInnes } 7893501a2bdSLois Curfman McInnes } 7903501a2bdSLois Curfman McInnes return 0; 7913501a2bdSLois Curfman McInnes } 7923501a2bdSLois Curfman McInnes 7935615d1e5SSatish Balay #undef __FUNC__ 7945615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIDense" 7953501a2bdSLois Curfman McInnes static int MatTranspose_MPIDense(Mat A,Mat *matout) 7963501a2bdSLois Curfman McInnes { 7973501a2bdSLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *) A->data; 7983501a2bdSLois Curfman McInnes Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data; 7993501a2bdSLois Curfman McInnes Mat B; 8003501a2bdSLois Curfman McInnes int M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart; 8013501a2bdSLois Curfman McInnes int j, i, ierr; 8023501a2bdSLois Curfman McInnes Scalar *v; 8033501a2bdSLois Curfman McInnes 8047056b6fcSBarry Smith if (matout == PETSC_NULL && M != N) { 805e3372554SBarry Smith SETERRQ(1,0,"Supports square matrix only in-place"); 8067056b6fcSBarry Smith } 8077056b6fcSBarry Smith ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr); 8083501a2bdSLois Curfman McInnes 8093501a2bdSLois Curfman McInnes m = Aloc->m; n = Aloc->n; v = Aloc->v; 8100452661fSBarry Smith rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork); 8113501a2bdSLois Curfman McInnes for ( j=0; j<n; j++ ) { 8123501a2bdSLois Curfman McInnes for (i=0; i<m; i++) rwork[i] = rstart + i; 8133501a2bdSLois Curfman McInnes ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr); 8143501a2bdSLois Curfman McInnes v += m; 8153501a2bdSLois Curfman McInnes } 8160452661fSBarry Smith PetscFree(rwork); 8176d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 8186d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 8193638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 8203501a2bdSLois Curfman McInnes *matout = B; 8213501a2bdSLois Curfman McInnes } else { 8223501a2bdSLois Curfman McInnes /* This isn't really an in-place transpose, but free data struct from a */ 8230452661fSBarry Smith PetscFree(a->rowners); 8243501a2bdSLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 8253501a2bdSLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 8263501a2bdSLois Curfman McInnes if (a->Mvctx) VecScatterDestroy(a->Mvctx); 8270452661fSBarry Smith PetscFree(a); 8283501a2bdSLois Curfman McInnes PetscMemcpy(A,B,sizeof(struct _Mat)); 8290452661fSBarry Smith PetscHeaderDestroy(B); 8303501a2bdSLois Curfman McInnes } 831096963f5SLois Curfman McInnes return 0; 832096963f5SLois Curfman McInnes } 833096963f5SLois Curfman McInnes 83444cd7ae7SLois Curfman McInnes #include "pinclude/plapack.h" 8355615d1e5SSatish Balay #undef __FUNC__ 8365615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIDense" 83744cd7ae7SLois Curfman McInnes static int MatScale_MPIDense(Scalar *alpha,Mat inA) 83844cd7ae7SLois Curfman McInnes { 83944cd7ae7SLois Curfman McInnes Mat_MPIDense *A = (Mat_MPIDense *) inA->data; 84044cd7ae7SLois Curfman McInnes Mat_SeqDense *a = (Mat_SeqDense *) A->A->data; 84144cd7ae7SLois Curfman McInnes int one = 1, nz; 84244cd7ae7SLois Curfman McInnes 84344cd7ae7SLois Curfman McInnes nz = a->m*a->n; 84444cd7ae7SLois Curfman McInnes BLscal_( &nz, alpha, a->v, &one ); 84544cd7ae7SLois Curfman McInnes PLogFlops(nz); 84644cd7ae7SLois Curfman McInnes return 0; 84744cd7ae7SLois Curfman McInnes } 84844cd7ae7SLois Curfman McInnes 8493d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat,Mat *,int); 8508965ea79SLois Curfman McInnes 8518965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/ 85239ddd567SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIDense, 85339ddd567SLois Curfman McInnes MatGetRow_MPIDense,MatRestoreRow_MPIDense, 85439ddd567SLois Curfman McInnes MatMult_MPIDense,MatMultAdd_MPIDense, 855096963f5SLois Curfman McInnes MatMultTrans_MPIDense,MatMultTransAdd_MPIDense, 856e7ca0642SLois Curfman McInnes /* MatSolve_MPIDense,0, */ 857e7ca0642SLois Curfman McInnes 0,0, 85839ddd567SLois Curfman McInnes 0,0, 8598c469469SLois Curfman McInnes 0,0, 8608c469469SLois Curfman McInnes /* MatLUFactor_MPIDense,0, */ 8618aaee692SLois Curfman McInnes 0,MatTranspose_MPIDense, 86239ddd567SLois Curfman McInnes MatGetInfo_MPIDense,0, 863096963f5SLois Curfman McInnes MatGetDiagonal_MPIDense,0,MatNorm_MPIDense, 86439ddd567SLois Curfman McInnes MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense, 8658965ea79SLois Curfman McInnes 0, 86639ddd567SLois Curfman McInnes MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense, 8673b2fbd54SBarry Smith 0,0, 8688c469469SLois Curfman McInnes /* 0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */ 8698aaee692SLois Curfman McInnes 0,0, 87039ddd567SLois Curfman McInnes MatGetSize_MPIDense,MatGetLocalSize_MPIDense, 87139ddd567SLois Curfman McInnes MatGetOwnershipRange_MPIDense, 872ff14e315SSatish Balay 0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense, 873905e6a2fSBarry Smith 0,MatConvertSameType_MPIDense, 874b49de8d1SLois Curfman McInnes 0,0,0,0,0, 8754e220ebcSLois Curfman McInnes 0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense, 8764e220ebcSLois Curfman McInnes 0,0,0,MatGetBlockSize_MPIDense}; 8778965ea79SLois Curfman McInnes 8785615d1e5SSatish Balay #undef __FUNC__ 8795615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIDense" 8808965ea79SLois Curfman McInnes /*@C 88139ddd567SLois Curfman McInnes MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 8828965ea79SLois Curfman McInnes 8838965ea79SLois Curfman McInnes Input Parameters: 8848965ea79SLois Curfman McInnes . comm - MPI communicator 8858965ea79SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 8868965ea79SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 8878965ea79SLois Curfman McInnes if N is given) 8888965ea79SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 8898965ea79SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 8908965ea79SLois Curfman McInnes if n is given) 891b4fd4287SBarry Smith . data - optional location of matrix data. Set data=PETSC_NULL for PETSc 892dfc5480cSLois Curfman McInnes to control all matrix memory allocation. 8938965ea79SLois Curfman McInnes 8948965ea79SLois Curfman McInnes Output Parameter: 895477f1c0bSLois Curfman McInnes . A - the matrix 8968965ea79SLois Curfman McInnes 8978965ea79SLois Curfman McInnes Notes: 89839ddd567SLois Curfman McInnes The dense format is fully compatible with standard Fortran 77 89939ddd567SLois Curfman McInnes storage by columns. 9008965ea79SLois Curfman McInnes 90118f449edSLois Curfman McInnes The data input variable is intended primarily for Fortran programmers 90218f449edSLois Curfman McInnes who wish to allocate their own matrix memory space. Most users should 903b4fd4287SBarry Smith set data=PETSC_NULL. 90418f449edSLois Curfman McInnes 9058965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 9068965ea79SLois Curfman McInnes (possibly both). 9078965ea79SLois Curfman McInnes 9083501a2bdSLois Curfman McInnes Currently, the only parallel dense matrix decomposition is by rows, 9093501a2bdSLois Curfman McInnes so that n=N and each submatrix owns all of the global columns. 9103501a2bdSLois Curfman McInnes 91139ddd567SLois Curfman McInnes .keywords: matrix, dense, parallel 9128965ea79SLois Curfman McInnes 91339ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 9148965ea79SLois Curfman McInnes @*/ 915477f1c0bSLois Curfman McInnes int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A) 9168965ea79SLois Curfman McInnes { 9178965ea79SLois Curfman McInnes Mat mat; 91839ddd567SLois Curfman McInnes Mat_MPIDense *a; 91925cdf11fSBarry Smith int ierr, i,flg; 9208965ea79SLois Curfman McInnes 921ed2daf61SLois Curfman McInnes /* Note: For now, when data is specified above, this assumes the user correctly 922ed2daf61SLois Curfman McInnes allocates the local dense storage space. We should add error checking. */ 92318f449edSLois Curfman McInnes 924477f1c0bSLois Curfman McInnes *A = 0; 9250452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm); 9268965ea79SLois Curfman McInnes PLogObjectCreate(mat); 9270452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 9288965ea79SLois Curfman McInnes PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 92939ddd567SLois Curfman McInnes mat->destroy = MatDestroy_MPIDense; 93039ddd567SLois Curfman McInnes mat->view = MatView_MPIDense; 9318965ea79SLois Curfman McInnes mat->factor = 0; 93290f02eecSBarry Smith mat->mapping = 0; 9338965ea79SLois Curfman McInnes 934622d7880SLois Curfman McInnes a->factor = 0; 9358965ea79SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 9368965ea79SLois Curfman McInnes MPI_Comm_rank(comm,&a->rank); 9378965ea79SLois Curfman McInnes MPI_Comm_size(comm,&a->size); 9388965ea79SLois Curfman McInnes 93939ddd567SLois Curfman McInnes if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm); 9408965ea79SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 94139ddd567SLois Curfman McInnes 94239ddd567SLois Curfman McInnes /* each row stores all columns */ 94339ddd567SLois Curfman McInnes if (N == PETSC_DECIDE) N = n; 944d7e8b826SBarry Smith if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 945e3372554SBarry Smith /* if (n != N) SETERRQ(1,0,"For now, only n=N is supported"); */ 946aca0ad90SLois Curfman McInnes a->N = mat->N = N; 947aca0ad90SLois Curfman McInnes a->M = mat->M = M; 948aca0ad90SLois Curfman McInnes a->m = mat->m = m; 949aca0ad90SLois Curfman McInnes a->n = mat->n = n; 9508965ea79SLois Curfman McInnes 9518965ea79SLois Curfman McInnes /* build local table of row and column ownerships */ 952d7e8b826SBarry Smith a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 953d7e8b826SBarry Smith a->cowners = a->rowners + a->size + 1; 9547056b6fcSBarry Smith PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense)); 9558965ea79SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 9568965ea79SLois Curfman McInnes a->rowners[0] = 0; 9578965ea79SLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 9588965ea79SLois Curfman McInnes a->rowners[i] += a->rowners[i-1]; 9598965ea79SLois Curfman McInnes } 9608965ea79SLois Curfman McInnes a->rstart = a->rowners[a->rank]; 9618965ea79SLois Curfman McInnes a->rend = a->rowners[a->rank+1]; 962d7e8b826SBarry Smith MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 963d7e8b826SBarry Smith a->cowners[0] = 0; 964d7e8b826SBarry Smith for ( i=2; i<=a->size; i++ ) { 965d7e8b826SBarry Smith a->cowners[i] += a->cowners[i-1]; 966d7e8b826SBarry Smith } 9678965ea79SLois Curfman McInnes 96818f449edSLois Curfman McInnes ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr); 9698965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 9708965ea79SLois Curfman McInnes 9718965ea79SLois Curfman McInnes /* build cache for off array entries formed */ 9728965ea79SLois Curfman McInnes ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 9738965ea79SLois Curfman McInnes 9748965ea79SLois Curfman McInnes /* stuff used for matrix vector multiply */ 9758965ea79SLois Curfman McInnes a->lvec = 0; 9768965ea79SLois Curfman McInnes a->Mvctx = 0; 97739b7565bSBarry Smith a->roworiented = 1; 9788965ea79SLois Curfman McInnes 979477f1c0bSLois Curfman McInnes *A = mat; 98025cdf11fSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 98125cdf11fSBarry Smith if (flg) { 9828c469469SLois Curfman McInnes ierr = MatPrintHelp(mat); CHKERRQ(ierr); 9838c469469SLois Curfman McInnes } 9848965ea79SLois Curfman McInnes return 0; 9858965ea79SLois Curfman McInnes } 9868965ea79SLois Curfman McInnes 9875615d1e5SSatish Balay #undef __FUNC__ 9885615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIDense" 9893d1612f7SBarry Smith static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues) 9908965ea79SLois Curfman McInnes { 9918965ea79SLois Curfman McInnes Mat mat; 9923501a2bdSLois Curfman McInnes Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data; 99339ddd567SLois Curfman McInnes int ierr; 9942ba99913SLois Curfman McInnes FactorCtx *factor; 9958965ea79SLois Curfman McInnes 9968965ea79SLois Curfman McInnes *newmat = 0; 9970452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm); 9988965ea79SLois Curfman McInnes PLogObjectCreate(mat); 9990452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 10008965ea79SLois Curfman McInnes PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 100139ddd567SLois Curfman McInnes mat->destroy = MatDestroy_MPIDense; 100239ddd567SLois Curfman McInnes mat->view = MatView_MPIDense; 10033501a2bdSLois Curfman McInnes mat->factor = A->factor; 1004c456f294SBarry Smith mat->assembled = PETSC_TRUE; 10058965ea79SLois Curfman McInnes 100644cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 100744cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 100844cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 100944cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 10102ba99913SLois Curfman McInnes if (oldmat->factor) { 10112ba99913SLois Curfman McInnes a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor); 10122ba99913SLois Curfman McInnes /* copy factor contents ... add this code! */ 10132ba99913SLois Curfman McInnes } else a->factor = 0; 10148965ea79SLois Curfman McInnes 10158965ea79SLois Curfman McInnes a->rstart = oldmat->rstart; 10168965ea79SLois Curfman McInnes a->rend = oldmat->rend; 10178965ea79SLois Curfman McInnes a->size = oldmat->size; 10188965ea79SLois Curfman McInnes a->rank = oldmat->rank; 10198965ea79SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 10208965ea79SLois Curfman McInnes 10210452661fSBarry Smith a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 102239ddd567SLois Curfman McInnes PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense)); 10238965ea79SLois Curfman McInnes PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 10248965ea79SLois Curfman McInnes ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 10258965ea79SLois Curfman McInnes 10268965ea79SLois Curfman McInnes ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 10278965ea79SLois Curfman McInnes PLogObjectParent(mat,a->lvec); 102855659b69SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 10298965ea79SLois Curfman McInnes PLogObjectParent(mat,a->Mvctx); 10308965ea79SLois Curfman McInnes ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 10318965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 10328965ea79SLois Curfman McInnes *newmat = mat; 10338965ea79SLois Curfman McInnes return 0; 10348965ea79SLois Curfman McInnes } 10358965ea79SLois Curfman McInnes 103677c4ece6SBarry Smith #include "sys.h" 10378965ea79SLois Curfman McInnes 10385615d1e5SSatish Balay #undef __FUNC__ 10395615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIDense_DenseInFile" 104090ace30eSBarry Smith int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat) 104190ace30eSBarry Smith { 104290ace30eSBarry Smith int *rowners, i,size,rank,m,rstart,rend,ierr,nz,j; 104390ace30eSBarry Smith Scalar *array,*vals,*vals_ptr; 104490ace30eSBarry Smith MPI_Status status; 104590ace30eSBarry Smith 104690ace30eSBarry Smith MPI_Comm_rank(comm,&rank); 104790ace30eSBarry Smith MPI_Comm_size(comm,&size); 104890ace30eSBarry Smith 104990ace30eSBarry Smith /* determine ownership of all rows */ 105090ace30eSBarry Smith m = M/size + ((M % size) > rank); 105190ace30eSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 105290ace30eSBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 105390ace30eSBarry Smith rowners[0] = 0; 105490ace30eSBarry Smith for ( i=2; i<=size; i++ ) { 105590ace30eSBarry Smith rowners[i] += rowners[i-1]; 105690ace30eSBarry Smith } 105790ace30eSBarry Smith rstart = rowners[rank]; 105890ace30eSBarry Smith rend = rowners[rank+1]; 105990ace30eSBarry Smith 106090ace30eSBarry Smith ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 106190ace30eSBarry Smith ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr); 106290ace30eSBarry Smith 106390ace30eSBarry Smith if (!rank) { 106490ace30eSBarry Smith vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals); 106590ace30eSBarry Smith 106690ace30eSBarry Smith /* read in my part of the matrix numerical values */ 106777c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr); 106890ace30eSBarry Smith 106990ace30eSBarry Smith /* insert into matrix-by row (this is why cannot directly read into array */ 107090ace30eSBarry Smith vals_ptr = vals; 107190ace30eSBarry Smith for ( i=0; i<m; i++ ) { 107290ace30eSBarry Smith for ( j=0; j<N; j++ ) { 107390ace30eSBarry Smith array[i + j*m] = *vals_ptr++; 107490ace30eSBarry Smith } 107590ace30eSBarry Smith } 107690ace30eSBarry Smith 107790ace30eSBarry Smith /* read in other processors and ship out */ 107890ace30eSBarry Smith for ( i=1; i<size; i++ ) { 107990ace30eSBarry Smith nz = (rowners[i+1] - rowners[i])*N; 108077c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 108190ace30eSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm); 108290ace30eSBarry Smith } 108390ace30eSBarry Smith } 108490ace30eSBarry Smith else { 108590ace30eSBarry Smith /* receive numeric values */ 108690ace30eSBarry Smith vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals); 108790ace30eSBarry Smith 108890ace30eSBarry Smith /* receive message of values*/ 108990ace30eSBarry Smith MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status); 109090ace30eSBarry Smith 109190ace30eSBarry Smith /* insert into matrix-by row (this is why cannot directly read into array */ 109290ace30eSBarry Smith vals_ptr = vals; 109390ace30eSBarry Smith for ( i=0; i<m; i++ ) { 109490ace30eSBarry Smith for ( j=0; j<N; j++ ) { 109590ace30eSBarry Smith array[i + j*m] = *vals_ptr++; 109690ace30eSBarry Smith } 109790ace30eSBarry Smith } 109890ace30eSBarry Smith } 109990ace30eSBarry Smith PetscFree(rowners); 110090ace30eSBarry Smith PetscFree(vals); 11016d4a8577SBarry Smith ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 11026d4a8577SBarry Smith ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 110390ace30eSBarry Smith return 0; 110490ace30eSBarry Smith } 110590ace30eSBarry Smith 110690ace30eSBarry Smith 11075615d1e5SSatish Balay #undef __FUNC__ 11085615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIDense" 110919bcc07fSBarry Smith int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat) 11108965ea79SLois Curfman McInnes { 11118965ea79SLois Curfman McInnes Mat A; 11128965ea79SLois Curfman McInnes int i, nz, ierr, j,rstart, rend, fd; 11138965ea79SLois Curfman McInnes Scalar *vals,*svals; 111419bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 11158965ea79SLois Curfman McInnes MPI_Status status; 11168965ea79SLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 11178965ea79SLois Curfman McInnes int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 111819bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 11198965ea79SLois Curfman McInnes 11208965ea79SLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 11218965ea79SLois Curfman McInnes if (!rank) { 112290ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 112377c4ece6SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1124e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 11258965ea79SLois Curfman McInnes } 11268965ea79SLois Curfman McInnes 11278965ea79SLois Curfman McInnes MPI_Bcast(header+1,3,MPI_INT,0,comm); 112890ace30eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 112990ace30eSBarry Smith 113090ace30eSBarry Smith /* 113190ace30eSBarry Smith Handle case where matrix is stored on disk as a dense matrix 113290ace30eSBarry Smith */ 113390ace30eSBarry Smith if (nz == MATRIX_BINARY_FORMAT_DENSE) { 113490ace30eSBarry Smith return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat); 113590ace30eSBarry Smith } 113690ace30eSBarry Smith 11378965ea79SLois Curfman McInnes /* determine ownership of all rows */ 11388965ea79SLois Curfman McInnes m = M/size + ((M % size) > rank); 11390452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 11408965ea79SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 11418965ea79SLois Curfman McInnes rowners[0] = 0; 11428965ea79SLois Curfman McInnes for ( i=2; i<=size; i++ ) { 11438965ea79SLois Curfman McInnes rowners[i] += rowners[i-1]; 11448965ea79SLois Curfman McInnes } 11458965ea79SLois Curfman McInnes rstart = rowners[rank]; 11468965ea79SLois Curfman McInnes rend = rowners[rank+1]; 11478965ea79SLois Curfman McInnes 11488965ea79SLois Curfman McInnes /* distribute row lengths to all processors */ 11490452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 11508965ea79SLois Curfman McInnes offlens = ourlens + (rend-rstart); 11518965ea79SLois Curfman McInnes if (!rank) { 11520452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 115377c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 11540452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 11558965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 11568965ea79SLois Curfman McInnes MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 11570452661fSBarry Smith PetscFree(sndcounts); 11588965ea79SLois Curfman McInnes } 11598965ea79SLois Curfman McInnes else { 11608965ea79SLois Curfman McInnes MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 11618965ea79SLois Curfman McInnes } 11628965ea79SLois Curfman McInnes 11638965ea79SLois Curfman McInnes if (!rank) { 11648965ea79SLois Curfman McInnes /* calculate the number of nonzeros on each processor */ 11650452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1166cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 11678965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 11688965ea79SLois Curfman McInnes for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 11698965ea79SLois Curfman McInnes procsnz[i] += rowlengths[j]; 11708965ea79SLois Curfman McInnes } 11718965ea79SLois Curfman McInnes } 11720452661fSBarry Smith PetscFree(rowlengths); 11738965ea79SLois Curfman McInnes 11748965ea79SLois Curfman McInnes /* determine max buffer needed and allocate it */ 11758965ea79SLois Curfman McInnes maxnz = 0; 11768965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 11770452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 11788965ea79SLois Curfman McInnes } 11790452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 11808965ea79SLois Curfman McInnes 11818965ea79SLois Curfman McInnes /* read in my part of the matrix column indices */ 11828965ea79SLois Curfman McInnes nz = procsnz[0]; 11830452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 118477c4ece6SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 11858965ea79SLois Curfman McInnes 11868965ea79SLois Curfman McInnes /* read in every one elses and ship off */ 11878965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 11888965ea79SLois Curfman McInnes nz = procsnz[i]; 118977c4ece6SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 11908965ea79SLois Curfman McInnes MPI_Send(cols,nz,MPI_INT,i,tag,comm); 11918965ea79SLois Curfman McInnes } 11920452661fSBarry Smith PetscFree(cols); 11938965ea79SLois Curfman McInnes } 11948965ea79SLois Curfman McInnes else { 11958965ea79SLois Curfman McInnes /* determine buffer space needed for message */ 11968965ea79SLois Curfman McInnes nz = 0; 11978965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 11988965ea79SLois Curfman McInnes nz += ourlens[i]; 11998965ea79SLois Curfman McInnes } 12000452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 12018965ea79SLois Curfman McInnes 12028965ea79SLois Curfman McInnes /* receive message of column indices*/ 12038965ea79SLois Curfman McInnes MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 12048965ea79SLois Curfman McInnes MPI_Get_count(&status,MPI_INT,&maxnz); 1205e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 12068965ea79SLois Curfman McInnes } 12078965ea79SLois Curfman McInnes 12088965ea79SLois Curfman McInnes /* loop over local rows, determining number of off diagonal entries */ 1209cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 12108965ea79SLois Curfman McInnes jj = 0; 12118965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 12128965ea79SLois Curfman McInnes for ( j=0; j<ourlens[i]; j++ ) { 12138965ea79SLois Curfman McInnes if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 12148965ea79SLois Curfman McInnes jj++; 12158965ea79SLois Curfman McInnes } 12168965ea79SLois Curfman McInnes } 12178965ea79SLois Curfman McInnes 12188965ea79SLois Curfman McInnes /* create our matrix */ 12198965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 12208965ea79SLois Curfman McInnes ourlens[i] -= offlens[i]; 12218965ea79SLois Curfman McInnes } 1222b4fd4287SBarry Smith ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 12238965ea79SLois Curfman McInnes A = *newmat; 12248965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 12258965ea79SLois Curfman McInnes ourlens[i] += offlens[i]; 12268965ea79SLois Curfman McInnes } 12278965ea79SLois Curfman McInnes 12288965ea79SLois Curfman McInnes if (!rank) { 12290452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 12308965ea79SLois Curfman McInnes 12318965ea79SLois Curfman McInnes /* read in my part of the matrix numerical values */ 12328965ea79SLois Curfman McInnes nz = procsnz[0]; 123377c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 12348965ea79SLois Curfman McInnes 12358965ea79SLois Curfman McInnes /* insert into matrix */ 12368965ea79SLois Curfman McInnes jj = rstart; 12378965ea79SLois Curfman McInnes smycols = mycols; 12388965ea79SLois Curfman McInnes svals = vals; 12398965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 12408965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 12418965ea79SLois Curfman McInnes smycols += ourlens[i]; 12428965ea79SLois Curfman McInnes svals += ourlens[i]; 12438965ea79SLois Curfman McInnes jj++; 12448965ea79SLois Curfman McInnes } 12458965ea79SLois Curfman McInnes 12468965ea79SLois Curfman McInnes /* read in other processors and ship out */ 12478965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 12488965ea79SLois Curfman McInnes nz = procsnz[i]; 124977c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 12508965ea79SLois Curfman McInnes MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 12518965ea79SLois Curfman McInnes } 12520452661fSBarry Smith PetscFree(procsnz); 12538965ea79SLois Curfman McInnes } 12548965ea79SLois Curfman McInnes else { 12558965ea79SLois Curfman McInnes /* receive numeric values */ 12560452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 12578965ea79SLois Curfman McInnes 12588965ea79SLois Curfman McInnes /* receive message of values*/ 12598965ea79SLois Curfman McInnes MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 12608965ea79SLois Curfman McInnes MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1261e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 12628965ea79SLois Curfman McInnes 12638965ea79SLois Curfman McInnes /* insert into matrix */ 12648965ea79SLois Curfman McInnes jj = rstart; 12658965ea79SLois Curfman McInnes smycols = mycols; 12668965ea79SLois Curfman McInnes svals = vals; 12678965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 12688965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 12698965ea79SLois Curfman McInnes smycols += ourlens[i]; 12708965ea79SLois Curfman McInnes svals += ourlens[i]; 12718965ea79SLois Curfman McInnes jj++; 12728965ea79SLois Curfman McInnes } 12738965ea79SLois Curfman McInnes } 12740452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 12758965ea79SLois Curfman McInnes 12766d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12776d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12788965ea79SLois Curfman McInnes return 0; 12798965ea79SLois Curfman McInnes } 128090ace30eSBarry Smith 128190ace30eSBarry Smith 128290ace30eSBarry Smith 128390ace30eSBarry Smith 128490ace30eSBarry Smith 1285