179bdfe76SSatish Balay #ifndef lint 2*7d57db60SLois Curfman McInnes static char vcid[] = "$Id: mpibaij.c,v 1.23 1996/08/22 19:53:50 curfman Exp curfman $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 570f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 779bdfe76SSatish Balay 857b952d6SSatish Balay #include "draw.h" 957b952d6SSatish Balay #include "pinclude/pviewer.h" 1057b952d6SSatish Balay 1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 1593bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *); 1693bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *); 1793bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double); 1893bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec); 1993bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 2093bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec); 2193bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 2293bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *); 2357b952d6SSatish Balay 24537820f0SBarry Smith /* 25537820f0SBarry Smith Local utility routine that creates a mapping from the global column 2657b952d6SSatish Balay number to the local number in the off-diagonal part of the local 2757b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 2857b952d6SSatish Balay length of colmap equals the global matrix length. 2957b952d6SSatish Balay */ 3057b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 3157b952d6SSatish Balay { 3257b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3357b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 3457b952d6SSatish Balay int nbs = B->nbs,i; 3557b952d6SSatish Balay 3657b952d6SSatish Balay baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap); 3757b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 3857b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 39905e6a2fSBarry Smith for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i + 1; 4057b952d6SSatish Balay return 0; 4157b952d6SSatish Balay } 4257b952d6SSatish Balay 4357b952d6SSatish Balay 44acdf5bf4SSatish Balay static int MatGetReordering_MPIBAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm) 4557b952d6SSatish Balay { 4657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 4757b952d6SSatish Balay int ierr; 4857b952d6SSatish Balay if (baij->size == 1) { 4957b952d6SSatish Balay ierr = MatGetReordering(baij->A,type,rperm,cperm); CHKERRQ(ierr); 5057b952d6SSatish Balay } else SETERRQ(1,"MatGetReordering_MPIBAIJ:not supported in parallel"); 5157b952d6SSatish Balay return 0; 5257b952d6SSatish Balay } 5357b952d6SSatish Balay 5457b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 5557b952d6SSatish Balay { 5657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 5757b952d6SSatish Balay Scalar value; 5857b952d6SSatish Balay int ierr,i,j, rstart = baij->rstart, rend = baij->rend; 5957b952d6SSatish Balay int cstart = baij->cstart, cend = baij->cend,row,col; 6057b952d6SSatish Balay int roworiented = baij->roworiented,rstart_orig,rend_orig; 6157b952d6SSatish Balay int cstart_orig,cend_orig,bs=baij->bs; 6257b952d6SSatish Balay 6357b952d6SSatish Balay if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) { 6457b952d6SSatish Balay SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds"); 6557b952d6SSatish Balay } 6657b952d6SSatish Balay baij->insertmode = addv; 6757b952d6SSatish Balay rstart_orig = rstart*bs; 6857b952d6SSatish Balay rend_orig = rend*bs; 6957b952d6SSatish Balay cstart_orig = cstart*bs; 7057b952d6SSatish Balay cend_orig = cend*bs; 7157b952d6SSatish Balay for ( i=0; i<m; i++ ) { 7257b952d6SSatish Balay if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row"); 7357b952d6SSatish Balay if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large"); 7457b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 7557b952d6SSatish Balay row = im[i] - rstart_orig; 7657b952d6SSatish Balay for ( j=0; j<n; j++ ) { 7757b952d6SSatish Balay if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column"); 7857b952d6SSatish Balay if (in[j] >= baij->N) SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large"); 7957b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 8057b952d6SSatish Balay col = in[j] - cstart_orig; 8157b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 8257b952d6SSatish Balay ierr = MatSetValues(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 8357b952d6SSatish Balay } 8457b952d6SSatish Balay else { 8557b952d6SSatish Balay if (mat->was_assembled) { 86905e6a2fSBarry Smith if (!baij->colmap) { 87905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 88905e6a2fSBarry Smith } 89905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 9057b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 9157b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 9257b952d6SSatish Balay col = in[j]; 9357b952d6SSatish Balay } 9457b952d6SSatish Balay } 9557b952d6SSatish Balay else col = in[j]; 9657b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 9757b952d6SSatish Balay ierr = MatSetValues(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 9857b952d6SSatish Balay } 9957b952d6SSatish Balay } 10057b952d6SSatish Balay } 10157b952d6SSatish Balay else { 10257b952d6SSatish Balay if (roworiented) { 10357b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 10457b952d6SSatish Balay } 10557b952d6SSatish Balay else { 10657b952d6SSatish Balay row = im[i]; 10757b952d6SSatish Balay for ( j=0; j<n; j++ ) { 10857b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 10957b952d6SSatish Balay } 11057b952d6SSatish Balay } 11157b952d6SSatish Balay } 11257b952d6SSatish Balay } 11357b952d6SSatish Balay return 0; 11457b952d6SSatish Balay } 11557b952d6SSatish Balay 116d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 117d6de1c52SSatish Balay { 118d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 119d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 120d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 121d6de1c52SSatish Balay 122d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 123d6de1c52SSatish Balay if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative row"); 124d6de1c52SSatish Balay if (idxm[i] >= baij->M) SETERRQ(1,"MatGetValues_MPIBAIJ:Row too large"); 125d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 126d6de1c52SSatish Balay row = idxm[i] - bsrstart; 127d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 128d6de1c52SSatish Balay if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative column"); 129d6de1c52SSatish Balay if (idxn[j] >= baij->N) SETERRQ(1,"MatGetValues_MPIBAIJ:Col too large"); 130d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 131d6de1c52SSatish Balay col = idxn[j] - bscstart; 132d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 133d6de1c52SSatish Balay } 134d6de1c52SSatish Balay else { 135905e6a2fSBarry Smith if (!baij->colmap) { 136905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 137905e6a2fSBarry Smith } 138e60e1c95SSatish Balay if((baij->colmap[idxn[j]/bs]-1 < 0) || 139e60e1c95SSatish Balay (baij->garray[baij->colmap[idxn[j]/bs]-1] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 140d9d09a02SSatish Balay else { 141905e6a2fSBarry Smith col = (baij->colmap[idxn[j]/bs]-1)*bs + idxn[j]%bs; 142d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 143d6de1c52SSatish Balay } 144d6de1c52SSatish Balay } 145d6de1c52SSatish Balay } 146d9d09a02SSatish Balay } 147d6de1c52SSatish Balay else { 148d6de1c52SSatish Balay SETERRQ(1,"MatGetValues_MPIBAIJ:Only local values currently supported"); 149d6de1c52SSatish Balay } 150d6de1c52SSatish Balay } 151d6de1c52SSatish Balay return 0; 152d6de1c52SSatish Balay } 153d6de1c52SSatish Balay 154d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 155d6de1c52SSatish Balay { 156d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 157d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 158acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 159d6de1c52SSatish Balay double sum = 0.0; 160d6de1c52SSatish Balay Scalar *v; 161d6de1c52SSatish Balay 162d6de1c52SSatish Balay if (baij->size == 1) { 163d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 164d6de1c52SSatish Balay } else { 165d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 166d6de1c52SSatish Balay v = amat->a; 167d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 168d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 169d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 170d6de1c52SSatish Balay #else 171d6de1c52SSatish Balay sum += (*v)*(*v); v++; 172d6de1c52SSatish Balay #endif 173d6de1c52SSatish Balay } 174d6de1c52SSatish Balay v = bmat->a; 175d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 176d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 177d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 178d6de1c52SSatish Balay #else 179d6de1c52SSatish Balay sum += (*v)*(*v); v++; 180d6de1c52SSatish Balay #endif 181d6de1c52SSatish Balay } 182d6de1c52SSatish Balay MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 183d6de1c52SSatish Balay *norm = sqrt(*norm); 184d6de1c52SSatish Balay } 185acdf5bf4SSatish Balay else 186acdf5bf4SSatish Balay SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 187d6de1c52SSatish Balay } 188d6de1c52SSatish Balay return 0; 189d6de1c52SSatish Balay } 19057b952d6SSatish Balay 19157b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 19257b952d6SSatish Balay { 19357b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 19457b952d6SSatish Balay MPI_Comm comm = mat->comm; 19557b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 19657b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 19757b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 19857b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 19957b952d6SSatish Balay InsertMode addv; 20057b952d6SSatish Balay Scalar *rvalues,*svalues; 20157b952d6SSatish Balay 20257b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 20357b952d6SSatish Balay MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 20457b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 20557b952d6SSatish Balay SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added"); 20657b952d6SSatish Balay } 20757b952d6SSatish Balay baij->insertmode = addv; /* in case this processor had no cache */ 20857b952d6SSatish Balay 20957b952d6SSatish Balay /* first count number of contributors to each processor */ 21057b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 21157b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 21257b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 21357b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 21457b952d6SSatish Balay idx = baij->stash.idx[i]; 21557b952d6SSatish Balay for ( j=0; j<size; j++ ) { 21657b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 21757b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 21857b952d6SSatish Balay } 21957b952d6SSatish Balay } 22057b952d6SSatish Balay } 22157b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 22257b952d6SSatish Balay 22357b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 22457b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 22557b952d6SSatish Balay MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 22657b952d6SSatish Balay nreceives = work[rank]; 22757b952d6SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 22857b952d6SSatish Balay nmax = work[rank]; 22957b952d6SSatish Balay PetscFree(work); 23057b952d6SSatish Balay 23157b952d6SSatish Balay /* post receives: 23257b952d6SSatish Balay 1) each message will consist of ordered pairs 23357b952d6SSatish Balay (global index,value) we store the global index as a double 23457b952d6SSatish Balay to simplify the message passing. 23557b952d6SSatish Balay 2) since we don't know how long each individual message is we 23657b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 23757b952d6SSatish Balay this is a lot of wasted space. 23857b952d6SSatish Balay 23957b952d6SSatish Balay 24057b952d6SSatish Balay This could be done better. 24157b952d6SSatish Balay */ 24257b952d6SSatish Balay rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 24357b952d6SSatish Balay CHKPTRQ(rvalues); 24457b952d6SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 24557b952d6SSatish Balay CHKPTRQ(recv_waits); 24657b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 24757b952d6SSatish Balay MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 24857b952d6SSatish Balay comm,recv_waits+i); 24957b952d6SSatish Balay } 25057b952d6SSatish Balay 25157b952d6SSatish Balay /* do sends: 25257b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 25357b952d6SSatish Balay the ith processor 25457b952d6SSatish Balay */ 25557b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 25657b952d6SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 25757b952d6SSatish Balay CHKPTRQ(send_waits); 25857b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 25957b952d6SSatish Balay starts[0] = 0; 26057b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 26157b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 26257b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 26357b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 26457b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 26557b952d6SSatish Balay } 26657b952d6SSatish Balay PetscFree(owner); 26757b952d6SSatish Balay starts[0] = 0; 26857b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 26957b952d6SSatish Balay count = 0; 27057b952d6SSatish Balay for ( i=0; i<size; i++ ) { 27157b952d6SSatish Balay if (procs[i]) { 27257b952d6SSatish Balay MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 27357b952d6SSatish Balay comm,send_waits+count++); 27457b952d6SSatish Balay } 27557b952d6SSatish Balay } 27657b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 27757b952d6SSatish Balay 27857b952d6SSatish Balay /* Free cache space */ 27957b952d6SSatish Balay PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n); 28057b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 28157b952d6SSatish Balay 28257b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 28357b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 28457b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 28557b952d6SSatish Balay baij->rmax = nmax; 28657b952d6SSatish Balay 28757b952d6SSatish Balay return 0; 28857b952d6SSatish Balay } 28957b952d6SSatish Balay 29057b952d6SSatish Balay 29157b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 29257b952d6SSatish Balay { 29357b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 29457b952d6SSatish Balay MPI_Status *send_status,recv_status; 29557b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 29657b952d6SSatish Balay int bs=baij->bs,row,col,other_disassembled; 29757b952d6SSatish Balay Scalar *values,val; 29857b952d6SSatish Balay InsertMode addv = baij->insertmode; 29957b952d6SSatish Balay 30057b952d6SSatish Balay /* wait on receives */ 30157b952d6SSatish Balay while (count) { 30257b952d6SSatish Balay MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 30357b952d6SSatish Balay /* unpack receives into our local space */ 30457b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 30557b952d6SSatish Balay MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 30657b952d6SSatish Balay n = n/3; 30757b952d6SSatish Balay for ( i=0; i<n; i++ ) { 30857b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 30957b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 31057b952d6SSatish Balay val = values[3*i+2]; 31157b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 31257b952d6SSatish Balay col -= baij->cstart*bs; 31357b952d6SSatish Balay MatSetValues(baij->A,1,&row,1,&col,&val,addv); 31457b952d6SSatish Balay } 31557b952d6SSatish Balay else { 31657b952d6SSatish Balay if (mat->was_assembled) { 317905e6a2fSBarry Smith if (!baij->colmap) { 318905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 319905e6a2fSBarry Smith } 320905e6a2fSBarry Smith col = (baij->colmap[col/bs]-1)*bs + col%bs; 32157b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 32257b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 32357b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 32457b952d6SSatish Balay } 32557b952d6SSatish Balay } 32657b952d6SSatish Balay MatSetValues(baij->B,1,&row,1,&col,&val,addv); 32757b952d6SSatish Balay } 32857b952d6SSatish Balay } 32957b952d6SSatish Balay count--; 33057b952d6SSatish Balay } 33157b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 33257b952d6SSatish Balay 33357b952d6SSatish Balay /* wait on sends */ 33457b952d6SSatish Balay if (baij->nsends) { 33557b952d6SSatish Balay send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 33657b952d6SSatish Balay CHKPTRQ(send_status); 33757b952d6SSatish Balay MPI_Waitall(baij->nsends,baij->send_waits,send_status); 33857b952d6SSatish Balay PetscFree(send_status); 33957b952d6SSatish Balay } 34057b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 34157b952d6SSatish Balay 34257b952d6SSatish Balay baij->insertmode = NOT_SET_VALUES; 34357b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 34457b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 34557b952d6SSatish Balay 34657b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 34757b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 34857b952d6SSatish Balay MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 34957b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 35057b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 35157b952d6SSatish Balay } 35257b952d6SSatish Balay 3536d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 35457b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 35557b952d6SSatish Balay } 35657b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 35757b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 35857b952d6SSatish Balay 35957b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 36057b952d6SSatish Balay return 0; 36157b952d6SSatish Balay } 36257b952d6SSatish Balay 36357b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 36457b952d6SSatish Balay { 36557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 36657b952d6SSatish Balay int ierr; 36757b952d6SSatish Balay 36857b952d6SSatish Balay if (baij->size == 1) { 36957b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 37057b952d6SSatish Balay } 37157b952d6SSatish Balay else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported"); 37257b952d6SSatish Balay return 0; 37357b952d6SSatish Balay } 37457b952d6SSatish Balay 37557b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 37657b952d6SSatish Balay { 37757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 378cee3aa6bSSatish Balay int ierr, format,rank,bs=baij->bs; 37957b952d6SSatish Balay FILE *fd; 38057b952d6SSatish Balay ViewerType vtype; 38157b952d6SSatish Balay 38257b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 38357b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 38457b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 38557b952d6SSatish Balay if (format == ASCII_FORMAT_INFO_DETAILED) { 3864e220ebcSLois Curfman McInnes MatInfo info; 38757b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 38857b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 3894e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 39057b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 39157b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 3924e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 3934e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 3944e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 3954e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 3964e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 3974e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 39857b952d6SSatish Balay fflush(fd); 39957b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 40057b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 40157b952d6SSatish Balay return 0; 40257b952d6SSatish Balay } 40357b952d6SSatish Balay else if (format == ASCII_FORMAT_INFO) { 40457b952d6SSatish Balay return 0; 40557b952d6SSatish Balay } 40657b952d6SSatish Balay } 40757b952d6SSatish Balay 40857b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 40957b952d6SSatish Balay Draw draw; 41057b952d6SSatish Balay PetscTruth isnull; 41157b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 41257b952d6SSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 41357b952d6SSatish Balay } 41457b952d6SSatish Balay 41557b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 41657b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 41757b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 41857b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 41957b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 42057b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 42157b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 42257b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 42357b952d6SSatish Balay fflush(fd); 42457b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 42557b952d6SSatish Balay } 42657b952d6SSatish Balay else { 42757b952d6SSatish Balay int size = baij->size; 42857b952d6SSatish Balay rank = baij->rank; 42957b952d6SSatish Balay if (size == 1) { 43057b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 43157b952d6SSatish Balay } 43257b952d6SSatish Balay else { 43357b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 43457b952d6SSatish Balay Mat A; 43557b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 43657b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 43757b952d6SSatish Balay int mbs=baij->mbs; 43857b952d6SSatish Balay Scalar *a; 43957b952d6SSatish Balay 44057b952d6SSatish Balay if (!rank) { 441cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 44257b952d6SSatish Balay CHKERRQ(ierr); 44357b952d6SSatish Balay } 44457b952d6SSatish Balay else { 445cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 44657b952d6SSatish Balay CHKERRQ(ierr); 44757b952d6SSatish Balay } 44857b952d6SSatish Balay PLogObjectParent(mat,A); 44957b952d6SSatish Balay 45057b952d6SSatish Balay /* copy over the A part */ 45157b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 45257b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 45357b952d6SSatish Balay row = baij->rstart; 45457b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 45557b952d6SSatish Balay 45657b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 45757b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 45857b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 45957b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 46057b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 46157b952d6SSatish Balay for (k=0; k<bs; k++ ) { 462cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 463cee3aa6bSSatish Balay col++; a += bs; 46457b952d6SSatish Balay } 46557b952d6SSatish Balay } 46657b952d6SSatish Balay } 46757b952d6SSatish Balay /* copy over the B part */ 46857b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 46957b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 47057b952d6SSatish Balay row = baij->rstart*bs; 47157b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 47257b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 47357b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 47457b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 47557b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 47657b952d6SSatish Balay for (k=0; k<bs; k++ ) { 477cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 478cee3aa6bSSatish Balay col++; a += bs; 47957b952d6SSatish Balay } 48057b952d6SSatish Balay } 48157b952d6SSatish Balay } 48257b952d6SSatish Balay PetscFree(rvals); 4836d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 4846d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 48557b952d6SSatish Balay if (!rank) { 48657b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 48757b952d6SSatish Balay } 48857b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 48957b952d6SSatish Balay } 49057b952d6SSatish Balay } 49157b952d6SSatish Balay return 0; 49257b952d6SSatish Balay } 49357b952d6SSatish Balay 49457b952d6SSatish Balay 49557b952d6SSatish Balay 49657b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 49757b952d6SSatish Balay { 49857b952d6SSatish Balay Mat mat = (Mat) obj; 49957b952d6SSatish Balay int ierr; 50057b952d6SSatish Balay ViewerType vtype; 50157b952d6SSatish Balay 50257b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 50357b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 50457b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 50557b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 50657b952d6SSatish Balay } 50757b952d6SSatish Balay else if (vtype == BINARY_FILE_VIEWER) { 50857b952d6SSatish Balay return MatView_MPIBAIJ_Binary(mat,viewer); 50957b952d6SSatish Balay } 51057b952d6SSatish Balay return 0; 51157b952d6SSatish Balay } 51257b952d6SSatish Balay 51379bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj) 51479bdfe76SSatish Balay { 51579bdfe76SSatish Balay Mat mat = (Mat) obj; 51679bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 51779bdfe76SSatish Balay int ierr; 51879bdfe76SSatish Balay 51979bdfe76SSatish Balay #if defined(PETSC_LOG) 52079bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 52179bdfe76SSatish Balay #endif 52279bdfe76SSatish Balay 52379bdfe76SSatish Balay PetscFree(baij->rowners); 52479bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 52579bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 52679bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 52779bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 52879bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 52979bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 53079bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 53179bdfe76SSatish Balay PetscFree(baij); 53279bdfe76SSatish Balay PLogObjectDestroy(mat); 53379bdfe76SSatish Balay PetscHeaderDestroy(mat); 53479bdfe76SSatish Balay return 0; 53579bdfe76SSatish Balay } 53679bdfe76SSatish Balay 537cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 538cee3aa6bSSatish Balay { 539cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 54047b4a8eaSLois Curfman McInnes int ierr, nt; 541cee3aa6bSSatish Balay 542c16cb8f2SBarry Smith VecGetLocalSize_Fast(xx,nt); 54347b4a8eaSLois Curfman McInnes if (nt != a->n) { 5440ac07820SSatish Balay SETERRQ(1,"MatMult_MPIBAIJ:Incompatible parition of A and xx"); 54547b4a8eaSLois Curfman McInnes } 546c16cb8f2SBarry Smith VecGetLocalSize_Fast(yy,nt); 54747b4a8eaSLois Curfman McInnes if (nt != a->m) { 5480ac07820SSatish Balay SETERRQ(1,"MatMult_MPIBAIJ:Incompatible parition of A and yy"); 54947b4a8eaSLois Curfman McInnes } 550cee3aa6bSSatish Balay ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 551cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 552cee3aa6bSSatish Balay ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 553cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 554cee3aa6bSSatish Balay return 0; 555cee3aa6bSSatish Balay } 556cee3aa6bSSatish Balay 557cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 558cee3aa6bSSatish Balay { 559cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 560cee3aa6bSSatish Balay int ierr; 561cee3aa6bSSatish Balay ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 562cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 563cee3aa6bSSatish Balay ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 564cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 565cee3aa6bSSatish Balay return 0; 566cee3aa6bSSatish Balay } 567cee3aa6bSSatish Balay 568cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 569cee3aa6bSSatish Balay { 570cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 571cee3aa6bSSatish Balay int ierr; 572cee3aa6bSSatish Balay 573cee3aa6bSSatish Balay /* do nondiagonal part */ 574cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 575cee3aa6bSSatish Balay /* send it on its way */ 576537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 577cee3aa6bSSatish Balay /* do local part */ 578cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 579cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 580cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 581cee3aa6bSSatish Balay /* but is not perhaps always true. */ 582cee3aa6bSSatish Balay ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 583cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx);CHKERRQ(ierr); 584cee3aa6bSSatish Balay return 0; 585cee3aa6bSSatish Balay } 586cee3aa6bSSatish Balay 587cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 588cee3aa6bSSatish Balay { 589cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 590cee3aa6bSSatish Balay int ierr; 591cee3aa6bSSatish Balay 592cee3aa6bSSatish Balay /* do nondiagonal part */ 593cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 594cee3aa6bSSatish Balay /* send it on its way */ 595537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 596cee3aa6bSSatish Balay /* do local part */ 597cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 598cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 599cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 600cee3aa6bSSatish Balay /* but is not perhaps always true. */ 601537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 602cee3aa6bSSatish Balay return 0; 603cee3aa6bSSatish Balay } 604cee3aa6bSSatish Balay 605cee3aa6bSSatish Balay /* 606cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 607cee3aa6bSSatish Balay diagonal block 608cee3aa6bSSatish Balay */ 609cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 610cee3aa6bSSatish Balay { 611cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 612cee3aa6bSSatish Balay if (a->M != a->N) 613cee3aa6bSSatish Balay SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block"); 614cee3aa6bSSatish Balay return MatGetDiagonal(a->A,v); 615cee3aa6bSSatish Balay } 616cee3aa6bSSatish Balay 617cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A) 618cee3aa6bSSatish Balay { 619cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 620cee3aa6bSSatish Balay int ierr; 621cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 622cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 623cee3aa6bSSatish Balay return 0; 624cee3aa6bSSatish Balay } 62557b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 62657b952d6SSatish Balay { 62757b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 62857b952d6SSatish Balay *m = mat->M; *n = mat->N; 62957b952d6SSatish Balay return 0; 63057b952d6SSatish Balay } 63157b952d6SSatish Balay 63257b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 63357b952d6SSatish Balay { 63457b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 63557b952d6SSatish Balay *m = mat->m; *n = mat->N; 63657b952d6SSatish Balay return 0; 63757b952d6SSatish Balay } 63857b952d6SSatish Balay 63957b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 64057b952d6SSatish Balay { 64157b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 64257b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 64357b952d6SSatish Balay return 0; 64457b952d6SSatish Balay } 64557b952d6SSatish Balay 646acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 647acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 648acdf5bf4SSatish Balay 649acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 650acdf5bf4SSatish Balay { 651acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 652acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 653acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 654d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 655d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 656acdf5bf4SSatish Balay 657acdf5bf4SSatish Balay if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIBAIJ:Already active"); 658acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 659acdf5bf4SSatish Balay 660acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 661acdf5bf4SSatish Balay /* 662acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 663acdf5bf4SSatish Balay */ 664acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 665bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 666bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 667acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 668acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 669acdf5bf4SSatish Balay } 670acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 671acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 672acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 673acdf5bf4SSatish Balay } 674acdf5bf4SSatish Balay 675acdf5bf4SSatish Balay 676d9d09a02SSatish Balay if (row < brstart || row >= brend) SETERRQ(1,"MatGetRow_MPIBAIJ:Only local rows") 677d9d09a02SSatish Balay lrow = row - brstart; 678acdf5bf4SSatish Balay 679acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 680acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 681acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 682acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 683acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 684acdf5bf4SSatish Balay nztot = nzA + nzB; 685acdf5bf4SSatish Balay 686acdf5bf4SSatish Balay cmap = mat->garray; 687acdf5bf4SSatish Balay if (v || idx) { 688acdf5bf4SSatish Balay if (nztot) { 689acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 690acdf5bf4SSatish Balay int imark = -1; 691acdf5bf4SSatish Balay if (v) { 692acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 693acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 694d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 695acdf5bf4SSatish Balay else break; 696acdf5bf4SSatish Balay } 697acdf5bf4SSatish Balay imark = i; 698acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 699acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 700acdf5bf4SSatish Balay } 701acdf5bf4SSatish Balay if (idx) { 702acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 703acdf5bf4SSatish Balay if (imark > -1) { 704acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 705bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 706acdf5bf4SSatish Balay } 707acdf5bf4SSatish Balay } else { 708acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 709d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 710d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 711acdf5bf4SSatish Balay else break; 712acdf5bf4SSatish Balay } 713acdf5bf4SSatish Balay imark = i; 714acdf5bf4SSatish Balay } 715d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 716d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 717acdf5bf4SSatish Balay } 718acdf5bf4SSatish Balay } 719d212a18eSSatish Balay else { 720d212a18eSSatish Balay if (idx) *idx = 0; 721d212a18eSSatish Balay if (v) *v = 0; 722d212a18eSSatish Balay } 723acdf5bf4SSatish Balay } 724acdf5bf4SSatish Balay *nz = nztot; 725acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 726acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 727acdf5bf4SSatish Balay return 0; 728acdf5bf4SSatish Balay } 729acdf5bf4SSatish Balay 730acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 731acdf5bf4SSatish Balay { 732acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 733acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 734acdf5bf4SSatish Balay SETERRQ(1,"MatRestoreRow_MPIBAIJ:MatGetRow not called"); 735acdf5bf4SSatish Balay } 736acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 737acdf5bf4SSatish Balay return 0; 738acdf5bf4SSatish Balay } 739acdf5bf4SSatish Balay 7405a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 7415a838052SSatish Balay { 7425a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 7435a838052SSatish Balay *bs = baij->bs; 7445a838052SSatish Balay return 0; 7455a838052SSatish Balay } 7465a838052SSatish Balay 74758667388SSatish Balay static int MatZeroEntries_MPIBAIJ(Mat A) 74858667388SSatish Balay { 74958667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 75058667388SSatish Balay int ierr; 75158667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 75258667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 75358667388SSatish Balay return 0; 75458667388SSatish Balay } 7550ac07820SSatish Balay 7564e220ebcSLois Curfman McInnes static int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 7570ac07820SSatish Balay { 7584e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 7594e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 760*7d57db60SLois Curfman McInnes int ierr; 761*7d57db60SLois Curfman McInnes double isend[5], irecv[5]; 7620ac07820SSatish Balay 7634e220ebcSLois Curfman McInnes info->rows_global = (double)a->M; 7644e220ebcSLois Curfman McInnes info->columns_global = (double)a->N; 7654e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 7664e220ebcSLois Curfman McInnes info->columns_local = (double)a->N; 7674e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 7684e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 7694e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 7704e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 7714e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 7720ac07820SSatish Balay if (flag == MAT_LOCAL) { 7734e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 7744e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 7754e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 7764e220ebcSLois Curfman McInnes info->memory = isend[3]; 7774e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 7780ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 7790ac07820SSatish Balay MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,matin->comm); 7804e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 7814e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 7824e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 7834e220ebcSLois Curfman McInnes info->memory = irecv[3]; 7844e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 7850ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 7860ac07820SSatish Balay MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,matin->comm); 7874e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 7884e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 7894e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 7904e220ebcSLois Curfman McInnes info->memory = irecv[3]; 7914e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 7920ac07820SSatish Balay } 7934e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 7944e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 7954e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 7960ac07820SSatish Balay return 0; 7970ac07820SSatish Balay } 7980ac07820SSatish Balay 79958667388SSatish Balay static int MatSetOption_MPIBAIJ(Mat A,MatOption op) 80058667388SSatish Balay { 80158667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 80258667388SSatish Balay 80358667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 80458667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 80558667388SSatish Balay op == MAT_COLUMNS_SORTED || 80658667388SSatish Balay op == MAT_ROW_ORIENTED) { 80758667388SSatish Balay MatSetOption(a->A,op); 80858667388SSatish Balay MatSetOption(a->B,op); 80958667388SSatish Balay } 81058667388SSatish Balay else if (op == MAT_ROWS_SORTED || 81158667388SSatish Balay op == MAT_SYMMETRIC || 81258667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 81358667388SSatish Balay op == MAT_YES_NEW_DIAGONALS) 81458667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 81558667388SSatish Balay else if (op == MAT_COLUMN_ORIENTED) { 81658667388SSatish Balay a->roworiented = 0; 81758667388SSatish Balay MatSetOption(a->A,op); 81858667388SSatish Balay MatSetOption(a->B,op); 81958667388SSatish Balay } 82058667388SSatish Balay else if (op == MAT_NO_NEW_DIAGONALS) 8210ac07820SSatish Balay {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIBAIJ:MAT_NO_NEW_DIAGONALS");} 82258667388SSatish Balay else 8230ac07820SSatish Balay {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIBAIJ:unknown option");} 82458667388SSatish Balay return 0; 82558667388SSatish Balay } 82658667388SSatish Balay 8270ac07820SSatish Balay static int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 8280ac07820SSatish Balay { 8290ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 8300ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 8310ac07820SSatish Balay Mat B; 8320ac07820SSatish Balay int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 8330ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 8340ac07820SSatish Balay Scalar *a; 8350ac07820SSatish Balay 8360ac07820SSatish Balay if (matout == PETSC_NULL && M != N) 8370ac07820SSatish Balay SETERRQ(1,"MatTranspose_MPIBAIJ:Square matrix only for in-place"); 8380ac07820SSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 8390ac07820SSatish Balay CHKERRQ(ierr); 8400ac07820SSatish Balay 8410ac07820SSatish Balay /* copy over the A part */ 8420ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 8430ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 8440ac07820SSatish Balay row = baij->rstart; 8450ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 8460ac07820SSatish Balay 8470ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 8480ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 8490ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 8500ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 8510ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 8520ac07820SSatish Balay for (k=0; k<bs; k++ ) { 8530ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 8540ac07820SSatish Balay col++; a += bs; 8550ac07820SSatish Balay } 8560ac07820SSatish Balay } 8570ac07820SSatish Balay } 8580ac07820SSatish Balay /* copy over the B part */ 8590ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 8600ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 8610ac07820SSatish Balay row = baij->rstart*bs; 8620ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 8630ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 8640ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 8650ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 8660ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 8670ac07820SSatish Balay for (k=0; k<bs; k++ ) { 8680ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 8690ac07820SSatish Balay col++; a += bs; 8700ac07820SSatish Balay } 8710ac07820SSatish Balay } 8720ac07820SSatish Balay } 8730ac07820SSatish Balay PetscFree(rvals); 8740ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 8750ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 8760ac07820SSatish Balay 8770ac07820SSatish Balay if (matout != PETSC_NULL) { 8780ac07820SSatish Balay *matout = B; 8790ac07820SSatish Balay } else { 8800ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 8810ac07820SSatish Balay PetscFree(baij->rowners); 8820ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 8830ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 8840ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 8850ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 8860ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 8870ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 8880ac07820SSatish Balay PetscFree(baij); 8890ac07820SSatish Balay PetscMemcpy(A,B,sizeof(struct _Mat)); 8900ac07820SSatish Balay PetscHeaderDestroy(B); 8910ac07820SSatish Balay } 8920ac07820SSatish Balay return 0; 8930ac07820SSatish Balay } 8940ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the 8950ac07820SSatish Balay matrix is square and the column and row owerships are identical. 8960ac07820SSatish Balay This is a BUG. The only way to fix it seems to be to access 8970ac07820SSatish Balay baij->A and baij->B directly and not through the MatZeroRows() 8980ac07820SSatish Balay routine. 8990ac07820SSatish Balay */ 9000ac07820SSatish Balay static int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 9010ac07820SSatish Balay { 9020ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 9030ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 9040ac07820SSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work; 9050ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 9060ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 9070ac07820SSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs; 9080ac07820SSatish Balay MPI_Comm comm = A->comm; 9090ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 9100ac07820SSatish Balay MPI_Status recv_status,*send_status; 9110ac07820SSatish Balay IS istmp; 9120ac07820SSatish Balay 9130ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 9140ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 9150ac07820SSatish Balay 9160ac07820SSatish Balay /* first count number of contributors to each processor */ 9170ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 9180ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 9190ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 9200ac07820SSatish Balay for ( i=0; i<N; i++ ) { 9210ac07820SSatish Balay idx = rows[i]; 9220ac07820SSatish Balay found = 0; 9230ac07820SSatish Balay for ( j=0; j<size; j++ ) { 9240ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 9250ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 9260ac07820SSatish Balay } 9270ac07820SSatish Balay } 9280ac07820SSatish Balay if (!found) SETERRQ(1,"MatZeroRows_MPIBAIJ:Index out of range"); 9290ac07820SSatish Balay } 9300ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 9310ac07820SSatish Balay 9320ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 9330ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 9340ac07820SSatish Balay MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 9350ac07820SSatish Balay nrecvs = work[rank]; 9360ac07820SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 9370ac07820SSatish Balay nmax = work[rank]; 9380ac07820SSatish Balay PetscFree(work); 9390ac07820SSatish Balay 9400ac07820SSatish Balay /* post receives: */ 9410ac07820SSatish Balay rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 9420ac07820SSatish Balay CHKPTRQ(rvalues); 9430ac07820SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 9440ac07820SSatish Balay CHKPTRQ(recv_waits); 9450ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 9460ac07820SSatish Balay MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 9470ac07820SSatish Balay } 9480ac07820SSatish Balay 9490ac07820SSatish Balay /* do sends: 9500ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 9510ac07820SSatish Balay the ith processor 9520ac07820SSatish Balay */ 9530ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 9540ac07820SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 9550ac07820SSatish Balay CHKPTRQ(send_waits); 9560ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 9570ac07820SSatish Balay starts[0] = 0; 9580ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 9590ac07820SSatish Balay for ( i=0; i<N; i++ ) { 9600ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 9610ac07820SSatish Balay } 9620ac07820SSatish Balay ISRestoreIndices(is,&rows); 9630ac07820SSatish Balay 9640ac07820SSatish Balay starts[0] = 0; 9650ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 9660ac07820SSatish Balay count = 0; 9670ac07820SSatish Balay for ( i=0; i<size; i++ ) { 9680ac07820SSatish Balay if (procs[i]) { 9690ac07820SSatish Balay MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 9700ac07820SSatish Balay } 9710ac07820SSatish Balay } 9720ac07820SSatish Balay PetscFree(starts); 9730ac07820SSatish Balay 9740ac07820SSatish Balay base = owners[rank]*bs; 9750ac07820SSatish Balay 9760ac07820SSatish Balay /* wait on receives */ 9770ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 9780ac07820SSatish Balay source = lens + nrecvs; 9790ac07820SSatish Balay count = nrecvs; slen = 0; 9800ac07820SSatish Balay while (count) { 9810ac07820SSatish Balay MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 9820ac07820SSatish Balay /* unpack receives into our local space */ 9830ac07820SSatish Balay MPI_Get_count(&recv_status,MPI_INT,&n); 9840ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 9850ac07820SSatish Balay lens[imdex] = n; 9860ac07820SSatish Balay slen += n; 9870ac07820SSatish Balay count--; 9880ac07820SSatish Balay } 9890ac07820SSatish Balay PetscFree(recv_waits); 9900ac07820SSatish Balay 9910ac07820SSatish Balay /* move the data into the send scatter */ 9920ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 9930ac07820SSatish Balay count = 0; 9940ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 9950ac07820SSatish Balay values = rvalues + i*nmax; 9960ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 9970ac07820SSatish Balay lrows[count++] = values[j] - base; 9980ac07820SSatish Balay } 9990ac07820SSatish Balay } 10000ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 10010ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 10020ac07820SSatish Balay 10030ac07820SSatish Balay /* actually zap the local rows */ 1004537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 10050ac07820SSatish Balay PLogObjectParent(A,istmp); 10060ac07820SSatish Balay PetscFree(lrows); 10070ac07820SSatish Balay ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 10080ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 10090ac07820SSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 10100ac07820SSatish Balay 10110ac07820SSatish Balay /* wait on sends */ 10120ac07820SSatish Balay if (nsends) { 10130ac07820SSatish Balay send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 10140ac07820SSatish Balay CHKPTRQ(send_status); 10150ac07820SSatish Balay MPI_Waitall(nsends,send_waits,send_status); 10160ac07820SSatish Balay PetscFree(send_status); 10170ac07820SSatish Balay } 10180ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 10190ac07820SSatish Balay 10200ac07820SSatish Balay return 0; 10210ac07820SSatish Balay } 1022ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 1023ba4ca20aSSatish Balay static int MatPrintHelp_MPIBAIJ(Mat A) 1024ba4ca20aSSatish Balay { 1025ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1026ba4ca20aSSatish Balay 1027ba4ca20aSSatish Balay if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A); 1028ba4ca20aSSatish Balay else return 0; 1029ba4ca20aSSatish Balay } 10300ac07820SSatish Balay 10310ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 10320ac07820SSatish Balay 103379bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 103479bdfe76SSatish Balay static struct _MatOps MatOps = { 1035bd16c2feSSatish Balay MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 103693bc47c4SSatish Balay MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,MatSolve_MPIBAIJ, 103793bc47c4SSatish Balay MatSolveAdd_MPIBAIJ,MatSolveTrans_MPIBAIJ,MatSolveTransAdd_MPIBAIJ,MatLUFactor_MPIBAIJ, 10380ac07820SSatish Balay 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 1039acdf5bf4SSatish Balay 0,MatGetDiagonal_MPIBAIJ,0,MatNorm_MPIBAIJ, 104058667388SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 10410ac07820SSatish Balay MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,MatGetReordering_MPIBAIJ,MatLUFactorSymbolic_MPIBAIJ, 104293bc47c4SSatish Balay MatLUFactorNumeric_MPIBAIJ,0,0,MatGetSize_MPIBAIJ, 104393bc47c4SSatish Balay MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,MatILUFactorSymbolic_MPIBAIJ,0, 1044905e6a2fSBarry Smith 0,0,0,MatConvertSameType_MPIBAIJ,0,0, 1045d212a18eSSatish Balay 0,0,0,MatGetSubMatrices_MPIBAIJ, 1046ba4ca20aSSatish Balay MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 10475a838052SSatish Balay MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ}; 104879bdfe76SSatish Balay 104979bdfe76SSatish Balay 105079bdfe76SSatish Balay /*@C 105179bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 105279bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 105379bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 105479bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 105579bdfe76SSatish Balay performance can be increased by more than a factor of 50. 105679bdfe76SSatish Balay 105779bdfe76SSatish Balay Input Parameters: 105879bdfe76SSatish Balay . comm - MPI communicator 105979bdfe76SSatish Balay . bs - size of blockk 106079bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 106192e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 106292e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 106392e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 106492e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 106592e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 106679bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 106792e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 106879bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 106979bdfe76SSatish Balay submatrix (same for all local rows) 107092e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 107192e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 107292e8d321SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if 107392e8d321SLois Curfman McInnes it is zero. 107492e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 107579bdfe76SSatish Balay submatrix (same for all local rows). 107692e8d321SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various block rows of the 107792e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 107892e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 107979bdfe76SSatish Balay 108079bdfe76SSatish Balay Output Parameter: 108179bdfe76SSatish Balay . A - the matrix 108279bdfe76SSatish Balay 108379bdfe76SSatish Balay Notes: 108479bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 108579bdfe76SSatish Balay (possibly both). 108679bdfe76SSatish Balay 108779bdfe76SSatish Balay Storage Information: 108879bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 108979bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 109079bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 109179bdfe76SSatish Balay local matrix (a rectangular submatrix). 109279bdfe76SSatish Balay 109379bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 109479bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 109579bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 109679bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 109779bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 109879bdfe76SSatish Balay 109979bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 110079bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 110179bdfe76SSatish Balay 110279bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 110379bdfe76SSatish Balay $ ------------------- 110479bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 110579bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 110679bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 110779bdfe76SSatish Balay $ ------------------- 110879bdfe76SSatish Balay $ 110979bdfe76SSatish Balay 111079bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 111179bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 111279bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 111357b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 111479bdfe76SSatish Balay 111579bdfe76SSatish Balay Now d_nz should indicate the number of nonzeros per row in the d matrix, 111679bdfe76SSatish Balay and o_nz should indicate the number of nonzeros per row in the o matrix. 111779bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 111892e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 111992e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 112092e8d321SLois Curfman McInnes matrices and the file $(PETSC_DIR)/Performance. 112179bdfe76SSatish Balay 112292e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 112379bdfe76SSatish Balay 112479bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 112579bdfe76SSatish Balay @*/ 112679bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 112779bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 112879bdfe76SSatish Balay { 112979bdfe76SSatish Balay Mat B; 113079bdfe76SSatish Balay Mat_MPIBAIJ *b; 1131cee3aa6bSSatish Balay int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE; 113279bdfe76SSatish Balay 113379bdfe76SSatish Balay if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified"); 113479bdfe76SSatish Balay *A = 0; 113579bdfe76SSatish Balay PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 113679bdfe76SSatish Balay PLogObjectCreate(B); 113779bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 113879bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 113979bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 114079bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 114179bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 114257b952d6SSatish Balay 114379bdfe76SSatish Balay B->factor = 0; 114479bdfe76SSatish Balay B->assembled = PETSC_FALSE; 114579bdfe76SSatish Balay 114679bdfe76SSatish Balay b->insertmode = NOT_SET_VALUES; 114779bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 114879bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 114979bdfe76SSatish Balay 115079bdfe76SSatish Balay if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 115157b952d6SSatish Balay SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1152cee3aa6bSSatish Balay if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified"); 1153cee3aa6bSSatish Balay if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified"); 1154cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1155cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 115679bdfe76SSatish Balay 115779bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 115879bdfe76SSatish Balay work[0] = m; work[1] = n; 115979bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 116079bdfe76SSatish Balay MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 116179bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 116279bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 116379bdfe76SSatish Balay } 116479bdfe76SSatish Balay if (m == PETSC_DECIDE) { 116579bdfe76SSatish Balay Mbs = M/bs; 116679bdfe76SSatish Balay if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize"); 116779bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 116879bdfe76SSatish Balay m = mbs*bs; 116979bdfe76SSatish Balay } 117079bdfe76SSatish Balay if (n == PETSC_DECIDE) { 117179bdfe76SSatish Balay Nbs = N/bs; 117279bdfe76SSatish Balay if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize"); 117379bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 117479bdfe76SSatish Balay n = nbs*bs; 117579bdfe76SSatish Balay } 1176cee3aa6bSSatish Balay if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize"); 117779bdfe76SSatish Balay 117879bdfe76SSatish Balay b->m = m; B->m = m; 117979bdfe76SSatish Balay b->n = n; B->n = n; 118079bdfe76SSatish Balay b->N = N; B->N = N; 118179bdfe76SSatish Balay b->M = M; B->M = M; 118279bdfe76SSatish Balay b->bs = bs; 118379bdfe76SSatish Balay b->bs2 = bs*bs; 118479bdfe76SSatish Balay b->mbs = mbs; 118579bdfe76SSatish Balay b->nbs = nbs; 118679bdfe76SSatish Balay b->Mbs = Mbs; 118779bdfe76SSatish Balay b->Nbs = Nbs; 118879bdfe76SSatish Balay 118979bdfe76SSatish Balay /* build local table of row and column ownerships */ 119079bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 119179bdfe76SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 11920ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 119379bdfe76SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 119479bdfe76SSatish Balay b->rowners[0] = 0; 119579bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 119679bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 119779bdfe76SSatish Balay } 119879bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 119979bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 120079bdfe76SSatish Balay MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 120179bdfe76SSatish Balay b->cowners[0] = 0; 120279bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 120379bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 120479bdfe76SSatish Balay } 120579bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 120679bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 120779bdfe76SSatish Balay 120879bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 120979bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 121079bdfe76SSatish Balay PLogObjectParent(B,b->A); 121179bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 121279bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 121379bdfe76SSatish Balay PLogObjectParent(B,b->B); 121479bdfe76SSatish Balay 121579bdfe76SSatish Balay /* build cache for off array entries formed */ 121679bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 121779bdfe76SSatish Balay b->colmap = 0; 121879bdfe76SSatish Balay b->garray = 0; 121979bdfe76SSatish Balay b->roworiented = 1; 122079bdfe76SSatish Balay 122179bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 122279bdfe76SSatish Balay b->lvec = 0; 122379bdfe76SSatish Balay b->Mvctx = 0; 122479bdfe76SSatish Balay 122579bdfe76SSatish Balay /* stuff for MatGetRow() */ 122679bdfe76SSatish Balay b->rowindices = 0; 122779bdfe76SSatish Balay b->rowvalues = 0; 122879bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 122979bdfe76SSatish Balay 123079bdfe76SSatish Balay *A = B; 123179bdfe76SSatish Balay return 0; 123279bdfe76SSatish Balay } 12330ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 12340ac07820SSatish Balay { 12350ac07820SSatish Balay Mat mat; 12360ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 12370ac07820SSatish Balay int ierr, len=0, flg; 12380ac07820SSatish Balay 12390ac07820SSatish Balay *newmat = 0; 12400ac07820SSatish Balay PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm); 12410ac07820SSatish Balay PLogObjectCreate(mat); 12420ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 12430ac07820SSatish Balay PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 12440ac07820SSatish Balay mat->destroy = MatDestroy_MPIBAIJ; 12450ac07820SSatish Balay mat->view = MatView_MPIBAIJ; 12460ac07820SSatish Balay mat->factor = matin->factor; 12470ac07820SSatish Balay mat->assembled = PETSC_TRUE; 12480ac07820SSatish Balay 12490ac07820SSatish Balay a->m = mat->m = oldmat->m; 12500ac07820SSatish Balay a->n = mat->n = oldmat->n; 12510ac07820SSatish Balay a->M = mat->M = oldmat->M; 12520ac07820SSatish Balay a->N = mat->N = oldmat->N; 12530ac07820SSatish Balay 12540ac07820SSatish Balay a->bs = oldmat->bs; 12550ac07820SSatish Balay a->bs2 = oldmat->bs2; 12560ac07820SSatish Balay a->mbs = oldmat->mbs; 12570ac07820SSatish Balay a->nbs = oldmat->nbs; 12580ac07820SSatish Balay a->Mbs = oldmat->Mbs; 12590ac07820SSatish Balay a->Nbs = oldmat->Nbs; 12600ac07820SSatish Balay 12610ac07820SSatish Balay a->rstart = oldmat->rstart; 12620ac07820SSatish Balay a->rend = oldmat->rend; 12630ac07820SSatish Balay a->cstart = oldmat->cstart; 12640ac07820SSatish Balay a->cend = oldmat->cend; 12650ac07820SSatish Balay a->size = oldmat->size; 12660ac07820SSatish Balay a->rank = oldmat->rank; 12670ac07820SSatish Balay a->insertmode = NOT_SET_VALUES; 12680ac07820SSatish Balay a->rowvalues = 0; 12690ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 12700ac07820SSatish Balay 12710ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 12720ac07820SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 12730ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 12740ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 12750ac07820SSatish Balay ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 12760ac07820SSatish Balay if (oldmat->colmap) { 12770ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 12780ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 12790ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 12800ac07820SSatish Balay } else a->colmap = 0; 12810ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 12820ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 12830ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 12840ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 12850ac07820SSatish Balay } else a->garray = 0; 12860ac07820SSatish Balay 12870ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 12880ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 12890ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 12900ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 12910ac07820SSatish Balay ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 12920ac07820SSatish Balay PLogObjectParent(mat,a->A); 12930ac07820SSatish Balay ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 12940ac07820SSatish Balay PLogObjectParent(mat,a->B); 12950ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 12960ac07820SSatish Balay if (flg) { 12970ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 12980ac07820SSatish Balay } 12990ac07820SSatish Balay *newmat = mat; 13000ac07820SSatish Balay return 0; 13010ac07820SSatish Balay } 130257b952d6SSatish Balay 130357b952d6SSatish Balay #include "sys.h" 130457b952d6SSatish Balay 130557b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 130657b952d6SSatish Balay { 130757b952d6SSatish Balay Mat A; 130857b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 130957b952d6SSatish Balay Scalar *vals,*buf; 131057b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 131157b952d6SSatish Balay MPI_Status status; 1312cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 131357b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 131457b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 131557b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 131657b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 131757b952d6SSatish Balay 131857b952d6SSatish Balay 131957b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 132057b952d6SSatish Balay bs2 = bs*bs; 132157b952d6SSatish Balay 132257b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 132357b952d6SSatish Balay if (!rank) { 132457b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 132557b952d6SSatish Balay ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1326cee3aa6bSSatish Balay if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object"); 132757b952d6SSatish Balay } 132857b952d6SSatish Balay 132957b952d6SSatish Balay MPI_Bcast(header+1,3,MPI_INT,0,comm); 133057b952d6SSatish Balay M = header[1]; N = header[2]; 133157b952d6SSatish Balay 133257b952d6SSatish Balay if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 133357b952d6SSatish Balay 133457b952d6SSatish Balay /* 133557b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 133657b952d6SSatish Balay divisible by the blocksize 133757b952d6SSatish Balay */ 133857b952d6SSatish Balay Mbs = M/bs; 133957b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 134057b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 134157b952d6SSatish Balay else Mbs++; 134257b952d6SSatish Balay if (extra_rows &&!rank) { 1343537820f0SBarry Smith PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 134457b952d6SSatish Balay } 1345537820f0SBarry Smith 134657b952d6SSatish Balay /* determine ownership of all rows */ 134757b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 134857b952d6SSatish Balay m = mbs * bs; 1349cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 1350cee3aa6bSSatish Balay browners = rowners + size + 1; 135157b952d6SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 135257b952d6SSatish Balay rowners[0] = 0; 1353cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 1354cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 135557b952d6SSatish Balay rstart = rowners[rank]; 135657b952d6SSatish Balay rend = rowners[rank+1]; 135757b952d6SSatish Balay 135857b952d6SSatish Balay /* distribute row lengths to all processors */ 135957b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 136057b952d6SSatish Balay if (!rank) { 136157b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 136257b952d6SSatish Balay ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 136357b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 136457b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1365cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 1366cee3aa6bSSatish Balay MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 136757b952d6SSatish Balay PetscFree(sndcounts); 136857b952d6SSatish Balay } 136957b952d6SSatish Balay else { 137057b952d6SSatish Balay MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 137157b952d6SSatish Balay } 137257b952d6SSatish Balay 137357b952d6SSatish Balay if (!rank) { 137457b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 137557b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 137657b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 137757b952d6SSatish Balay for ( i=0; i<size; i++ ) { 137857b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 137957b952d6SSatish Balay procsnz[i] += rowlengths[j]; 138057b952d6SSatish Balay } 138157b952d6SSatish Balay } 138257b952d6SSatish Balay PetscFree(rowlengths); 138357b952d6SSatish Balay 138457b952d6SSatish Balay /* determine max buffer needed and allocate it */ 138557b952d6SSatish Balay maxnz = 0; 138657b952d6SSatish Balay for ( i=0; i<size; i++ ) { 138757b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 138857b952d6SSatish Balay } 138957b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 139057b952d6SSatish Balay 139157b952d6SSatish Balay /* read in my part of the matrix column indices */ 139257b952d6SSatish Balay nz = procsnz[0]; 139357b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 139457b952d6SSatish Balay mycols = ibuf; 1395cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 139657b952d6SSatish Balay ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1397cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 1398cee3aa6bSSatish Balay 139957b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 140057b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 140157b952d6SSatish Balay nz = procsnz[i]; 140257b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 140357b952d6SSatish Balay MPI_Send(cols,nz,MPI_INT,i,tag,comm); 140457b952d6SSatish Balay } 140557b952d6SSatish Balay /* read in the stuff for the last proc */ 140657b952d6SSatish Balay if ( size != 1 ) { 140757b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 140857b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 140957b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 1410cee3aa6bSSatish Balay MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm); 141157b952d6SSatish Balay } 141257b952d6SSatish Balay PetscFree(cols); 141357b952d6SSatish Balay } 141457b952d6SSatish Balay else { 141557b952d6SSatish Balay /* determine buffer space needed for message */ 141657b952d6SSatish Balay nz = 0; 141757b952d6SSatish Balay for ( i=0; i<m; i++ ) { 141857b952d6SSatish Balay nz += locrowlens[i]; 141957b952d6SSatish Balay } 142057b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 142157b952d6SSatish Balay mycols = ibuf; 142257b952d6SSatish Balay /* receive message of column indices*/ 142357b952d6SSatish Balay MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 142457b952d6SSatish Balay MPI_Get_count(&status,MPI_INT,&maxnz); 1425cee3aa6bSSatish Balay if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file"); 142657b952d6SSatish Balay } 142757b952d6SSatish Balay 142857b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 1429cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 1430cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 143157b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 1432cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 143357b952d6SSatish Balay masked1 = mask + Mbs; 143457b952d6SSatish Balay masked2 = masked1 + Mbs; 143557b952d6SSatish Balay rowcount = 0; nzcount = 0; 143657b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 143757b952d6SSatish Balay dcount = 0; 143857b952d6SSatish Balay odcount = 0; 143957b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 144057b952d6SSatish Balay kmax = locrowlens[rowcount]; 144157b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 144257b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 144357b952d6SSatish Balay if (!mask[tmp]) { 144457b952d6SSatish Balay mask[tmp] = 1; 144557b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 144657b952d6SSatish Balay else masked1[dcount++] = tmp; 144757b952d6SSatish Balay } 144857b952d6SSatish Balay } 144957b952d6SSatish Balay rowcount++; 145057b952d6SSatish Balay } 1451cee3aa6bSSatish Balay 145257b952d6SSatish Balay dlens[i] = dcount; 145357b952d6SSatish Balay odlens[i] = odcount; 1454cee3aa6bSSatish Balay 145557b952d6SSatish Balay /* zero out the mask elements we set */ 145657b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 145757b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 145857b952d6SSatish Balay } 1459cee3aa6bSSatish Balay 146057b952d6SSatish Balay /* create our matrix */ 1461537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 1462537820f0SBarry Smith CHKERRQ(ierr); 146357b952d6SSatish Balay A = *newmat; 14646d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 146557b952d6SSatish Balay 146657b952d6SSatish Balay if (!rank) { 146757b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 146857b952d6SSatish Balay /* read in my part of the matrix numerical values */ 146957b952d6SSatish Balay nz = procsnz[0]; 147057b952d6SSatish Balay vals = buf; 1471cee3aa6bSSatish Balay mycols = ibuf; 1472cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 147357b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1474cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 1475537820f0SBarry Smith 147657b952d6SSatish Balay /* insert into matrix */ 147757b952d6SSatish Balay jj = rstart*bs; 147857b952d6SSatish Balay for ( i=0; i<m; i++ ) { 147957b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 148057b952d6SSatish Balay mycols += locrowlens[i]; 148157b952d6SSatish Balay vals += locrowlens[i]; 148257b952d6SSatish Balay jj++; 148357b952d6SSatish Balay } 148457b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 148557b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 148657b952d6SSatish Balay nz = procsnz[i]; 148757b952d6SSatish Balay vals = buf; 148857b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 148957b952d6SSatish Balay MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 149057b952d6SSatish Balay } 149157b952d6SSatish Balay /* the last proc */ 149257b952d6SSatish Balay if ( size != 1 ){ 149357b952d6SSatish Balay nz = procsnz[i] - extra_rows; 1494cee3aa6bSSatish Balay vals = buf; 149557b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 149657b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 1497cee3aa6bSSatish Balay MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm); 149857b952d6SSatish Balay } 149957b952d6SSatish Balay PetscFree(procsnz); 150057b952d6SSatish Balay } 150157b952d6SSatish Balay else { 150257b952d6SSatish Balay /* receive numeric values */ 150357b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 150457b952d6SSatish Balay 150557b952d6SSatish Balay /* receive message of values*/ 150657b952d6SSatish Balay vals = buf; 1507cee3aa6bSSatish Balay mycols = ibuf; 150857b952d6SSatish Balay MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 150957b952d6SSatish Balay MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1510cee3aa6bSSatish Balay if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file"); 151157b952d6SSatish Balay 151257b952d6SSatish Balay /* insert into matrix */ 151357b952d6SSatish Balay jj = rstart*bs; 1514cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 151557b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 151657b952d6SSatish Balay mycols += locrowlens[i]; 151757b952d6SSatish Balay vals += locrowlens[i]; 151857b952d6SSatish Balay jj++; 151957b952d6SSatish Balay } 152057b952d6SSatish Balay } 152157b952d6SSatish Balay PetscFree(locrowlens); 152257b952d6SSatish Balay PetscFree(buf); 152357b952d6SSatish Balay PetscFree(ibuf); 152457b952d6SSatish Balay PetscFree(rowners); 152557b952d6SSatish Balay PetscFree(dlens); 1526cee3aa6bSSatish Balay PetscFree(mask); 15276d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 15286d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 152957b952d6SSatish Balay return 0; 153057b952d6SSatish Balay } 153157b952d6SSatish Balay 153257b952d6SSatish Balay 1533