179bdfe76SSatish Balay #ifndef lint 2*3b2fbd54SBarry Smith static char vcid[] = "$Id: mpibaij.c,v 1.24 1996/08/23 22:21:21 curfman Exp bsmith $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 570f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 779bdfe76SSatish Balay 857b952d6SSatish Balay #include "draw.h" 957b952d6SSatish Balay #include "pinclude/pviewer.h" 1057b952d6SSatish Balay 1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 1593bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *); 1693bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *); 1793bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double); 1893bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec); 1993bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 2093bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec); 2193bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 2293bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *); 2357b952d6SSatish Balay 24*3b2fbd54SBarry Smith 25537820f0SBarry Smith /* 26537820f0SBarry Smith Local utility routine that creates a mapping from the global column 2757b952d6SSatish Balay number to the local number in the off-diagonal part of the local 2857b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 2957b952d6SSatish Balay length of colmap equals the global matrix length. 3057b952d6SSatish Balay */ 3157b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 3257b952d6SSatish Balay { 3357b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3457b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 3557b952d6SSatish Balay int nbs = B->nbs,i; 3657b952d6SSatish Balay 3757b952d6SSatish Balay baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap); 3857b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 3957b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 40905e6a2fSBarry Smith for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i + 1; 4157b952d6SSatish Balay return 0; 4257b952d6SSatish Balay } 4357b952d6SSatish Balay 44*3b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 45*3b2fbd54SBarry Smith PetscTruth *done) 4657b952d6SSatish Balay { 47*3b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 4857b952d6SSatish Balay int ierr; 49*3b2fbd54SBarry Smith if (aij->size == 1) { 50*3b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 51*3b2fbd54SBarry Smith } else SETERRQ(1,"MatGetRowIJ_MPIBAIJ:not supported in parallel"); 52*3b2fbd54SBarry Smith return 0; 53*3b2fbd54SBarry Smith } 54*3b2fbd54SBarry Smith 55*3b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 56*3b2fbd54SBarry Smith PetscTruth *done) 57*3b2fbd54SBarry Smith { 58*3b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 59*3b2fbd54SBarry Smith int ierr; 60*3b2fbd54SBarry Smith if (aij->size == 1) { 61*3b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 62*3b2fbd54SBarry Smith } else SETERRQ(1,"MatRestoreRowIJ_MPIBAIJ:not supported in parallel"); 6357b952d6SSatish Balay return 0; 6457b952d6SSatish Balay } 6557b952d6SSatish Balay 6657b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 6757b952d6SSatish Balay { 6857b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 6957b952d6SSatish Balay Scalar value; 7057b952d6SSatish Balay int ierr,i,j, rstart = baij->rstart, rend = baij->rend; 7157b952d6SSatish Balay int cstart = baij->cstart, cend = baij->cend,row,col; 7257b952d6SSatish Balay int roworiented = baij->roworiented,rstart_orig,rend_orig; 7357b952d6SSatish Balay int cstart_orig,cend_orig,bs=baij->bs; 7457b952d6SSatish Balay 7557b952d6SSatish Balay if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) { 7657b952d6SSatish Balay SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds"); 7757b952d6SSatish Balay } 7857b952d6SSatish Balay baij->insertmode = addv; 7957b952d6SSatish Balay rstart_orig = rstart*bs; 8057b952d6SSatish Balay rend_orig = rend*bs; 8157b952d6SSatish Balay cstart_orig = cstart*bs; 8257b952d6SSatish Balay cend_orig = cend*bs; 8357b952d6SSatish Balay for ( i=0; i<m; i++ ) { 8457b952d6SSatish Balay if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row"); 8557b952d6SSatish Balay if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large"); 8657b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 8757b952d6SSatish Balay row = im[i] - rstart_orig; 8857b952d6SSatish Balay for ( j=0; j<n; j++ ) { 8957b952d6SSatish Balay if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column"); 9057b952d6SSatish Balay if (in[j] >= baij->N) SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large"); 9157b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 9257b952d6SSatish Balay col = in[j] - cstart_orig; 9357b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 9457b952d6SSatish Balay ierr = MatSetValues(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 9557b952d6SSatish Balay } 9657b952d6SSatish Balay else { 9757b952d6SSatish Balay if (mat->was_assembled) { 98905e6a2fSBarry Smith if (!baij->colmap) { 99905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 100905e6a2fSBarry Smith } 101905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 10257b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 10357b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 10457b952d6SSatish Balay col = in[j]; 10557b952d6SSatish Balay } 10657b952d6SSatish Balay } 10757b952d6SSatish Balay else col = in[j]; 10857b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 10957b952d6SSatish Balay ierr = MatSetValues(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 11057b952d6SSatish Balay } 11157b952d6SSatish Balay } 11257b952d6SSatish Balay } 11357b952d6SSatish Balay else { 11457b952d6SSatish Balay if (roworiented) { 11557b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 11657b952d6SSatish Balay } 11757b952d6SSatish Balay else { 11857b952d6SSatish Balay row = im[i]; 11957b952d6SSatish Balay for ( j=0; j<n; j++ ) { 12057b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 12157b952d6SSatish Balay } 12257b952d6SSatish Balay } 12357b952d6SSatish Balay } 12457b952d6SSatish Balay } 12557b952d6SSatish Balay return 0; 12657b952d6SSatish Balay } 12757b952d6SSatish Balay 128d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 129d6de1c52SSatish Balay { 130d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 131d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 132d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 133d6de1c52SSatish Balay 134d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 135d6de1c52SSatish Balay if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative row"); 136d6de1c52SSatish Balay if (idxm[i] >= baij->M) SETERRQ(1,"MatGetValues_MPIBAIJ:Row too large"); 137d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 138d6de1c52SSatish Balay row = idxm[i] - bsrstart; 139d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 140d6de1c52SSatish Balay if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative column"); 141d6de1c52SSatish Balay if (idxn[j] >= baij->N) SETERRQ(1,"MatGetValues_MPIBAIJ:Col too large"); 142d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 143d6de1c52SSatish Balay col = idxn[j] - bscstart; 144d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 145d6de1c52SSatish Balay } 146d6de1c52SSatish Balay else { 147905e6a2fSBarry Smith if (!baij->colmap) { 148905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 149905e6a2fSBarry Smith } 150e60e1c95SSatish Balay if((baij->colmap[idxn[j]/bs]-1 < 0) || 151e60e1c95SSatish Balay (baij->garray[baij->colmap[idxn[j]/bs]-1] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 152d9d09a02SSatish Balay else { 153905e6a2fSBarry Smith col = (baij->colmap[idxn[j]/bs]-1)*bs + idxn[j]%bs; 154d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 155d6de1c52SSatish Balay } 156d6de1c52SSatish Balay } 157d6de1c52SSatish Balay } 158d9d09a02SSatish Balay } 159d6de1c52SSatish Balay else { 160d6de1c52SSatish Balay SETERRQ(1,"MatGetValues_MPIBAIJ:Only local values currently supported"); 161d6de1c52SSatish Balay } 162d6de1c52SSatish Balay } 163d6de1c52SSatish Balay return 0; 164d6de1c52SSatish Balay } 165d6de1c52SSatish Balay 166d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 167d6de1c52SSatish Balay { 168d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 169d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 170acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 171d6de1c52SSatish Balay double sum = 0.0; 172d6de1c52SSatish Balay Scalar *v; 173d6de1c52SSatish Balay 174d6de1c52SSatish Balay if (baij->size == 1) { 175d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 176d6de1c52SSatish Balay } else { 177d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 178d6de1c52SSatish Balay v = amat->a; 179d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 180d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 181d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 182d6de1c52SSatish Balay #else 183d6de1c52SSatish Balay sum += (*v)*(*v); v++; 184d6de1c52SSatish Balay #endif 185d6de1c52SSatish Balay } 186d6de1c52SSatish Balay v = bmat->a; 187d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 188d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 189d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 190d6de1c52SSatish Balay #else 191d6de1c52SSatish Balay sum += (*v)*(*v); v++; 192d6de1c52SSatish Balay #endif 193d6de1c52SSatish Balay } 194d6de1c52SSatish Balay MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 195d6de1c52SSatish Balay *norm = sqrt(*norm); 196d6de1c52SSatish Balay } 197acdf5bf4SSatish Balay else 198acdf5bf4SSatish Balay SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 199d6de1c52SSatish Balay } 200d6de1c52SSatish Balay return 0; 201d6de1c52SSatish Balay } 20257b952d6SSatish Balay 20357b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 20457b952d6SSatish Balay { 20557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 20657b952d6SSatish Balay MPI_Comm comm = mat->comm; 20757b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 20857b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 20957b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 21057b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 21157b952d6SSatish Balay InsertMode addv; 21257b952d6SSatish Balay Scalar *rvalues,*svalues; 21357b952d6SSatish Balay 21457b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 21557b952d6SSatish Balay MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 21657b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 21757b952d6SSatish Balay SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added"); 21857b952d6SSatish Balay } 21957b952d6SSatish Balay baij->insertmode = addv; /* in case this processor had no cache */ 22057b952d6SSatish Balay 22157b952d6SSatish Balay /* first count number of contributors to each processor */ 22257b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 22357b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 22457b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 22557b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 22657b952d6SSatish Balay idx = baij->stash.idx[i]; 22757b952d6SSatish Balay for ( j=0; j<size; j++ ) { 22857b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 22957b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 23057b952d6SSatish Balay } 23157b952d6SSatish Balay } 23257b952d6SSatish Balay } 23357b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 23457b952d6SSatish Balay 23557b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 23657b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 23757b952d6SSatish Balay MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 23857b952d6SSatish Balay nreceives = work[rank]; 23957b952d6SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 24057b952d6SSatish Balay nmax = work[rank]; 24157b952d6SSatish Balay PetscFree(work); 24257b952d6SSatish Balay 24357b952d6SSatish Balay /* post receives: 24457b952d6SSatish Balay 1) each message will consist of ordered pairs 24557b952d6SSatish Balay (global index,value) we store the global index as a double 24657b952d6SSatish Balay to simplify the message passing. 24757b952d6SSatish Balay 2) since we don't know how long each individual message is we 24857b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 24957b952d6SSatish Balay this is a lot of wasted space. 25057b952d6SSatish Balay 25157b952d6SSatish Balay 25257b952d6SSatish Balay This could be done better. 25357b952d6SSatish Balay */ 25457b952d6SSatish Balay rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 25557b952d6SSatish Balay CHKPTRQ(rvalues); 25657b952d6SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 25757b952d6SSatish Balay CHKPTRQ(recv_waits); 25857b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 25957b952d6SSatish Balay MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 26057b952d6SSatish Balay comm,recv_waits+i); 26157b952d6SSatish Balay } 26257b952d6SSatish Balay 26357b952d6SSatish Balay /* do sends: 26457b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 26557b952d6SSatish Balay the ith processor 26657b952d6SSatish Balay */ 26757b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 26857b952d6SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 26957b952d6SSatish Balay CHKPTRQ(send_waits); 27057b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 27157b952d6SSatish Balay starts[0] = 0; 27257b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 27357b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 27457b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 27557b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 27657b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 27757b952d6SSatish Balay } 27857b952d6SSatish Balay PetscFree(owner); 27957b952d6SSatish Balay starts[0] = 0; 28057b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 28157b952d6SSatish Balay count = 0; 28257b952d6SSatish Balay for ( i=0; i<size; i++ ) { 28357b952d6SSatish Balay if (procs[i]) { 28457b952d6SSatish Balay MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 28557b952d6SSatish Balay comm,send_waits+count++); 28657b952d6SSatish Balay } 28757b952d6SSatish Balay } 28857b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 28957b952d6SSatish Balay 29057b952d6SSatish Balay /* Free cache space */ 29157b952d6SSatish Balay PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n); 29257b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 29357b952d6SSatish Balay 29457b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 29557b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 29657b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 29757b952d6SSatish Balay baij->rmax = nmax; 29857b952d6SSatish Balay 29957b952d6SSatish Balay return 0; 30057b952d6SSatish Balay } 30157b952d6SSatish Balay 30257b952d6SSatish Balay 30357b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 30457b952d6SSatish Balay { 30557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 30657b952d6SSatish Balay MPI_Status *send_status,recv_status; 30757b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 30857b952d6SSatish Balay int bs=baij->bs,row,col,other_disassembled; 30957b952d6SSatish Balay Scalar *values,val; 31057b952d6SSatish Balay InsertMode addv = baij->insertmode; 31157b952d6SSatish Balay 31257b952d6SSatish Balay /* wait on receives */ 31357b952d6SSatish Balay while (count) { 31457b952d6SSatish Balay MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 31557b952d6SSatish Balay /* unpack receives into our local space */ 31657b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 31757b952d6SSatish Balay MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 31857b952d6SSatish Balay n = n/3; 31957b952d6SSatish Balay for ( i=0; i<n; i++ ) { 32057b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 32157b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 32257b952d6SSatish Balay val = values[3*i+2]; 32357b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 32457b952d6SSatish Balay col -= baij->cstart*bs; 32557b952d6SSatish Balay MatSetValues(baij->A,1,&row,1,&col,&val,addv); 32657b952d6SSatish Balay } 32757b952d6SSatish Balay else { 32857b952d6SSatish Balay if (mat->was_assembled) { 329905e6a2fSBarry Smith if (!baij->colmap) { 330905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 331905e6a2fSBarry Smith } 332905e6a2fSBarry Smith col = (baij->colmap[col/bs]-1)*bs + col%bs; 33357b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 33457b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 33557b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 33657b952d6SSatish Balay } 33757b952d6SSatish Balay } 33857b952d6SSatish Balay MatSetValues(baij->B,1,&row,1,&col,&val,addv); 33957b952d6SSatish Balay } 34057b952d6SSatish Balay } 34157b952d6SSatish Balay count--; 34257b952d6SSatish Balay } 34357b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 34457b952d6SSatish Balay 34557b952d6SSatish Balay /* wait on sends */ 34657b952d6SSatish Balay if (baij->nsends) { 34757b952d6SSatish Balay send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 34857b952d6SSatish Balay CHKPTRQ(send_status); 34957b952d6SSatish Balay MPI_Waitall(baij->nsends,baij->send_waits,send_status); 35057b952d6SSatish Balay PetscFree(send_status); 35157b952d6SSatish Balay } 35257b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 35357b952d6SSatish Balay 35457b952d6SSatish Balay baij->insertmode = NOT_SET_VALUES; 35557b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 35657b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 35757b952d6SSatish Balay 35857b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 35957b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 36057b952d6SSatish Balay MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 36157b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 36257b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 36357b952d6SSatish Balay } 36457b952d6SSatish Balay 3656d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 36657b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 36757b952d6SSatish Balay } 36857b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 36957b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 37057b952d6SSatish Balay 37157b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 37257b952d6SSatish Balay return 0; 37357b952d6SSatish Balay } 37457b952d6SSatish Balay 37557b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 37657b952d6SSatish Balay { 37757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 37857b952d6SSatish Balay int ierr; 37957b952d6SSatish Balay 38057b952d6SSatish Balay if (baij->size == 1) { 38157b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 38257b952d6SSatish Balay } 38357b952d6SSatish Balay else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported"); 38457b952d6SSatish Balay return 0; 38557b952d6SSatish Balay } 38657b952d6SSatish Balay 38757b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 38857b952d6SSatish Balay { 38957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 390cee3aa6bSSatish Balay int ierr, format,rank,bs=baij->bs; 39157b952d6SSatish Balay FILE *fd; 39257b952d6SSatish Balay ViewerType vtype; 39357b952d6SSatish Balay 39457b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 39557b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 39657b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 39757b952d6SSatish Balay if (format == ASCII_FORMAT_INFO_DETAILED) { 3984e220ebcSLois Curfman McInnes MatInfo info; 39957b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 40057b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 4014e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 40257b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 40357b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 4044e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 4054e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 4064e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 4074e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 4084e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 4094e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 41057b952d6SSatish Balay fflush(fd); 41157b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 41257b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 41357b952d6SSatish Balay return 0; 41457b952d6SSatish Balay } 41557b952d6SSatish Balay else if (format == ASCII_FORMAT_INFO) { 41657b952d6SSatish Balay return 0; 41757b952d6SSatish Balay } 41857b952d6SSatish Balay } 41957b952d6SSatish Balay 42057b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 42157b952d6SSatish Balay Draw draw; 42257b952d6SSatish Balay PetscTruth isnull; 42357b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 42457b952d6SSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 42557b952d6SSatish Balay } 42657b952d6SSatish Balay 42757b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 42857b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 42957b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 43057b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 43157b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 43257b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 43357b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 43457b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 43557b952d6SSatish Balay fflush(fd); 43657b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 43757b952d6SSatish Balay } 43857b952d6SSatish Balay else { 43957b952d6SSatish Balay int size = baij->size; 44057b952d6SSatish Balay rank = baij->rank; 44157b952d6SSatish Balay if (size == 1) { 44257b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 44357b952d6SSatish Balay } 44457b952d6SSatish Balay else { 44557b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 44657b952d6SSatish Balay Mat A; 44757b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 44857b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 44957b952d6SSatish Balay int mbs=baij->mbs; 45057b952d6SSatish Balay Scalar *a; 45157b952d6SSatish Balay 45257b952d6SSatish Balay if (!rank) { 453cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 45457b952d6SSatish Balay CHKERRQ(ierr); 45557b952d6SSatish Balay } 45657b952d6SSatish Balay else { 457cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 45857b952d6SSatish Balay CHKERRQ(ierr); 45957b952d6SSatish Balay } 46057b952d6SSatish Balay PLogObjectParent(mat,A); 46157b952d6SSatish Balay 46257b952d6SSatish Balay /* copy over the A part */ 46357b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 46457b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 46557b952d6SSatish Balay row = baij->rstart; 46657b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 46757b952d6SSatish Balay 46857b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 46957b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 47057b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 47157b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 47257b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 47357b952d6SSatish Balay for (k=0; k<bs; k++ ) { 474cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 475cee3aa6bSSatish Balay col++; a += bs; 47657b952d6SSatish Balay } 47757b952d6SSatish Balay } 47857b952d6SSatish Balay } 47957b952d6SSatish Balay /* copy over the B part */ 48057b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 48157b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 48257b952d6SSatish Balay row = baij->rstart*bs; 48357b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 48457b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 48557b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 48657b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 48757b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 48857b952d6SSatish Balay for (k=0; k<bs; k++ ) { 489cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 490cee3aa6bSSatish Balay col++; a += bs; 49157b952d6SSatish Balay } 49257b952d6SSatish Balay } 49357b952d6SSatish Balay } 49457b952d6SSatish Balay PetscFree(rvals); 4956d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 4966d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 49757b952d6SSatish Balay if (!rank) { 49857b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 49957b952d6SSatish Balay } 50057b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 50157b952d6SSatish Balay } 50257b952d6SSatish Balay } 50357b952d6SSatish Balay return 0; 50457b952d6SSatish Balay } 50557b952d6SSatish Balay 50657b952d6SSatish Balay 50757b952d6SSatish Balay 50857b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 50957b952d6SSatish Balay { 51057b952d6SSatish Balay Mat mat = (Mat) obj; 51157b952d6SSatish Balay int ierr; 51257b952d6SSatish Balay ViewerType vtype; 51357b952d6SSatish Balay 51457b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 51557b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 51657b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 51757b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 51857b952d6SSatish Balay } 51957b952d6SSatish Balay else if (vtype == BINARY_FILE_VIEWER) { 52057b952d6SSatish Balay return MatView_MPIBAIJ_Binary(mat,viewer); 52157b952d6SSatish Balay } 52257b952d6SSatish Balay return 0; 52357b952d6SSatish Balay } 52457b952d6SSatish Balay 52579bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj) 52679bdfe76SSatish Balay { 52779bdfe76SSatish Balay Mat mat = (Mat) obj; 52879bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 52979bdfe76SSatish Balay int ierr; 53079bdfe76SSatish Balay 53179bdfe76SSatish Balay #if defined(PETSC_LOG) 53279bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 53379bdfe76SSatish Balay #endif 53479bdfe76SSatish Balay 53579bdfe76SSatish Balay PetscFree(baij->rowners); 53679bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 53779bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 53879bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 53979bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 54079bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 54179bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 54279bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 54379bdfe76SSatish Balay PetscFree(baij); 54479bdfe76SSatish Balay PLogObjectDestroy(mat); 54579bdfe76SSatish Balay PetscHeaderDestroy(mat); 54679bdfe76SSatish Balay return 0; 54779bdfe76SSatish Balay } 54879bdfe76SSatish Balay 549cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 550cee3aa6bSSatish Balay { 551cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 55247b4a8eaSLois Curfman McInnes int ierr, nt; 553cee3aa6bSSatish Balay 554c16cb8f2SBarry Smith VecGetLocalSize_Fast(xx,nt); 55547b4a8eaSLois Curfman McInnes if (nt != a->n) { 5560ac07820SSatish Balay SETERRQ(1,"MatMult_MPIBAIJ:Incompatible parition of A and xx"); 55747b4a8eaSLois Curfman McInnes } 558c16cb8f2SBarry Smith VecGetLocalSize_Fast(yy,nt); 55947b4a8eaSLois Curfman McInnes if (nt != a->m) { 5600ac07820SSatish Balay SETERRQ(1,"MatMult_MPIBAIJ:Incompatible parition of A and yy"); 56147b4a8eaSLois Curfman McInnes } 562cee3aa6bSSatish Balay ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 563cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 564cee3aa6bSSatish Balay ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 565cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 566cee3aa6bSSatish Balay return 0; 567cee3aa6bSSatish Balay } 568cee3aa6bSSatish Balay 569cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 570cee3aa6bSSatish Balay { 571cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 572cee3aa6bSSatish Balay int ierr; 573cee3aa6bSSatish Balay ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 574cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 575cee3aa6bSSatish Balay ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 576cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 577cee3aa6bSSatish Balay return 0; 578cee3aa6bSSatish Balay } 579cee3aa6bSSatish Balay 580cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 581cee3aa6bSSatish Balay { 582cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 583cee3aa6bSSatish Balay int ierr; 584cee3aa6bSSatish Balay 585cee3aa6bSSatish Balay /* do nondiagonal part */ 586cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 587cee3aa6bSSatish Balay /* send it on its way */ 588537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 589cee3aa6bSSatish Balay /* do local part */ 590cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 591cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 592cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 593cee3aa6bSSatish Balay /* but is not perhaps always true. */ 594cee3aa6bSSatish Balay ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 595cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx);CHKERRQ(ierr); 596cee3aa6bSSatish Balay return 0; 597cee3aa6bSSatish Balay } 598cee3aa6bSSatish Balay 599cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 600cee3aa6bSSatish Balay { 601cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 602cee3aa6bSSatish Balay int ierr; 603cee3aa6bSSatish Balay 604cee3aa6bSSatish Balay /* do nondiagonal part */ 605cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 606cee3aa6bSSatish Balay /* send it on its way */ 607537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 608cee3aa6bSSatish Balay /* do local part */ 609cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 610cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 611cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 612cee3aa6bSSatish Balay /* but is not perhaps always true. */ 613537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 614cee3aa6bSSatish Balay return 0; 615cee3aa6bSSatish Balay } 616cee3aa6bSSatish Balay 617cee3aa6bSSatish Balay /* 618cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 619cee3aa6bSSatish Balay diagonal block 620cee3aa6bSSatish Balay */ 621cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 622cee3aa6bSSatish Balay { 623cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 624cee3aa6bSSatish Balay if (a->M != a->N) 625cee3aa6bSSatish Balay SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block"); 626cee3aa6bSSatish Balay return MatGetDiagonal(a->A,v); 627cee3aa6bSSatish Balay } 628cee3aa6bSSatish Balay 629cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A) 630cee3aa6bSSatish Balay { 631cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 632cee3aa6bSSatish Balay int ierr; 633cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 634cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 635cee3aa6bSSatish Balay return 0; 636cee3aa6bSSatish Balay } 63757b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 63857b952d6SSatish Balay { 63957b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 64057b952d6SSatish Balay *m = mat->M; *n = mat->N; 64157b952d6SSatish Balay return 0; 64257b952d6SSatish Balay } 64357b952d6SSatish Balay 64457b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 64557b952d6SSatish Balay { 64657b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 64757b952d6SSatish Balay *m = mat->m; *n = mat->N; 64857b952d6SSatish Balay return 0; 64957b952d6SSatish Balay } 65057b952d6SSatish Balay 65157b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 65257b952d6SSatish Balay { 65357b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 65457b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 65557b952d6SSatish Balay return 0; 65657b952d6SSatish Balay } 65757b952d6SSatish Balay 658acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 659acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 660acdf5bf4SSatish Balay 661acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 662acdf5bf4SSatish Balay { 663acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 664acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 665acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 666d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 667d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 668acdf5bf4SSatish Balay 669acdf5bf4SSatish Balay if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIBAIJ:Already active"); 670acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 671acdf5bf4SSatish Balay 672acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 673acdf5bf4SSatish Balay /* 674acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 675acdf5bf4SSatish Balay */ 676acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 677bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 678bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 679acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 680acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 681acdf5bf4SSatish Balay } 682acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 683acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 684acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 685acdf5bf4SSatish Balay } 686acdf5bf4SSatish Balay 687acdf5bf4SSatish Balay 688d9d09a02SSatish Balay if (row < brstart || row >= brend) SETERRQ(1,"MatGetRow_MPIBAIJ:Only local rows") 689d9d09a02SSatish Balay lrow = row - brstart; 690acdf5bf4SSatish Balay 691acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 692acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 693acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 694acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 695acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 696acdf5bf4SSatish Balay nztot = nzA + nzB; 697acdf5bf4SSatish Balay 698acdf5bf4SSatish Balay cmap = mat->garray; 699acdf5bf4SSatish Balay if (v || idx) { 700acdf5bf4SSatish Balay if (nztot) { 701acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 702acdf5bf4SSatish Balay int imark = -1; 703acdf5bf4SSatish Balay if (v) { 704acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 705acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 706d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 707acdf5bf4SSatish Balay else break; 708acdf5bf4SSatish Balay } 709acdf5bf4SSatish Balay imark = i; 710acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 711acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 712acdf5bf4SSatish Balay } 713acdf5bf4SSatish Balay if (idx) { 714acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 715acdf5bf4SSatish Balay if (imark > -1) { 716acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 717bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 718acdf5bf4SSatish Balay } 719acdf5bf4SSatish Balay } else { 720acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 721d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 722d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 723acdf5bf4SSatish Balay else break; 724acdf5bf4SSatish Balay } 725acdf5bf4SSatish Balay imark = i; 726acdf5bf4SSatish Balay } 727d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 728d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 729acdf5bf4SSatish Balay } 730acdf5bf4SSatish Balay } 731d212a18eSSatish Balay else { 732d212a18eSSatish Balay if (idx) *idx = 0; 733d212a18eSSatish Balay if (v) *v = 0; 734d212a18eSSatish Balay } 735acdf5bf4SSatish Balay } 736acdf5bf4SSatish Balay *nz = nztot; 737acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 738acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 739acdf5bf4SSatish Balay return 0; 740acdf5bf4SSatish Balay } 741acdf5bf4SSatish Balay 742acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 743acdf5bf4SSatish Balay { 744acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 745acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 746acdf5bf4SSatish Balay SETERRQ(1,"MatRestoreRow_MPIBAIJ:MatGetRow not called"); 747acdf5bf4SSatish Balay } 748acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 749acdf5bf4SSatish Balay return 0; 750acdf5bf4SSatish Balay } 751acdf5bf4SSatish Balay 7525a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 7535a838052SSatish Balay { 7545a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 7555a838052SSatish Balay *bs = baij->bs; 7565a838052SSatish Balay return 0; 7575a838052SSatish Balay } 7585a838052SSatish Balay 75958667388SSatish Balay static int MatZeroEntries_MPIBAIJ(Mat A) 76058667388SSatish Balay { 76158667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 76258667388SSatish Balay int ierr; 76358667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 76458667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 76558667388SSatish Balay return 0; 76658667388SSatish Balay } 7670ac07820SSatish Balay 7684e220ebcSLois Curfman McInnes static int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 7690ac07820SSatish Balay { 7704e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 7714e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 7727d57db60SLois Curfman McInnes int ierr; 7737d57db60SLois Curfman McInnes double isend[5], irecv[5]; 7740ac07820SSatish Balay 7754e220ebcSLois Curfman McInnes info->rows_global = (double)a->M; 7764e220ebcSLois Curfman McInnes info->columns_global = (double)a->N; 7774e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 7784e220ebcSLois Curfman McInnes info->columns_local = (double)a->N; 7794e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 7804e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 7814e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 7824e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 7834e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 7840ac07820SSatish Balay if (flag == MAT_LOCAL) { 7854e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 7864e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 7874e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 7884e220ebcSLois Curfman McInnes info->memory = isend[3]; 7894e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 7900ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 7910ac07820SSatish Balay MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,matin->comm); 7924e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 7934e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 7944e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 7954e220ebcSLois Curfman McInnes info->memory = irecv[3]; 7964e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 7970ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 7980ac07820SSatish Balay MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,matin->comm); 7994e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 8004e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 8014e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 8024e220ebcSLois Curfman McInnes info->memory = irecv[3]; 8034e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 8040ac07820SSatish Balay } 8054e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 8064e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 8074e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 8080ac07820SSatish Balay return 0; 8090ac07820SSatish Balay } 8100ac07820SSatish Balay 81158667388SSatish Balay static int MatSetOption_MPIBAIJ(Mat A,MatOption op) 81258667388SSatish Balay { 81358667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 81458667388SSatish Balay 81558667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 81658667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 81758667388SSatish Balay op == MAT_COLUMNS_SORTED || 81858667388SSatish Balay op == MAT_ROW_ORIENTED) { 81958667388SSatish Balay MatSetOption(a->A,op); 82058667388SSatish Balay MatSetOption(a->B,op); 82158667388SSatish Balay } 82258667388SSatish Balay else if (op == MAT_ROWS_SORTED || 82358667388SSatish Balay op == MAT_SYMMETRIC || 82458667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 82558667388SSatish Balay op == MAT_YES_NEW_DIAGONALS) 82658667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 82758667388SSatish Balay else if (op == MAT_COLUMN_ORIENTED) { 82858667388SSatish Balay a->roworiented = 0; 82958667388SSatish Balay MatSetOption(a->A,op); 83058667388SSatish Balay MatSetOption(a->B,op); 83158667388SSatish Balay } 83258667388SSatish Balay else if (op == MAT_NO_NEW_DIAGONALS) 8330ac07820SSatish Balay {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIBAIJ:MAT_NO_NEW_DIAGONALS");} 83458667388SSatish Balay else 8350ac07820SSatish Balay {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIBAIJ:unknown option");} 83658667388SSatish Balay return 0; 83758667388SSatish Balay } 83858667388SSatish Balay 8390ac07820SSatish Balay static int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 8400ac07820SSatish Balay { 8410ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 8420ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 8430ac07820SSatish Balay Mat B; 8440ac07820SSatish Balay int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 8450ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 8460ac07820SSatish Balay Scalar *a; 8470ac07820SSatish Balay 8480ac07820SSatish Balay if (matout == PETSC_NULL && M != N) 8490ac07820SSatish Balay SETERRQ(1,"MatTranspose_MPIBAIJ:Square matrix only for in-place"); 8500ac07820SSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 8510ac07820SSatish Balay CHKERRQ(ierr); 8520ac07820SSatish Balay 8530ac07820SSatish Balay /* copy over the A part */ 8540ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 8550ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 8560ac07820SSatish Balay row = baij->rstart; 8570ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 8580ac07820SSatish Balay 8590ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 8600ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 8610ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 8620ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 8630ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 8640ac07820SSatish Balay for (k=0; k<bs; k++ ) { 8650ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 8660ac07820SSatish Balay col++; a += bs; 8670ac07820SSatish Balay } 8680ac07820SSatish Balay } 8690ac07820SSatish Balay } 8700ac07820SSatish Balay /* copy over the B part */ 8710ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 8720ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 8730ac07820SSatish Balay row = baij->rstart*bs; 8740ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 8750ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 8760ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 8770ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 8780ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 8790ac07820SSatish Balay for (k=0; k<bs; k++ ) { 8800ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 8810ac07820SSatish Balay col++; a += bs; 8820ac07820SSatish Balay } 8830ac07820SSatish Balay } 8840ac07820SSatish Balay } 8850ac07820SSatish Balay PetscFree(rvals); 8860ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 8870ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 8880ac07820SSatish Balay 8890ac07820SSatish Balay if (matout != PETSC_NULL) { 8900ac07820SSatish Balay *matout = B; 8910ac07820SSatish Balay } else { 8920ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 8930ac07820SSatish Balay PetscFree(baij->rowners); 8940ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 8950ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 8960ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 8970ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 8980ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 8990ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 9000ac07820SSatish Balay PetscFree(baij); 9010ac07820SSatish Balay PetscMemcpy(A,B,sizeof(struct _Mat)); 9020ac07820SSatish Balay PetscHeaderDestroy(B); 9030ac07820SSatish Balay } 9040ac07820SSatish Balay return 0; 9050ac07820SSatish Balay } 9060ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the 9070ac07820SSatish Balay matrix is square and the column and row owerships are identical. 9080ac07820SSatish Balay This is a BUG. The only way to fix it seems to be to access 9090ac07820SSatish Balay baij->A and baij->B directly and not through the MatZeroRows() 9100ac07820SSatish Balay routine. 9110ac07820SSatish Balay */ 9120ac07820SSatish Balay static int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 9130ac07820SSatish Balay { 9140ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 9150ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 9160ac07820SSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work; 9170ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 9180ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 9190ac07820SSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs; 9200ac07820SSatish Balay MPI_Comm comm = A->comm; 9210ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 9220ac07820SSatish Balay MPI_Status recv_status,*send_status; 9230ac07820SSatish Balay IS istmp; 9240ac07820SSatish Balay 9250ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 9260ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 9270ac07820SSatish Balay 9280ac07820SSatish Balay /* first count number of contributors to each processor */ 9290ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 9300ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 9310ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 9320ac07820SSatish Balay for ( i=0; i<N; i++ ) { 9330ac07820SSatish Balay idx = rows[i]; 9340ac07820SSatish Balay found = 0; 9350ac07820SSatish Balay for ( j=0; j<size; j++ ) { 9360ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 9370ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 9380ac07820SSatish Balay } 9390ac07820SSatish Balay } 9400ac07820SSatish Balay if (!found) SETERRQ(1,"MatZeroRows_MPIBAIJ:Index out of range"); 9410ac07820SSatish Balay } 9420ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 9430ac07820SSatish Balay 9440ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 9450ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 9460ac07820SSatish Balay MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 9470ac07820SSatish Balay nrecvs = work[rank]; 9480ac07820SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 9490ac07820SSatish Balay nmax = work[rank]; 9500ac07820SSatish Balay PetscFree(work); 9510ac07820SSatish Balay 9520ac07820SSatish Balay /* post receives: */ 9530ac07820SSatish Balay rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 9540ac07820SSatish Balay CHKPTRQ(rvalues); 9550ac07820SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 9560ac07820SSatish Balay CHKPTRQ(recv_waits); 9570ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 9580ac07820SSatish Balay MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 9590ac07820SSatish Balay } 9600ac07820SSatish Balay 9610ac07820SSatish Balay /* do sends: 9620ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 9630ac07820SSatish Balay the ith processor 9640ac07820SSatish Balay */ 9650ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 9660ac07820SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 9670ac07820SSatish Balay CHKPTRQ(send_waits); 9680ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 9690ac07820SSatish Balay starts[0] = 0; 9700ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 9710ac07820SSatish Balay for ( i=0; i<N; i++ ) { 9720ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 9730ac07820SSatish Balay } 9740ac07820SSatish Balay ISRestoreIndices(is,&rows); 9750ac07820SSatish Balay 9760ac07820SSatish Balay starts[0] = 0; 9770ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 9780ac07820SSatish Balay count = 0; 9790ac07820SSatish Balay for ( i=0; i<size; i++ ) { 9800ac07820SSatish Balay if (procs[i]) { 9810ac07820SSatish Balay MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 9820ac07820SSatish Balay } 9830ac07820SSatish Balay } 9840ac07820SSatish Balay PetscFree(starts); 9850ac07820SSatish Balay 9860ac07820SSatish Balay base = owners[rank]*bs; 9870ac07820SSatish Balay 9880ac07820SSatish Balay /* wait on receives */ 9890ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 9900ac07820SSatish Balay source = lens + nrecvs; 9910ac07820SSatish Balay count = nrecvs; slen = 0; 9920ac07820SSatish Balay while (count) { 9930ac07820SSatish Balay MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 9940ac07820SSatish Balay /* unpack receives into our local space */ 9950ac07820SSatish Balay MPI_Get_count(&recv_status,MPI_INT,&n); 9960ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 9970ac07820SSatish Balay lens[imdex] = n; 9980ac07820SSatish Balay slen += n; 9990ac07820SSatish Balay count--; 10000ac07820SSatish Balay } 10010ac07820SSatish Balay PetscFree(recv_waits); 10020ac07820SSatish Balay 10030ac07820SSatish Balay /* move the data into the send scatter */ 10040ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 10050ac07820SSatish Balay count = 0; 10060ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 10070ac07820SSatish Balay values = rvalues + i*nmax; 10080ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 10090ac07820SSatish Balay lrows[count++] = values[j] - base; 10100ac07820SSatish Balay } 10110ac07820SSatish Balay } 10120ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 10130ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 10140ac07820SSatish Balay 10150ac07820SSatish Balay /* actually zap the local rows */ 1016537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 10170ac07820SSatish Balay PLogObjectParent(A,istmp); 10180ac07820SSatish Balay PetscFree(lrows); 10190ac07820SSatish Balay ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 10200ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 10210ac07820SSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 10220ac07820SSatish Balay 10230ac07820SSatish Balay /* wait on sends */ 10240ac07820SSatish Balay if (nsends) { 10250ac07820SSatish Balay send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 10260ac07820SSatish Balay CHKPTRQ(send_status); 10270ac07820SSatish Balay MPI_Waitall(nsends,send_waits,send_status); 10280ac07820SSatish Balay PetscFree(send_status); 10290ac07820SSatish Balay } 10300ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 10310ac07820SSatish Balay 10320ac07820SSatish Balay return 0; 10330ac07820SSatish Balay } 1034ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 1035ba4ca20aSSatish Balay static int MatPrintHelp_MPIBAIJ(Mat A) 1036ba4ca20aSSatish Balay { 1037ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1038ba4ca20aSSatish Balay 1039ba4ca20aSSatish Balay if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A); 1040ba4ca20aSSatish Balay else return 0; 1041ba4ca20aSSatish Balay } 10420ac07820SSatish Balay 10430ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 10440ac07820SSatish Balay 104579bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 104679bdfe76SSatish Balay static struct _MatOps MatOps = { 1047bd16c2feSSatish Balay MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 104893bc47c4SSatish Balay MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,MatSolve_MPIBAIJ, 104993bc47c4SSatish Balay MatSolveAdd_MPIBAIJ,MatSolveTrans_MPIBAIJ,MatSolveTransAdd_MPIBAIJ,MatLUFactor_MPIBAIJ, 10500ac07820SSatish Balay 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 1051acdf5bf4SSatish Balay 0,MatGetDiagonal_MPIBAIJ,0,MatNorm_MPIBAIJ, 105258667388SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 1053*3b2fbd54SBarry Smith MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,MatLUFactorSymbolic_MPIBAIJ, 105493bc47c4SSatish Balay MatLUFactorNumeric_MPIBAIJ,0,0,MatGetSize_MPIBAIJ, 105593bc47c4SSatish Balay MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,MatILUFactorSymbolic_MPIBAIJ,0, 1056905e6a2fSBarry Smith 0,0,0,MatConvertSameType_MPIBAIJ,0,0, 1057d212a18eSSatish Balay 0,0,0,MatGetSubMatrices_MPIBAIJ, 1058ba4ca20aSSatish Balay MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1059*3b2fbd54SBarry Smith MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1060*3b2fbd54SBarry Smith MatGetRowIJ_MPIBAIJ, 1061*3b2fbd54SBarry Smith MatRestoreRowIJ_MPIBAIJ}; 106279bdfe76SSatish Balay 106379bdfe76SSatish Balay 106479bdfe76SSatish Balay /*@C 106579bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 106679bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 106779bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 106879bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 106979bdfe76SSatish Balay performance can be increased by more than a factor of 50. 107079bdfe76SSatish Balay 107179bdfe76SSatish Balay Input Parameters: 107279bdfe76SSatish Balay . comm - MPI communicator 107379bdfe76SSatish Balay . bs - size of blockk 107479bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 107592e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 107692e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 107792e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 107892e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 107992e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 108079bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 108192e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 108279bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 108379bdfe76SSatish Balay submatrix (same for all local rows) 108492e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 108592e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 108692e8d321SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if 108792e8d321SLois Curfman McInnes it is zero. 108892e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 108979bdfe76SSatish Balay submatrix (same for all local rows). 109092e8d321SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various block rows of the 109192e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 109292e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 109379bdfe76SSatish Balay 109479bdfe76SSatish Balay Output Parameter: 109579bdfe76SSatish Balay . A - the matrix 109679bdfe76SSatish Balay 109779bdfe76SSatish Balay Notes: 109879bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 109979bdfe76SSatish Balay (possibly both). 110079bdfe76SSatish Balay 110179bdfe76SSatish Balay Storage Information: 110279bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 110379bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 110479bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 110579bdfe76SSatish Balay local matrix (a rectangular submatrix). 110679bdfe76SSatish Balay 110779bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 110879bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 110979bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 111079bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 111179bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 111279bdfe76SSatish Balay 111379bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 111479bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 111579bdfe76SSatish Balay 111679bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 111779bdfe76SSatish Balay $ ------------------- 111879bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 111979bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 112079bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 112179bdfe76SSatish Balay $ ------------------- 112279bdfe76SSatish Balay $ 112379bdfe76SSatish Balay 112479bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 112579bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 112679bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 112757b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 112879bdfe76SSatish Balay 112979bdfe76SSatish Balay Now d_nz should indicate the number of nonzeros per row in the d matrix, 113079bdfe76SSatish Balay and o_nz should indicate the number of nonzeros per row in the o matrix. 113179bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 113292e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 113392e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 113492e8d321SLois Curfman McInnes matrices and the file $(PETSC_DIR)/Performance. 113579bdfe76SSatish Balay 113692e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 113779bdfe76SSatish Balay 113879bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 113979bdfe76SSatish Balay @*/ 114079bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 114179bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 114279bdfe76SSatish Balay { 114379bdfe76SSatish Balay Mat B; 114479bdfe76SSatish Balay Mat_MPIBAIJ *b; 1145cee3aa6bSSatish Balay int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE; 114679bdfe76SSatish Balay 114779bdfe76SSatish Balay if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified"); 114879bdfe76SSatish Balay *A = 0; 114979bdfe76SSatish Balay PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 115079bdfe76SSatish Balay PLogObjectCreate(B); 115179bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 115279bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 115379bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 115479bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 115579bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 115657b952d6SSatish Balay 115779bdfe76SSatish Balay B->factor = 0; 115879bdfe76SSatish Balay B->assembled = PETSC_FALSE; 115979bdfe76SSatish Balay 116079bdfe76SSatish Balay b->insertmode = NOT_SET_VALUES; 116179bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 116279bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 116379bdfe76SSatish Balay 116479bdfe76SSatish Balay if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 116557b952d6SSatish Balay SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1166cee3aa6bSSatish Balay if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified"); 1167cee3aa6bSSatish Balay if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified"); 1168cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1169cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 117079bdfe76SSatish Balay 117179bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 117279bdfe76SSatish Balay work[0] = m; work[1] = n; 117379bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 117479bdfe76SSatish Balay MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 117579bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 117679bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 117779bdfe76SSatish Balay } 117879bdfe76SSatish Balay if (m == PETSC_DECIDE) { 117979bdfe76SSatish Balay Mbs = M/bs; 118079bdfe76SSatish Balay if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize"); 118179bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 118279bdfe76SSatish Balay m = mbs*bs; 118379bdfe76SSatish Balay } 118479bdfe76SSatish Balay if (n == PETSC_DECIDE) { 118579bdfe76SSatish Balay Nbs = N/bs; 118679bdfe76SSatish Balay if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize"); 118779bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 118879bdfe76SSatish Balay n = nbs*bs; 118979bdfe76SSatish Balay } 1190cee3aa6bSSatish Balay if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize"); 119179bdfe76SSatish Balay 119279bdfe76SSatish Balay b->m = m; B->m = m; 119379bdfe76SSatish Balay b->n = n; B->n = n; 119479bdfe76SSatish Balay b->N = N; B->N = N; 119579bdfe76SSatish Balay b->M = M; B->M = M; 119679bdfe76SSatish Balay b->bs = bs; 119779bdfe76SSatish Balay b->bs2 = bs*bs; 119879bdfe76SSatish Balay b->mbs = mbs; 119979bdfe76SSatish Balay b->nbs = nbs; 120079bdfe76SSatish Balay b->Mbs = Mbs; 120179bdfe76SSatish Balay b->Nbs = Nbs; 120279bdfe76SSatish Balay 120379bdfe76SSatish Balay /* build local table of row and column ownerships */ 120479bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 120579bdfe76SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 12060ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 120779bdfe76SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 120879bdfe76SSatish Balay b->rowners[0] = 0; 120979bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 121079bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 121179bdfe76SSatish Balay } 121279bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 121379bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 121479bdfe76SSatish Balay MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 121579bdfe76SSatish Balay b->cowners[0] = 0; 121679bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 121779bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 121879bdfe76SSatish Balay } 121979bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 122079bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 122179bdfe76SSatish Balay 122279bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 122379bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 122479bdfe76SSatish Balay PLogObjectParent(B,b->A); 122579bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 122679bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 122779bdfe76SSatish Balay PLogObjectParent(B,b->B); 122879bdfe76SSatish Balay 122979bdfe76SSatish Balay /* build cache for off array entries formed */ 123079bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 123179bdfe76SSatish Balay b->colmap = 0; 123279bdfe76SSatish Balay b->garray = 0; 123379bdfe76SSatish Balay b->roworiented = 1; 123479bdfe76SSatish Balay 123579bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 123679bdfe76SSatish Balay b->lvec = 0; 123779bdfe76SSatish Balay b->Mvctx = 0; 123879bdfe76SSatish Balay 123979bdfe76SSatish Balay /* stuff for MatGetRow() */ 124079bdfe76SSatish Balay b->rowindices = 0; 124179bdfe76SSatish Balay b->rowvalues = 0; 124279bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 124379bdfe76SSatish Balay 124479bdfe76SSatish Balay *A = B; 124579bdfe76SSatish Balay return 0; 124679bdfe76SSatish Balay } 12470ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 12480ac07820SSatish Balay { 12490ac07820SSatish Balay Mat mat; 12500ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 12510ac07820SSatish Balay int ierr, len=0, flg; 12520ac07820SSatish Balay 12530ac07820SSatish Balay *newmat = 0; 12540ac07820SSatish Balay PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm); 12550ac07820SSatish Balay PLogObjectCreate(mat); 12560ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 12570ac07820SSatish Balay PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 12580ac07820SSatish Balay mat->destroy = MatDestroy_MPIBAIJ; 12590ac07820SSatish Balay mat->view = MatView_MPIBAIJ; 12600ac07820SSatish Balay mat->factor = matin->factor; 12610ac07820SSatish Balay mat->assembled = PETSC_TRUE; 12620ac07820SSatish Balay 12630ac07820SSatish Balay a->m = mat->m = oldmat->m; 12640ac07820SSatish Balay a->n = mat->n = oldmat->n; 12650ac07820SSatish Balay a->M = mat->M = oldmat->M; 12660ac07820SSatish Balay a->N = mat->N = oldmat->N; 12670ac07820SSatish Balay 12680ac07820SSatish Balay a->bs = oldmat->bs; 12690ac07820SSatish Balay a->bs2 = oldmat->bs2; 12700ac07820SSatish Balay a->mbs = oldmat->mbs; 12710ac07820SSatish Balay a->nbs = oldmat->nbs; 12720ac07820SSatish Balay a->Mbs = oldmat->Mbs; 12730ac07820SSatish Balay a->Nbs = oldmat->Nbs; 12740ac07820SSatish Balay 12750ac07820SSatish Balay a->rstart = oldmat->rstart; 12760ac07820SSatish Balay a->rend = oldmat->rend; 12770ac07820SSatish Balay a->cstart = oldmat->cstart; 12780ac07820SSatish Balay a->cend = oldmat->cend; 12790ac07820SSatish Balay a->size = oldmat->size; 12800ac07820SSatish Balay a->rank = oldmat->rank; 12810ac07820SSatish Balay a->insertmode = NOT_SET_VALUES; 12820ac07820SSatish Balay a->rowvalues = 0; 12830ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 12840ac07820SSatish Balay 12850ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 12860ac07820SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 12870ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 12880ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 12890ac07820SSatish Balay ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 12900ac07820SSatish Balay if (oldmat->colmap) { 12910ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 12920ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 12930ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 12940ac07820SSatish Balay } else a->colmap = 0; 12950ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 12960ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 12970ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 12980ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 12990ac07820SSatish Balay } else a->garray = 0; 13000ac07820SSatish Balay 13010ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 13020ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 13030ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 13040ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 13050ac07820SSatish Balay ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 13060ac07820SSatish Balay PLogObjectParent(mat,a->A); 13070ac07820SSatish Balay ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 13080ac07820SSatish Balay PLogObjectParent(mat,a->B); 13090ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 13100ac07820SSatish Balay if (flg) { 13110ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 13120ac07820SSatish Balay } 13130ac07820SSatish Balay *newmat = mat; 13140ac07820SSatish Balay return 0; 13150ac07820SSatish Balay } 131657b952d6SSatish Balay 131757b952d6SSatish Balay #include "sys.h" 131857b952d6SSatish Balay 131957b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 132057b952d6SSatish Balay { 132157b952d6SSatish Balay Mat A; 132257b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 132357b952d6SSatish Balay Scalar *vals,*buf; 132457b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 132557b952d6SSatish Balay MPI_Status status; 1326cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 132757b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 132857b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 132957b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 133057b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 133157b952d6SSatish Balay 133257b952d6SSatish Balay 133357b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 133457b952d6SSatish Balay bs2 = bs*bs; 133557b952d6SSatish Balay 133657b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 133757b952d6SSatish Balay if (!rank) { 133857b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 133957b952d6SSatish Balay ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1340cee3aa6bSSatish Balay if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object"); 134157b952d6SSatish Balay } 134257b952d6SSatish Balay 134357b952d6SSatish Balay MPI_Bcast(header+1,3,MPI_INT,0,comm); 134457b952d6SSatish Balay M = header[1]; N = header[2]; 134557b952d6SSatish Balay 134657b952d6SSatish Balay if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 134757b952d6SSatish Balay 134857b952d6SSatish Balay /* 134957b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 135057b952d6SSatish Balay divisible by the blocksize 135157b952d6SSatish Balay */ 135257b952d6SSatish Balay Mbs = M/bs; 135357b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 135457b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 135557b952d6SSatish Balay else Mbs++; 135657b952d6SSatish Balay if (extra_rows &&!rank) { 1357537820f0SBarry Smith PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 135857b952d6SSatish Balay } 1359537820f0SBarry Smith 136057b952d6SSatish Balay /* determine ownership of all rows */ 136157b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 136257b952d6SSatish Balay m = mbs * bs; 1363cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 1364cee3aa6bSSatish Balay browners = rowners + size + 1; 136557b952d6SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 136657b952d6SSatish Balay rowners[0] = 0; 1367cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 1368cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 136957b952d6SSatish Balay rstart = rowners[rank]; 137057b952d6SSatish Balay rend = rowners[rank+1]; 137157b952d6SSatish Balay 137257b952d6SSatish Balay /* distribute row lengths to all processors */ 137357b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 137457b952d6SSatish Balay if (!rank) { 137557b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 137657b952d6SSatish Balay ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 137757b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 137857b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1379cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 1380cee3aa6bSSatish Balay MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 138157b952d6SSatish Balay PetscFree(sndcounts); 138257b952d6SSatish Balay } 138357b952d6SSatish Balay else { 138457b952d6SSatish Balay MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 138557b952d6SSatish Balay } 138657b952d6SSatish Balay 138757b952d6SSatish Balay if (!rank) { 138857b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 138957b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 139057b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 139157b952d6SSatish Balay for ( i=0; i<size; i++ ) { 139257b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 139357b952d6SSatish Balay procsnz[i] += rowlengths[j]; 139457b952d6SSatish Balay } 139557b952d6SSatish Balay } 139657b952d6SSatish Balay PetscFree(rowlengths); 139757b952d6SSatish Balay 139857b952d6SSatish Balay /* determine max buffer needed and allocate it */ 139957b952d6SSatish Balay maxnz = 0; 140057b952d6SSatish Balay for ( i=0; i<size; i++ ) { 140157b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 140257b952d6SSatish Balay } 140357b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 140457b952d6SSatish Balay 140557b952d6SSatish Balay /* read in my part of the matrix column indices */ 140657b952d6SSatish Balay nz = procsnz[0]; 140757b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 140857b952d6SSatish Balay mycols = ibuf; 1409cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 141057b952d6SSatish Balay ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1411cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 1412cee3aa6bSSatish Balay 141357b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 141457b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 141557b952d6SSatish Balay nz = procsnz[i]; 141657b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 141757b952d6SSatish Balay MPI_Send(cols,nz,MPI_INT,i,tag,comm); 141857b952d6SSatish Balay } 141957b952d6SSatish Balay /* read in the stuff for the last proc */ 142057b952d6SSatish Balay if ( size != 1 ) { 142157b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 142257b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 142357b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 1424cee3aa6bSSatish Balay MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm); 142557b952d6SSatish Balay } 142657b952d6SSatish Balay PetscFree(cols); 142757b952d6SSatish Balay } 142857b952d6SSatish Balay else { 142957b952d6SSatish Balay /* determine buffer space needed for message */ 143057b952d6SSatish Balay nz = 0; 143157b952d6SSatish Balay for ( i=0; i<m; i++ ) { 143257b952d6SSatish Balay nz += locrowlens[i]; 143357b952d6SSatish Balay } 143457b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 143557b952d6SSatish Balay mycols = ibuf; 143657b952d6SSatish Balay /* receive message of column indices*/ 143757b952d6SSatish Balay MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 143857b952d6SSatish Balay MPI_Get_count(&status,MPI_INT,&maxnz); 1439cee3aa6bSSatish Balay if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file"); 144057b952d6SSatish Balay } 144157b952d6SSatish Balay 144257b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 1443cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 1444cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 144557b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 1446cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 144757b952d6SSatish Balay masked1 = mask + Mbs; 144857b952d6SSatish Balay masked2 = masked1 + Mbs; 144957b952d6SSatish Balay rowcount = 0; nzcount = 0; 145057b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 145157b952d6SSatish Balay dcount = 0; 145257b952d6SSatish Balay odcount = 0; 145357b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 145457b952d6SSatish Balay kmax = locrowlens[rowcount]; 145557b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 145657b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 145757b952d6SSatish Balay if (!mask[tmp]) { 145857b952d6SSatish Balay mask[tmp] = 1; 145957b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 146057b952d6SSatish Balay else masked1[dcount++] = tmp; 146157b952d6SSatish Balay } 146257b952d6SSatish Balay } 146357b952d6SSatish Balay rowcount++; 146457b952d6SSatish Balay } 1465cee3aa6bSSatish Balay 146657b952d6SSatish Balay dlens[i] = dcount; 146757b952d6SSatish Balay odlens[i] = odcount; 1468cee3aa6bSSatish Balay 146957b952d6SSatish Balay /* zero out the mask elements we set */ 147057b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 147157b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 147257b952d6SSatish Balay } 1473cee3aa6bSSatish Balay 147457b952d6SSatish Balay /* create our matrix */ 1475537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 1476537820f0SBarry Smith CHKERRQ(ierr); 147757b952d6SSatish Balay A = *newmat; 14786d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 147957b952d6SSatish Balay 148057b952d6SSatish Balay if (!rank) { 148157b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 148257b952d6SSatish Balay /* read in my part of the matrix numerical values */ 148357b952d6SSatish Balay nz = procsnz[0]; 148457b952d6SSatish Balay vals = buf; 1485cee3aa6bSSatish Balay mycols = ibuf; 1486cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 148757b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1488cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 1489537820f0SBarry Smith 149057b952d6SSatish Balay /* insert into matrix */ 149157b952d6SSatish Balay jj = rstart*bs; 149257b952d6SSatish Balay for ( i=0; i<m; i++ ) { 149357b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 149457b952d6SSatish Balay mycols += locrowlens[i]; 149557b952d6SSatish Balay vals += locrowlens[i]; 149657b952d6SSatish Balay jj++; 149757b952d6SSatish Balay } 149857b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 149957b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 150057b952d6SSatish Balay nz = procsnz[i]; 150157b952d6SSatish Balay vals = buf; 150257b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 150357b952d6SSatish Balay MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 150457b952d6SSatish Balay } 150557b952d6SSatish Balay /* the last proc */ 150657b952d6SSatish Balay if ( size != 1 ){ 150757b952d6SSatish Balay nz = procsnz[i] - extra_rows; 1508cee3aa6bSSatish Balay vals = buf; 150957b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 151057b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 1511cee3aa6bSSatish Balay MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm); 151257b952d6SSatish Balay } 151357b952d6SSatish Balay PetscFree(procsnz); 151457b952d6SSatish Balay } 151557b952d6SSatish Balay else { 151657b952d6SSatish Balay /* receive numeric values */ 151757b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 151857b952d6SSatish Balay 151957b952d6SSatish Balay /* receive message of values*/ 152057b952d6SSatish Balay vals = buf; 1521cee3aa6bSSatish Balay mycols = ibuf; 152257b952d6SSatish Balay MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 152357b952d6SSatish Balay MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1524cee3aa6bSSatish Balay if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file"); 152557b952d6SSatish Balay 152657b952d6SSatish Balay /* insert into matrix */ 152757b952d6SSatish Balay jj = rstart*bs; 1528cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 152957b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 153057b952d6SSatish Balay mycols += locrowlens[i]; 153157b952d6SSatish Balay vals += locrowlens[i]; 153257b952d6SSatish Balay jj++; 153357b952d6SSatish Balay } 153457b952d6SSatish Balay } 153557b952d6SSatish Balay PetscFree(locrowlens); 153657b952d6SSatish Balay PetscFree(buf); 153757b952d6SSatish Balay PetscFree(ibuf); 153857b952d6SSatish Balay PetscFree(rowners); 153957b952d6SSatish Balay PetscFree(dlens); 1540cee3aa6bSSatish Balay PetscFree(mask); 15416d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 15426d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 154357b952d6SSatish Balay return 0; 154457b952d6SSatish Balay } 154557b952d6SSatish Balay 154657b952d6SSatish Balay 1547