179bdfe76SSatish Balay #ifndef lint 2*dcb20de4SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.40 1996/12/03 15:49:52 balay Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 570f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 779bdfe76SSatish Balay 857b952d6SSatish Balay #include "draw.h" 957b952d6SSatish Balay #include "pinclude/pviewer.h" 1057b952d6SSatish Balay 1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 1593bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *); 1693bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *); 1793bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double); 1893bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec); 1993bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 2093bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec); 2193bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 2293bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *); 2357b952d6SSatish Balay 243b2fbd54SBarry Smith 25537820f0SBarry Smith /* 26537820f0SBarry Smith Local utility routine that creates a mapping from the global column 2757b952d6SSatish Balay number to the local number in the off-diagonal part of the local 2857b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 2957b952d6SSatish Balay length of colmap equals the global matrix length. 3057b952d6SSatish Balay */ 3157b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 3257b952d6SSatish Balay { 3357b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3457b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 35928fc39bSSatish Balay int nbs = B->nbs,i,bs=B->bs;; 3657b952d6SSatish Balay 3757b952d6SSatish Balay baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap); 3857b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 3957b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 40928fc39bSSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 4157b952d6SSatish Balay return 0; 4257b952d6SSatish Balay } 4357b952d6SSatish Balay 443b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 453b2fbd54SBarry Smith PetscTruth *done) 4657b952d6SSatish Balay { 473b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 4857b952d6SSatish Balay int ierr; 493b2fbd54SBarry Smith if (aij->size == 1) { 503b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 513b2fbd54SBarry Smith } else SETERRQ(1,"MatGetRowIJ_MPIBAIJ:not supported in parallel"); 523b2fbd54SBarry Smith return 0; 533b2fbd54SBarry Smith } 543b2fbd54SBarry Smith 553b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 563b2fbd54SBarry Smith PetscTruth *done) 573b2fbd54SBarry Smith { 583b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 593b2fbd54SBarry Smith int ierr; 603b2fbd54SBarry Smith if (aij->size == 1) { 613b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 623b2fbd54SBarry Smith } else SETERRQ(1,"MatRestoreRowIJ_MPIBAIJ:not supported in parallel"); 6357b952d6SSatish Balay return 0; 6457b952d6SSatish Balay } 6557b952d6SSatish Balay 66639f9d9dSBarry Smith extern int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 6757b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 6857b952d6SSatish Balay { 6957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 7057b952d6SSatish Balay Scalar value; 714fa0d573SSatish Balay int ierr,i,j,row,col; 724fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 734fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 744fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 7557b952d6SSatish Balay 762c3acbe9SBarry Smith #if defined(PETSC_BOPT_g) 7757b952d6SSatish Balay if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) { 7857b952d6SSatish Balay SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds"); 7957b952d6SSatish Balay } 802c3acbe9SBarry Smith #endif 8157b952d6SSatish Balay baij->insertmode = addv; 8257b952d6SSatish Balay for ( i=0; i<m; i++ ) { 83639f9d9dSBarry Smith #if defined(PETSC_BOPT_g) 8457b952d6SSatish Balay if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row"); 8557b952d6SSatish Balay if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large"); 86639f9d9dSBarry Smith #endif 8757b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 8857b952d6SSatish Balay row = im[i] - rstart_orig; 8957b952d6SSatish Balay for ( j=0; j<n; j++ ) { 9057b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 9157b952d6SSatish Balay col = in[j] - cstart_orig; 9257b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 93639f9d9dSBarry Smith ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 9457b952d6SSatish Balay } 95639f9d9dSBarry Smith #if defined(PETSC_BOPT_g) 96639f9d9dSBarry Smith else if (in[j] < 0) {SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column");} 97639f9d9dSBarry Smith else if (in[j] >= baij->N) {SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large");} 98639f9d9dSBarry Smith #endif 9957b952d6SSatish Balay else { 10057b952d6SSatish Balay if (mat->was_assembled) { 101905e6a2fSBarry Smith if (!baij->colmap) { 102905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 103905e6a2fSBarry Smith } 104905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 10557b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 10657b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 10757b952d6SSatish Balay col = in[j]; 10857b952d6SSatish Balay } 10957b952d6SSatish Balay } 11057b952d6SSatish Balay else col = in[j]; 11157b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 112639f9d9dSBarry Smith ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 11357b952d6SSatish Balay } 11457b952d6SSatish Balay } 11557b952d6SSatish Balay } 11657b952d6SSatish Balay else { 11790f02eecSBarry Smith if (roworiented && !baij->donotstash) { 11857b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 11957b952d6SSatish Balay } 12057b952d6SSatish Balay else { 12190f02eecSBarry Smith if (!baij->donotstash) { 12257b952d6SSatish Balay row = im[i]; 12357b952d6SSatish Balay for ( j=0; j<n; j++ ) { 12457b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 12557b952d6SSatish Balay } 12657b952d6SSatish Balay } 12757b952d6SSatish Balay } 12857b952d6SSatish Balay } 12990f02eecSBarry Smith } 13057b952d6SSatish Balay return 0; 13157b952d6SSatish Balay } 13257b952d6SSatish Balay 133d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 134d6de1c52SSatish Balay { 135d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 136d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 137d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 138d6de1c52SSatish Balay 139d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 140d6de1c52SSatish Balay if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative row"); 141d6de1c52SSatish Balay if (idxm[i] >= baij->M) SETERRQ(1,"MatGetValues_MPIBAIJ:Row too large"); 142d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 143d6de1c52SSatish Balay row = idxm[i] - bsrstart; 144d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 145d6de1c52SSatish Balay if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative column"); 146d6de1c52SSatish Balay if (idxn[j] >= baij->N) SETERRQ(1,"MatGetValues_MPIBAIJ:Col too large"); 147d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 148d6de1c52SSatish Balay col = idxn[j] - bscstart; 149d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 150d6de1c52SSatish Balay } 151d6de1c52SSatish Balay else { 152905e6a2fSBarry Smith if (!baij->colmap) { 153905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 154905e6a2fSBarry Smith } 155e60e1c95SSatish Balay if((baij->colmap[idxn[j]/bs]-1 < 0) || 156*dcb20de4SSatish Balay (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 157d9d09a02SSatish Balay else { 158*dcb20de4SSatish Balay col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 159d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 160d6de1c52SSatish Balay } 161d6de1c52SSatish Balay } 162d6de1c52SSatish Balay } 163d9d09a02SSatish Balay } 164d6de1c52SSatish Balay else { 165d6de1c52SSatish Balay SETERRQ(1,"MatGetValues_MPIBAIJ:Only local values currently supported"); 166d6de1c52SSatish Balay } 167d6de1c52SSatish Balay } 168d6de1c52SSatish Balay return 0; 169d6de1c52SSatish Balay } 170d6de1c52SSatish Balay 171d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 172d6de1c52SSatish Balay { 173d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 174d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 175acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 176d6de1c52SSatish Balay double sum = 0.0; 177d6de1c52SSatish Balay Scalar *v; 178d6de1c52SSatish Balay 179d6de1c52SSatish Balay if (baij->size == 1) { 180d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 181d6de1c52SSatish Balay } else { 182d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 183d6de1c52SSatish Balay v = amat->a; 184d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 185d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 186d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 187d6de1c52SSatish Balay #else 188d6de1c52SSatish Balay sum += (*v)*(*v); v++; 189d6de1c52SSatish Balay #endif 190d6de1c52SSatish Balay } 191d6de1c52SSatish Balay v = bmat->a; 192d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 193d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 194d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 195d6de1c52SSatish Balay #else 196d6de1c52SSatish Balay sum += (*v)*(*v); v++; 197d6de1c52SSatish Balay #endif 198d6de1c52SSatish Balay } 199d6de1c52SSatish Balay MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 200d6de1c52SSatish Balay *norm = sqrt(*norm); 201d6de1c52SSatish Balay } 202acdf5bf4SSatish Balay else 203b0267e0aSLois Curfman McInnes SETERRQ(PETSC_ERR_SUP,"MatNorm_MPIBAIJ:No support for this norm yet"); 204d6de1c52SSatish Balay } 205d6de1c52SSatish Balay return 0; 206d6de1c52SSatish Balay } 20757b952d6SSatish Balay 20857b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 20957b952d6SSatish Balay { 21057b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 21157b952d6SSatish Balay MPI_Comm comm = mat->comm; 21257b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 21357b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 21457b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 21557b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 21657b952d6SSatish Balay InsertMode addv; 21757b952d6SSatish Balay Scalar *rvalues,*svalues; 21857b952d6SSatish Balay 21957b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 22057b952d6SSatish Balay MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 22157b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 22257b952d6SSatish Balay SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added"); 22357b952d6SSatish Balay } 22457b952d6SSatish Balay baij->insertmode = addv; /* in case this processor had no cache */ 22557b952d6SSatish Balay 22657b952d6SSatish Balay /* first count number of contributors to each processor */ 22757b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 22857b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 22957b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 23057b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 23157b952d6SSatish Balay idx = baij->stash.idx[i]; 23257b952d6SSatish Balay for ( j=0; j<size; j++ ) { 23357b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 23457b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 23557b952d6SSatish Balay } 23657b952d6SSatish Balay } 23757b952d6SSatish Balay } 23857b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 23957b952d6SSatish Balay 24057b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 24157b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 24257b952d6SSatish Balay MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 24357b952d6SSatish Balay nreceives = work[rank]; 24457b952d6SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 24557b952d6SSatish Balay nmax = work[rank]; 24657b952d6SSatish Balay PetscFree(work); 24757b952d6SSatish Balay 24857b952d6SSatish Balay /* post receives: 24957b952d6SSatish Balay 1) each message will consist of ordered pairs 25057b952d6SSatish Balay (global index,value) we store the global index as a double 25157b952d6SSatish Balay to simplify the message passing. 25257b952d6SSatish Balay 2) since we don't know how long each individual message is we 25357b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 25457b952d6SSatish Balay this is a lot of wasted space. 25557b952d6SSatish Balay 25657b952d6SSatish Balay 25757b952d6SSatish Balay This could be done better. 25857b952d6SSatish Balay */ 25957b952d6SSatish Balay rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 26057b952d6SSatish Balay CHKPTRQ(rvalues); 26157b952d6SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 26257b952d6SSatish Balay CHKPTRQ(recv_waits); 26357b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 26457b952d6SSatish Balay MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 26557b952d6SSatish Balay comm,recv_waits+i); 26657b952d6SSatish Balay } 26757b952d6SSatish Balay 26857b952d6SSatish Balay /* do sends: 26957b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 27057b952d6SSatish Balay the ith processor 27157b952d6SSatish Balay */ 27257b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 27357b952d6SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 27457b952d6SSatish Balay CHKPTRQ(send_waits); 27557b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 27657b952d6SSatish Balay starts[0] = 0; 27757b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 27857b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 27957b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 28057b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 28157b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 28257b952d6SSatish Balay } 28357b952d6SSatish Balay PetscFree(owner); 28457b952d6SSatish Balay starts[0] = 0; 28557b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 28657b952d6SSatish Balay count = 0; 28757b952d6SSatish Balay for ( i=0; i<size; i++ ) { 28857b952d6SSatish Balay if (procs[i]) { 28957b952d6SSatish Balay MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 29057b952d6SSatish Balay comm,send_waits+count++); 29157b952d6SSatish Balay } 29257b952d6SSatish Balay } 29357b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 29457b952d6SSatish Balay 29557b952d6SSatish Balay /* Free cache space */ 296d2dc9b81SLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 29757b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 29857b952d6SSatish Balay 29957b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 30057b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 30157b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 30257b952d6SSatish Balay baij->rmax = nmax; 30357b952d6SSatish Balay 30457b952d6SSatish Balay return 0; 30557b952d6SSatish Balay } 30657b952d6SSatish Balay 30757b952d6SSatish Balay 30857b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 30957b952d6SSatish Balay { 31057b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 31157b952d6SSatish Balay MPI_Status *send_status,recv_status; 31257b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 31357b952d6SSatish Balay int bs=baij->bs,row,col,other_disassembled; 31457b952d6SSatish Balay Scalar *values,val; 31557b952d6SSatish Balay InsertMode addv = baij->insertmode; 31657b952d6SSatish Balay 31757b952d6SSatish Balay /* wait on receives */ 31857b952d6SSatish Balay while (count) { 31957b952d6SSatish Balay MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 32057b952d6SSatish Balay /* unpack receives into our local space */ 32157b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 32257b952d6SSatish Balay MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 32357b952d6SSatish Balay n = n/3; 32457b952d6SSatish Balay for ( i=0; i<n; i++ ) { 32557b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 32657b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 32757b952d6SSatish Balay val = values[3*i+2]; 32857b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 32957b952d6SSatish Balay col -= baij->cstart*bs; 33057b952d6SSatish Balay MatSetValues(baij->A,1,&row,1,&col,&val,addv); 33157b952d6SSatish Balay } 33257b952d6SSatish Balay else { 33357b952d6SSatish Balay if (mat->was_assembled) { 334905e6a2fSBarry Smith if (!baij->colmap) { 335905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 336905e6a2fSBarry Smith } 337905e6a2fSBarry Smith col = (baij->colmap[col/bs]-1)*bs + col%bs; 33857b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 33957b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 34057b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 34157b952d6SSatish Balay } 34257b952d6SSatish Balay } 34357b952d6SSatish Balay MatSetValues(baij->B,1,&row,1,&col,&val,addv); 34457b952d6SSatish Balay } 34557b952d6SSatish Balay } 34657b952d6SSatish Balay count--; 34757b952d6SSatish Balay } 34857b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 34957b952d6SSatish Balay 35057b952d6SSatish Balay /* wait on sends */ 35157b952d6SSatish Balay if (baij->nsends) { 35257b952d6SSatish Balay send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 35357b952d6SSatish Balay CHKPTRQ(send_status); 35457b952d6SSatish Balay MPI_Waitall(baij->nsends,baij->send_waits,send_status); 35557b952d6SSatish Balay PetscFree(send_status); 35657b952d6SSatish Balay } 35757b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 35857b952d6SSatish Balay 35957b952d6SSatish Balay baij->insertmode = NOT_SET_VALUES; 36057b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 36157b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 36257b952d6SSatish Balay 36357b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 36457b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 36557b952d6SSatish Balay MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 36657b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 36757b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 36857b952d6SSatish Balay } 36957b952d6SSatish Balay 3706d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 37157b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 37257b952d6SSatish Balay } 37357b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 37457b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 37557b952d6SSatish Balay 37657b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 37757b952d6SSatish Balay return 0; 37857b952d6SSatish Balay } 37957b952d6SSatish Balay 38057b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 38157b952d6SSatish Balay { 38257b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 38357b952d6SSatish Balay int ierr; 38457b952d6SSatish Balay 38557b952d6SSatish Balay if (baij->size == 1) { 38657b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 38757b952d6SSatish Balay } 38857b952d6SSatish Balay else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported"); 38957b952d6SSatish Balay return 0; 39057b952d6SSatish Balay } 39157b952d6SSatish Balay 39257b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 39357b952d6SSatish Balay { 39457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 395cee3aa6bSSatish Balay int ierr, format,rank,bs = baij->bs; 39657b952d6SSatish Balay FILE *fd; 39757b952d6SSatish Balay ViewerType vtype; 39857b952d6SSatish Balay 39957b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 40057b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 40157b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 402639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 4034e220ebcSLois Curfman McInnes MatInfo info; 40457b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 40557b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 4064e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 40757b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 40857b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 4094e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 4104e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 4114e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 4124e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 4134e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 4144e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 41557b952d6SSatish Balay fflush(fd); 41657b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 41757b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 41857b952d6SSatish Balay return 0; 41957b952d6SSatish Balay } 420639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 421bcc3fcf6SBarry Smith PetscPrintf(mat->comm," block size is %d\n",bs); 42257b952d6SSatish Balay return 0; 42357b952d6SSatish Balay } 42457b952d6SSatish Balay } 42557b952d6SSatish Balay 42657b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 42757b952d6SSatish Balay Draw draw; 42857b952d6SSatish Balay PetscTruth isnull; 42957b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 43057b952d6SSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 43157b952d6SSatish Balay } 43257b952d6SSatish Balay 43357b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 43457b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 43557b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 43657b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 43757b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 43857b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 43957b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 44057b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 44157b952d6SSatish Balay fflush(fd); 44257b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 44357b952d6SSatish Balay } 44457b952d6SSatish Balay else { 44557b952d6SSatish Balay int size = baij->size; 44657b952d6SSatish Balay rank = baij->rank; 44757b952d6SSatish Balay if (size == 1) { 44857b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 44957b952d6SSatish Balay } 45057b952d6SSatish Balay else { 45157b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 45257b952d6SSatish Balay Mat A; 45357b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 45457b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 45557b952d6SSatish Balay int mbs=baij->mbs; 45657b952d6SSatish Balay Scalar *a; 45757b952d6SSatish Balay 45857b952d6SSatish Balay if (!rank) { 459cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 46057b952d6SSatish Balay CHKERRQ(ierr); 46157b952d6SSatish Balay } 46257b952d6SSatish Balay else { 463cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 46457b952d6SSatish Balay CHKERRQ(ierr); 46557b952d6SSatish Balay } 46657b952d6SSatish Balay PLogObjectParent(mat,A); 46757b952d6SSatish Balay 46857b952d6SSatish Balay /* copy over the A part */ 46957b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 47057b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 47157b952d6SSatish Balay row = baij->rstart; 47257b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 47357b952d6SSatish Balay 47457b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 47557b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 47657b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 47757b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 47857b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 47957b952d6SSatish Balay for (k=0; k<bs; k++ ) { 480cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 481cee3aa6bSSatish Balay col++; a += bs; 48257b952d6SSatish Balay } 48357b952d6SSatish Balay } 48457b952d6SSatish Balay } 48557b952d6SSatish Balay /* copy over the B part */ 48657b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 48757b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 48857b952d6SSatish Balay row = baij->rstart*bs; 48957b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 49057b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 49157b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 49257b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 49357b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 49457b952d6SSatish Balay for (k=0; k<bs; k++ ) { 495cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 496cee3aa6bSSatish Balay col++; a += bs; 49757b952d6SSatish Balay } 49857b952d6SSatish Balay } 49957b952d6SSatish Balay } 50057b952d6SSatish Balay PetscFree(rvals); 5016d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 5026d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 50357b952d6SSatish Balay if (!rank) { 50457b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 50557b952d6SSatish Balay } 50657b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 50757b952d6SSatish Balay } 50857b952d6SSatish Balay } 50957b952d6SSatish Balay return 0; 51057b952d6SSatish Balay } 51157b952d6SSatish Balay 51257b952d6SSatish Balay 51357b952d6SSatish Balay 51457b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 51557b952d6SSatish Balay { 51657b952d6SSatish Balay Mat mat = (Mat) obj; 51757b952d6SSatish Balay int ierr; 51857b952d6SSatish Balay ViewerType vtype; 51957b952d6SSatish Balay 52057b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 52157b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 52257b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 52357b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 52457b952d6SSatish Balay } 52557b952d6SSatish Balay else if (vtype == BINARY_FILE_VIEWER) { 52657b952d6SSatish Balay return MatView_MPIBAIJ_Binary(mat,viewer); 52757b952d6SSatish Balay } 52857b952d6SSatish Balay return 0; 52957b952d6SSatish Balay } 53057b952d6SSatish Balay 53179bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj) 53279bdfe76SSatish Balay { 53379bdfe76SSatish Balay Mat mat = (Mat) obj; 53479bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 53579bdfe76SSatish Balay int ierr; 53679bdfe76SSatish Balay 53779bdfe76SSatish Balay #if defined(PETSC_LOG) 53879bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 53979bdfe76SSatish Balay #endif 54079bdfe76SSatish Balay 54179bdfe76SSatish Balay PetscFree(baij->rowners); 54279bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 54379bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 54479bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 54579bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 54679bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 54779bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 54879bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 54979bdfe76SSatish Balay PetscFree(baij); 55090f02eecSBarry Smith if (mat->mapping) { 55190f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 55290f02eecSBarry Smith } 55379bdfe76SSatish Balay PLogObjectDestroy(mat); 55479bdfe76SSatish Balay PetscHeaderDestroy(mat); 55579bdfe76SSatish Balay return 0; 55679bdfe76SSatish Balay } 55779bdfe76SSatish Balay 558cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 559cee3aa6bSSatish Balay { 560cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 56147b4a8eaSLois Curfman McInnes int ierr, nt; 562cee3aa6bSSatish Balay 563c16cb8f2SBarry Smith VecGetLocalSize_Fast(xx,nt); 56447b4a8eaSLois Curfman McInnes if (nt != a->n) { 5650ac07820SSatish Balay SETERRQ(1,"MatMult_MPIBAIJ:Incompatible parition of A and xx"); 56647b4a8eaSLois Curfman McInnes } 567c16cb8f2SBarry Smith VecGetLocalSize_Fast(yy,nt); 56847b4a8eaSLois Curfman McInnes if (nt != a->m) { 5690ac07820SSatish Balay SETERRQ(1,"MatMult_MPIBAIJ:Incompatible parition of A and yy"); 57047b4a8eaSLois Curfman McInnes } 57143a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 572cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 57343a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 574cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 57543a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 576cee3aa6bSSatish Balay return 0; 577cee3aa6bSSatish Balay } 578cee3aa6bSSatish Balay 579cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 580cee3aa6bSSatish Balay { 581cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 582cee3aa6bSSatish Balay int ierr; 58343a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 584cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 58543a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 586cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 587cee3aa6bSSatish Balay return 0; 588cee3aa6bSSatish Balay } 589cee3aa6bSSatish Balay 590cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 591cee3aa6bSSatish Balay { 592cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 593cee3aa6bSSatish Balay int ierr; 594cee3aa6bSSatish Balay 595cee3aa6bSSatish Balay /* do nondiagonal part */ 596cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 597cee3aa6bSSatish Balay /* send it on its way */ 598537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 599cee3aa6bSSatish Balay /* do local part */ 600cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 601cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 602cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 603cee3aa6bSSatish Balay /* but is not perhaps always true. */ 604639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 605cee3aa6bSSatish Balay return 0; 606cee3aa6bSSatish Balay } 607cee3aa6bSSatish Balay 608cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 609cee3aa6bSSatish Balay { 610cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 611cee3aa6bSSatish Balay int ierr; 612cee3aa6bSSatish Balay 613cee3aa6bSSatish Balay /* do nondiagonal part */ 614cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 615cee3aa6bSSatish Balay /* send it on its way */ 616537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 617cee3aa6bSSatish Balay /* do local part */ 618cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 619cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 620cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 621cee3aa6bSSatish Balay /* but is not perhaps always true. */ 622537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 623cee3aa6bSSatish Balay return 0; 624cee3aa6bSSatish Balay } 625cee3aa6bSSatish Balay 626cee3aa6bSSatish Balay /* 627cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 628cee3aa6bSSatish Balay diagonal block 629cee3aa6bSSatish Balay */ 630cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 631cee3aa6bSSatish Balay { 632cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 633cee3aa6bSSatish Balay if (a->M != a->N) 634cee3aa6bSSatish Balay SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block"); 635cee3aa6bSSatish Balay return MatGetDiagonal(a->A,v); 636cee3aa6bSSatish Balay } 637cee3aa6bSSatish Balay 638cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A) 639cee3aa6bSSatish Balay { 640cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 641cee3aa6bSSatish Balay int ierr; 642cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 643cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 644cee3aa6bSSatish Balay return 0; 645cee3aa6bSSatish Balay } 64657b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 64757b952d6SSatish Balay { 64857b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 64957b952d6SSatish Balay *m = mat->M; *n = mat->N; 65057b952d6SSatish Balay return 0; 65157b952d6SSatish Balay } 65257b952d6SSatish Balay 65357b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 65457b952d6SSatish Balay { 65557b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 65657b952d6SSatish Balay *m = mat->m; *n = mat->N; 65757b952d6SSatish Balay return 0; 65857b952d6SSatish Balay } 65957b952d6SSatish Balay 66057b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 66157b952d6SSatish Balay { 66257b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 66357b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 66457b952d6SSatish Balay return 0; 66557b952d6SSatish Balay } 66657b952d6SSatish Balay 667acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 668acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 669acdf5bf4SSatish Balay 670acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 671acdf5bf4SSatish Balay { 672acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 673acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 674acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 675d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 676d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 677acdf5bf4SSatish Balay 678acdf5bf4SSatish Balay if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIBAIJ:Already active"); 679acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 680acdf5bf4SSatish Balay 681acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 682acdf5bf4SSatish Balay /* 683acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 684acdf5bf4SSatish Balay */ 685acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 686bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 687bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 688acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 689acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 690acdf5bf4SSatish Balay } 691acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 692acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 693acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 694acdf5bf4SSatish Balay } 695acdf5bf4SSatish Balay 696acdf5bf4SSatish Balay 697d9d09a02SSatish Balay if (row < brstart || row >= brend) SETERRQ(1,"MatGetRow_MPIBAIJ:Only local rows") 698d9d09a02SSatish Balay lrow = row - brstart; 699acdf5bf4SSatish Balay 700acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 701acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 702acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 703acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 704acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 705acdf5bf4SSatish Balay nztot = nzA + nzB; 706acdf5bf4SSatish Balay 707acdf5bf4SSatish Balay cmap = mat->garray; 708acdf5bf4SSatish Balay if (v || idx) { 709acdf5bf4SSatish Balay if (nztot) { 710acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 711acdf5bf4SSatish Balay int imark = -1; 712acdf5bf4SSatish Balay if (v) { 713acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 714acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 715d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 716acdf5bf4SSatish Balay else break; 717acdf5bf4SSatish Balay } 718acdf5bf4SSatish Balay imark = i; 719acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 720acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 721acdf5bf4SSatish Balay } 722acdf5bf4SSatish Balay if (idx) { 723acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 724acdf5bf4SSatish Balay if (imark > -1) { 725acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 726bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 727acdf5bf4SSatish Balay } 728acdf5bf4SSatish Balay } else { 729acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 730d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 731d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 732acdf5bf4SSatish Balay else break; 733acdf5bf4SSatish Balay } 734acdf5bf4SSatish Balay imark = i; 735acdf5bf4SSatish Balay } 736d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 737d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 738acdf5bf4SSatish Balay } 739acdf5bf4SSatish Balay } 740d212a18eSSatish Balay else { 741d212a18eSSatish Balay if (idx) *idx = 0; 742d212a18eSSatish Balay if (v) *v = 0; 743d212a18eSSatish Balay } 744acdf5bf4SSatish Balay } 745acdf5bf4SSatish Balay *nz = nztot; 746acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 747acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 748acdf5bf4SSatish Balay return 0; 749acdf5bf4SSatish Balay } 750acdf5bf4SSatish Balay 751acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 752acdf5bf4SSatish Balay { 753acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 754acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 755acdf5bf4SSatish Balay SETERRQ(1,"MatRestoreRow_MPIBAIJ:MatGetRow not called"); 756acdf5bf4SSatish Balay } 757acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 758acdf5bf4SSatish Balay return 0; 759acdf5bf4SSatish Balay } 760acdf5bf4SSatish Balay 7615a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 7625a838052SSatish Balay { 7635a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 7645a838052SSatish Balay *bs = baij->bs; 7655a838052SSatish Balay return 0; 7665a838052SSatish Balay } 7675a838052SSatish Balay 76858667388SSatish Balay static int MatZeroEntries_MPIBAIJ(Mat A) 76958667388SSatish Balay { 77058667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 77158667388SSatish Balay int ierr; 77258667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 77358667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 77458667388SSatish Balay return 0; 77558667388SSatish Balay } 7760ac07820SSatish Balay 7774e220ebcSLois Curfman McInnes static int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 7780ac07820SSatish Balay { 7794e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 7804e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 7817d57db60SLois Curfman McInnes int ierr; 7827d57db60SLois Curfman McInnes double isend[5], irecv[5]; 7830ac07820SSatish Balay 7844e220ebcSLois Curfman McInnes info->rows_global = (double)a->M; 7854e220ebcSLois Curfman McInnes info->columns_global = (double)a->N; 7864e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 7874e220ebcSLois Curfman McInnes info->columns_local = (double)a->N; 7884e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 7894e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 7904e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 7914e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 7924e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 7930ac07820SSatish Balay if (flag == MAT_LOCAL) { 7944e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 7954e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 7964e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 7974e220ebcSLois Curfman McInnes info->memory = isend[3]; 7984e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 7990ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 8000ac07820SSatish Balay MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,matin->comm); 8014e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 8024e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 8034e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 8044e220ebcSLois Curfman McInnes info->memory = irecv[3]; 8054e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 8060ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 8070ac07820SSatish Balay MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,matin->comm); 8084e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 8094e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 8104e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 8114e220ebcSLois Curfman McInnes info->memory = irecv[3]; 8124e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 8130ac07820SSatish Balay } 8144e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 8154e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 8164e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 8170ac07820SSatish Balay return 0; 8180ac07820SSatish Balay } 8190ac07820SSatish Balay 82058667388SSatish Balay static int MatSetOption_MPIBAIJ(Mat A,MatOption op) 82158667388SSatish Balay { 82258667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 82358667388SSatish Balay 82458667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 82558667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 8266da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 827b1fbbac0SLois Curfman McInnes op == MAT_COLUMNS_SORTED) { 828b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 829b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 830b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 831aeafbbfcSLois Curfman McInnes a->roworiented = 1; 83258667388SSatish Balay MatSetOption(a->A,op); 83358667388SSatish Balay MatSetOption(a->B,op); 834b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 8356da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 83658667388SSatish Balay op == MAT_SYMMETRIC || 83758667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 83858667388SSatish Balay op == MAT_YES_NEW_DIAGONALS) 83958667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 84058667388SSatish Balay else if (op == MAT_COLUMN_ORIENTED) { 84158667388SSatish Balay a->roworiented = 0; 84258667388SSatish Balay MatSetOption(a->A,op); 84358667388SSatish Balay MatSetOption(a->B,op); 84490f02eecSBarry Smith } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) { 84590f02eecSBarry Smith a->donotstash = 1; 84690f02eecSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) 8470ac07820SSatish Balay {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIBAIJ:MAT_NO_NEW_DIAGONALS");} 84858667388SSatish Balay else 8490ac07820SSatish Balay {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIBAIJ:unknown option");} 85058667388SSatish Balay return 0; 85158667388SSatish Balay } 85258667388SSatish Balay 8530ac07820SSatish Balay static int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 8540ac07820SSatish Balay { 8550ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 8560ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 8570ac07820SSatish Balay Mat B; 8580ac07820SSatish Balay int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 8590ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 8600ac07820SSatish Balay Scalar *a; 8610ac07820SSatish Balay 8620ac07820SSatish Balay if (matout == PETSC_NULL && M != N) 8630ac07820SSatish Balay SETERRQ(1,"MatTranspose_MPIBAIJ:Square matrix only for in-place"); 8640ac07820SSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 8650ac07820SSatish Balay CHKERRQ(ierr); 8660ac07820SSatish Balay 8670ac07820SSatish Balay /* copy over the A part */ 8680ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 8690ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 8700ac07820SSatish Balay row = baij->rstart; 8710ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 8720ac07820SSatish Balay 8730ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 8740ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 8750ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 8760ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 8770ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 8780ac07820SSatish Balay for (k=0; k<bs; k++ ) { 8790ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 8800ac07820SSatish Balay col++; a += bs; 8810ac07820SSatish Balay } 8820ac07820SSatish Balay } 8830ac07820SSatish Balay } 8840ac07820SSatish Balay /* copy over the B part */ 8850ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 8860ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 8870ac07820SSatish Balay row = baij->rstart*bs; 8880ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 8890ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 8900ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 8910ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 8920ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 8930ac07820SSatish Balay for (k=0; k<bs; k++ ) { 8940ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 8950ac07820SSatish Balay col++; a += bs; 8960ac07820SSatish Balay } 8970ac07820SSatish Balay } 8980ac07820SSatish Balay } 8990ac07820SSatish Balay PetscFree(rvals); 9000ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 9010ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 9020ac07820SSatish Balay 9030ac07820SSatish Balay if (matout != PETSC_NULL) { 9040ac07820SSatish Balay *matout = B; 9050ac07820SSatish Balay } else { 9060ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 9070ac07820SSatish Balay PetscFree(baij->rowners); 9080ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 9090ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 9100ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 9110ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 9120ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 9130ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 9140ac07820SSatish Balay PetscFree(baij); 9150ac07820SSatish Balay PetscMemcpy(A,B,sizeof(struct _Mat)); 9160ac07820SSatish Balay PetscHeaderDestroy(B); 9170ac07820SSatish Balay } 9180ac07820SSatish Balay return 0; 9190ac07820SSatish Balay } 9200e95ebc0SSatish Balay 9210e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 9220e95ebc0SSatish Balay { 9230e95ebc0SSatish Balay Mat a = ((Mat_MPIBAIJ *) A->data)->A; 9240e95ebc0SSatish Balay Mat b = ((Mat_MPIBAIJ *) A->data)->B; 9250e95ebc0SSatish Balay int ierr,s1,s2,s3; 9260e95ebc0SSatish Balay 9270e95ebc0SSatish Balay if (ll) { 9280e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 9290e95ebc0SSatish Balay ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 9300e95ebc0SSatish Balay if (s1!=s2) SETERRQ(1,"MatDiagonalScale_MPIBAIJ: non-conforming local sizes"); 9310e95ebc0SSatish Balay ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 9320e95ebc0SSatish Balay ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 9330e95ebc0SSatish Balay } 9340e95ebc0SSatish Balay if (rr) SETERRQ(1,"MatDiagonalScale_MPIBAIJ:not supported for right vector"); 9350e95ebc0SSatish Balay return 0; 9360e95ebc0SSatish Balay } 9370e95ebc0SSatish Balay 9380ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the 9390ac07820SSatish Balay matrix is square and the column and row owerships are identical. 9400ac07820SSatish Balay This is a BUG. The only way to fix it seems to be to access 9410ac07820SSatish Balay baij->A and baij->B directly and not through the MatZeroRows() 9420ac07820SSatish Balay routine. 9430ac07820SSatish Balay */ 9440ac07820SSatish Balay static int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 9450ac07820SSatish Balay { 9460ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 9470ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 9480ac07820SSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work; 9490ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 9500ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 9510ac07820SSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs; 9520ac07820SSatish Balay MPI_Comm comm = A->comm; 9530ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 9540ac07820SSatish Balay MPI_Status recv_status,*send_status; 9550ac07820SSatish Balay IS istmp; 9560ac07820SSatish Balay 9570ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 9580ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 9590ac07820SSatish Balay 9600ac07820SSatish Balay /* first count number of contributors to each processor */ 9610ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 9620ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 9630ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 9640ac07820SSatish Balay for ( i=0; i<N; i++ ) { 9650ac07820SSatish Balay idx = rows[i]; 9660ac07820SSatish Balay found = 0; 9670ac07820SSatish Balay for ( j=0; j<size; j++ ) { 9680ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 9690ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 9700ac07820SSatish Balay } 9710ac07820SSatish Balay } 9720ac07820SSatish Balay if (!found) SETERRQ(1,"MatZeroRows_MPIBAIJ:Index out of range"); 9730ac07820SSatish Balay } 9740ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 9750ac07820SSatish Balay 9760ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 9770ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 9780ac07820SSatish Balay MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 9790ac07820SSatish Balay nrecvs = work[rank]; 9800ac07820SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 9810ac07820SSatish Balay nmax = work[rank]; 9820ac07820SSatish Balay PetscFree(work); 9830ac07820SSatish Balay 9840ac07820SSatish Balay /* post receives: */ 9850ac07820SSatish Balay rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 9860ac07820SSatish Balay CHKPTRQ(rvalues); 9870ac07820SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 9880ac07820SSatish Balay CHKPTRQ(recv_waits); 9890ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 9900ac07820SSatish Balay MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 9910ac07820SSatish Balay } 9920ac07820SSatish Balay 9930ac07820SSatish Balay /* do sends: 9940ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 9950ac07820SSatish Balay the ith processor 9960ac07820SSatish Balay */ 9970ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 9980ac07820SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 9990ac07820SSatish Balay CHKPTRQ(send_waits); 10000ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 10010ac07820SSatish Balay starts[0] = 0; 10020ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 10030ac07820SSatish Balay for ( i=0; i<N; i++ ) { 10040ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 10050ac07820SSatish Balay } 10060ac07820SSatish Balay ISRestoreIndices(is,&rows); 10070ac07820SSatish Balay 10080ac07820SSatish Balay starts[0] = 0; 10090ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 10100ac07820SSatish Balay count = 0; 10110ac07820SSatish Balay for ( i=0; i<size; i++ ) { 10120ac07820SSatish Balay if (procs[i]) { 10130ac07820SSatish Balay MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 10140ac07820SSatish Balay } 10150ac07820SSatish Balay } 10160ac07820SSatish Balay PetscFree(starts); 10170ac07820SSatish Balay 10180ac07820SSatish Balay base = owners[rank]*bs; 10190ac07820SSatish Balay 10200ac07820SSatish Balay /* wait on receives */ 10210ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 10220ac07820SSatish Balay source = lens + nrecvs; 10230ac07820SSatish Balay count = nrecvs; slen = 0; 10240ac07820SSatish Balay while (count) { 10250ac07820SSatish Balay MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 10260ac07820SSatish Balay /* unpack receives into our local space */ 10270ac07820SSatish Balay MPI_Get_count(&recv_status,MPI_INT,&n); 10280ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 10290ac07820SSatish Balay lens[imdex] = n; 10300ac07820SSatish Balay slen += n; 10310ac07820SSatish Balay count--; 10320ac07820SSatish Balay } 10330ac07820SSatish Balay PetscFree(recv_waits); 10340ac07820SSatish Balay 10350ac07820SSatish Balay /* move the data into the send scatter */ 10360ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 10370ac07820SSatish Balay count = 0; 10380ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 10390ac07820SSatish Balay values = rvalues + i*nmax; 10400ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 10410ac07820SSatish Balay lrows[count++] = values[j] - base; 10420ac07820SSatish Balay } 10430ac07820SSatish Balay } 10440ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 10450ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 10460ac07820SSatish Balay 10470ac07820SSatish Balay /* actually zap the local rows */ 1048537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 10490ac07820SSatish Balay PLogObjectParent(A,istmp); 10500ac07820SSatish Balay PetscFree(lrows); 10510ac07820SSatish Balay ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 10520ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 10530ac07820SSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 10540ac07820SSatish Balay 10550ac07820SSatish Balay /* wait on sends */ 10560ac07820SSatish Balay if (nsends) { 10570ac07820SSatish Balay send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 10580ac07820SSatish Balay CHKPTRQ(send_status); 10590ac07820SSatish Balay MPI_Waitall(nsends,send_waits,send_status); 10600ac07820SSatish Balay PetscFree(send_status); 10610ac07820SSatish Balay } 10620ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 10630ac07820SSatish Balay 10640ac07820SSatish Balay return 0; 10650ac07820SSatish Balay } 1066ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 1067ba4ca20aSSatish Balay static int MatPrintHelp_MPIBAIJ(Mat A) 1068ba4ca20aSSatish Balay { 1069ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1070ba4ca20aSSatish Balay 1071ba4ca20aSSatish Balay if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A); 1072ba4ca20aSSatish Balay else return 0; 1073ba4ca20aSSatish Balay } 10740ac07820SSatish Balay 10750ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 10760ac07820SSatish Balay 107779bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 107879bdfe76SSatish Balay static struct _MatOps MatOps = { 1079bd16c2feSSatish Balay MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 10804c50302cSBarry Smith MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 10814c50302cSBarry Smith 0,0,0,0, 10820ac07820SSatish Balay 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 10830e95ebc0SSatish Balay 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 108458667388SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 10854c50302cSBarry Smith MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 10864c50302cSBarry Smith 0,0,0,MatGetSize_MPIBAIJ, 10874c50302cSBarry Smith MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 1088905e6a2fSBarry Smith 0,0,0,MatConvertSameType_MPIBAIJ,0,0, 1089d212a18eSSatish Balay 0,0,0,MatGetSubMatrices_MPIBAIJ, 1090ba4ca20aSSatish Balay MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 10914c50302cSBarry Smith MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ}; 109279bdfe76SSatish Balay 109379bdfe76SSatish Balay 109479bdfe76SSatish Balay /*@C 109579bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 109679bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 109779bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 109879bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 109979bdfe76SSatish Balay performance can be increased by more than a factor of 50. 110079bdfe76SSatish Balay 110179bdfe76SSatish Balay Input Parameters: 110279bdfe76SSatish Balay . comm - MPI communicator 110379bdfe76SSatish Balay . bs - size of blockk 110479bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 110592e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 110692e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 110792e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 110892e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 110992e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 111079bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 111192e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 111279bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 111379bdfe76SSatish Balay submatrix (same for all local rows) 111492e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 111592e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 111692e8d321SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if 111792e8d321SLois Curfman McInnes it is zero. 111892e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 111979bdfe76SSatish Balay submatrix (same for all local rows). 112092e8d321SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various block rows of the 112192e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 112292e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 112379bdfe76SSatish Balay 112479bdfe76SSatish Balay Output Parameter: 112579bdfe76SSatish Balay . A - the matrix 112679bdfe76SSatish Balay 112779bdfe76SSatish Balay Notes: 112879bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 112979bdfe76SSatish Balay (possibly both). 113079bdfe76SSatish Balay 113179bdfe76SSatish Balay Storage Information: 113279bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 113379bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 113479bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 113579bdfe76SSatish Balay local matrix (a rectangular submatrix). 113679bdfe76SSatish Balay 113779bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 113879bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 113979bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 114079bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 114179bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 114279bdfe76SSatish Balay 114379bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 114479bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 114579bdfe76SSatish Balay 114679bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 114779bdfe76SSatish Balay $ ------------------- 114879bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 114979bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 115079bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 115179bdfe76SSatish Balay $ ------------------- 115279bdfe76SSatish Balay $ 115379bdfe76SSatish Balay 115479bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 115579bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 115679bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 115757b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 115879bdfe76SSatish Balay 115979bdfe76SSatish Balay Now d_nz should indicate the number of nonzeros per row in the d matrix, 116079bdfe76SSatish Balay and o_nz should indicate the number of nonzeros per row in the o matrix. 116179bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 116292e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 116392e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 11646da5968aSLois Curfman McInnes matrices. 116579bdfe76SSatish Balay 116692e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 116779bdfe76SSatish Balay 116879bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 116979bdfe76SSatish Balay @*/ 117079bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 117179bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 117279bdfe76SSatish Balay { 117379bdfe76SSatish Balay Mat B; 117479bdfe76SSatish Balay Mat_MPIBAIJ *b; 11754c50302cSBarry Smith int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 117679bdfe76SSatish Balay 117779bdfe76SSatish Balay if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified"); 117879bdfe76SSatish Balay *A = 0; 117979bdfe76SSatish Balay PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 118079bdfe76SSatish Balay PLogObjectCreate(B); 118179bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 118279bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 118379bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 11844c50302cSBarry Smith MPI_Comm_size(comm,&size); 11854c50302cSBarry Smith if (size == 1) { 11864c50302cSBarry Smith B->ops.getrowij = MatGetRowIJ_MPIBAIJ; 11874c50302cSBarry Smith B->ops.restorerowij = MatRestoreRowIJ_MPIBAIJ; 11884c50302cSBarry Smith B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIBAIJ; 11894c50302cSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_MPIBAIJ; 11904c50302cSBarry Smith B->ops.lufactor = MatLUFactor_MPIBAIJ; 11914c50302cSBarry Smith B->ops.solve = MatSolve_MPIBAIJ; 11924c50302cSBarry Smith B->ops.solveadd = MatSolveAdd_MPIBAIJ; 11934c50302cSBarry Smith B->ops.solvetrans = MatSolveTrans_MPIBAIJ; 11944c50302cSBarry Smith B->ops.solvetransadd = MatSolveTransAdd_MPIBAIJ; 11954c50302cSBarry Smith B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ; 11964c50302cSBarry Smith } 11974c50302cSBarry Smith 119879bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 119979bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 120090f02eecSBarry Smith B->mapping = 0; 120179bdfe76SSatish Balay B->factor = 0; 120279bdfe76SSatish Balay B->assembled = PETSC_FALSE; 120379bdfe76SSatish Balay 120479bdfe76SSatish Balay b->insertmode = NOT_SET_VALUES; 120579bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 120679bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 120779bdfe76SSatish Balay 120879bdfe76SSatish Balay if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 120957b952d6SSatish Balay SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1210cee3aa6bSSatish Balay if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified"); 1211cee3aa6bSSatish Balay if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified"); 1212cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1213cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 121479bdfe76SSatish Balay 121579bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 121679bdfe76SSatish Balay work[0] = m; work[1] = n; 121779bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 121879bdfe76SSatish Balay MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 121979bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 122079bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 122179bdfe76SSatish Balay } 122279bdfe76SSatish Balay if (m == PETSC_DECIDE) { 122379bdfe76SSatish Balay Mbs = M/bs; 122479bdfe76SSatish Balay if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize"); 122579bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 122679bdfe76SSatish Balay m = mbs*bs; 122779bdfe76SSatish Balay } 122879bdfe76SSatish Balay if (n == PETSC_DECIDE) { 122979bdfe76SSatish Balay Nbs = N/bs; 123079bdfe76SSatish Balay if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize"); 123179bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 123279bdfe76SSatish Balay n = nbs*bs; 123379bdfe76SSatish Balay } 1234cee3aa6bSSatish Balay if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize"); 123579bdfe76SSatish Balay 123679bdfe76SSatish Balay b->m = m; B->m = m; 123779bdfe76SSatish Balay b->n = n; B->n = n; 123879bdfe76SSatish Balay b->N = N; B->N = N; 123979bdfe76SSatish Balay b->M = M; B->M = M; 124079bdfe76SSatish Balay b->bs = bs; 124179bdfe76SSatish Balay b->bs2 = bs*bs; 124279bdfe76SSatish Balay b->mbs = mbs; 124379bdfe76SSatish Balay b->nbs = nbs; 124479bdfe76SSatish Balay b->Mbs = Mbs; 124579bdfe76SSatish Balay b->Nbs = Nbs; 124679bdfe76SSatish Balay 124779bdfe76SSatish Balay /* build local table of row and column ownerships */ 124879bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 124979bdfe76SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 12500ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 125179bdfe76SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 125279bdfe76SSatish Balay b->rowners[0] = 0; 125379bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 125479bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 125579bdfe76SSatish Balay } 125679bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 125779bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 12584fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 12594fa0d573SSatish Balay b->rend_bs = b->rend * bs; 12604fa0d573SSatish Balay 126179bdfe76SSatish Balay MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 126279bdfe76SSatish Balay b->cowners[0] = 0; 126379bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 126479bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 126579bdfe76SSatish Balay } 126679bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 126779bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 12684fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 12694fa0d573SSatish Balay b->cend_bs = b->cend * bs; 127079bdfe76SSatish Balay 127179bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 127279bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 127379bdfe76SSatish Balay PLogObjectParent(B,b->A); 127479bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 127579bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 127679bdfe76SSatish Balay PLogObjectParent(B,b->B); 127779bdfe76SSatish Balay 127879bdfe76SSatish Balay /* build cache for off array entries formed */ 127979bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 128090f02eecSBarry Smith b->donotstash = 0; 128179bdfe76SSatish Balay b->colmap = 0; 128279bdfe76SSatish Balay b->garray = 0; 128379bdfe76SSatish Balay b->roworiented = 1; 128479bdfe76SSatish Balay 128579bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 128679bdfe76SSatish Balay b->lvec = 0; 128779bdfe76SSatish Balay b->Mvctx = 0; 128879bdfe76SSatish Balay 128979bdfe76SSatish Balay /* stuff for MatGetRow() */ 129079bdfe76SSatish Balay b->rowindices = 0; 129179bdfe76SSatish Balay b->rowvalues = 0; 129279bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 129379bdfe76SSatish Balay 129479bdfe76SSatish Balay *A = B; 129579bdfe76SSatish Balay return 0; 129679bdfe76SSatish Balay } 12970ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 12980ac07820SSatish Balay { 12990ac07820SSatish Balay Mat mat; 13000ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 13010ac07820SSatish Balay int ierr, len=0, flg; 13020ac07820SSatish Balay 13030ac07820SSatish Balay *newmat = 0; 13040ac07820SSatish Balay PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm); 13050ac07820SSatish Balay PLogObjectCreate(mat); 13060ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 13070ac07820SSatish Balay PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 13080ac07820SSatish Balay mat->destroy = MatDestroy_MPIBAIJ; 13090ac07820SSatish Balay mat->view = MatView_MPIBAIJ; 13100ac07820SSatish Balay mat->factor = matin->factor; 13110ac07820SSatish Balay mat->assembled = PETSC_TRUE; 13120ac07820SSatish Balay 13130ac07820SSatish Balay a->m = mat->m = oldmat->m; 13140ac07820SSatish Balay a->n = mat->n = oldmat->n; 13150ac07820SSatish Balay a->M = mat->M = oldmat->M; 13160ac07820SSatish Balay a->N = mat->N = oldmat->N; 13170ac07820SSatish Balay 13180ac07820SSatish Balay a->bs = oldmat->bs; 13190ac07820SSatish Balay a->bs2 = oldmat->bs2; 13200ac07820SSatish Balay a->mbs = oldmat->mbs; 13210ac07820SSatish Balay a->nbs = oldmat->nbs; 13220ac07820SSatish Balay a->Mbs = oldmat->Mbs; 13230ac07820SSatish Balay a->Nbs = oldmat->Nbs; 13240ac07820SSatish Balay 13250ac07820SSatish Balay a->rstart = oldmat->rstart; 13260ac07820SSatish Balay a->rend = oldmat->rend; 13270ac07820SSatish Balay a->cstart = oldmat->cstart; 13280ac07820SSatish Balay a->cend = oldmat->cend; 13290ac07820SSatish Balay a->size = oldmat->size; 13300ac07820SSatish Balay a->rank = oldmat->rank; 13310ac07820SSatish Balay a->insertmode = NOT_SET_VALUES; 13320ac07820SSatish Balay a->rowvalues = 0; 13330ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 13340ac07820SSatish Balay 13350ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 13360ac07820SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 13370ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 13380ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 13390ac07820SSatish Balay ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 13400ac07820SSatish Balay if (oldmat->colmap) { 13410ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 13420ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 13430ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 13440ac07820SSatish Balay } else a->colmap = 0; 13450ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 13460ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 13470ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 13480ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 13490ac07820SSatish Balay } else a->garray = 0; 13500ac07820SSatish Balay 13510ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 13520ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 13530ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 13540ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 13550ac07820SSatish Balay ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 13560ac07820SSatish Balay PLogObjectParent(mat,a->A); 13570ac07820SSatish Balay ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 13580ac07820SSatish Balay PLogObjectParent(mat,a->B); 13590ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 13600ac07820SSatish Balay if (flg) { 13610ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 13620ac07820SSatish Balay } 13630ac07820SSatish Balay *newmat = mat; 13640ac07820SSatish Balay return 0; 13650ac07820SSatish Balay } 136657b952d6SSatish Balay 136757b952d6SSatish Balay #include "sys.h" 136857b952d6SSatish Balay 136957b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 137057b952d6SSatish Balay { 137157b952d6SSatish Balay Mat A; 137257b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 137357b952d6SSatish Balay Scalar *vals,*buf; 137457b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 137557b952d6SSatish Balay MPI_Status status; 1376cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 137757b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 137857b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 137957b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 138057b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 138157b952d6SSatish Balay 138257b952d6SSatish Balay 138357b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 138457b952d6SSatish Balay bs2 = bs*bs; 138557b952d6SSatish Balay 138657b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 138757b952d6SSatish Balay if (!rank) { 138857b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 138957b952d6SSatish Balay ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1390cee3aa6bSSatish Balay if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object"); 139157b952d6SSatish Balay } 139257b952d6SSatish Balay 139357b952d6SSatish Balay MPI_Bcast(header+1,3,MPI_INT,0,comm); 139457b952d6SSatish Balay M = header[1]; N = header[2]; 139557b952d6SSatish Balay 1396b0267e0aSLois Curfman McInnes if (M != N) SETERRQ(1,"MatLoad_MPIBAIJ:Can only do square matrices"); 139757b952d6SSatish Balay 139857b952d6SSatish Balay /* 139957b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 140057b952d6SSatish Balay divisible by the blocksize 140157b952d6SSatish Balay */ 140257b952d6SSatish Balay Mbs = M/bs; 140357b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 140457b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 140557b952d6SSatish Balay else Mbs++; 140657b952d6SSatish Balay if (extra_rows &&!rank) { 1407b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 140857b952d6SSatish Balay } 1409537820f0SBarry Smith 141057b952d6SSatish Balay /* determine ownership of all rows */ 141157b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 141257b952d6SSatish Balay m = mbs * bs; 1413cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 1414cee3aa6bSSatish Balay browners = rowners + size + 1; 141557b952d6SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 141657b952d6SSatish Balay rowners[0] = 0; 1417cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 1418cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 141957b952d6SSatish Balay rstart = rowners[rank]; 142057b952d6SSatish Balay rend = rowners[rank+1]; 142157b952d6SSatish Balay 142257b952d6SSatish Balay /* distribute row lengths to all processors */ 142357b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 142457b952d6SSatish Balay if (!rank) { 142557b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 142657b952d6SSatish Balay ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 142757b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 142857b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1429cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 1430cee3aa6bSSatish Balay MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 143157b952d6SSatish Balay PetscFree(sndcounts); 143257b952d6SSatish Balay } 143357b952d6SSatish Balay else { 143457b952d6SSatish Balay MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 143557b952d6SSatish Balay } 143657b952d6SSatish Balay 143757b952d6SSatish Balay if (!rank) { 143857b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 143957b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 144057b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 144157b952d6SSatish Balay for ( i=0; i<size; i++ ) { 144257b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 144357b952d6SSatish Balay procsnz[i] += rowlengths[j]; 144457b952d6SSatish Balay } 144557b952d6SSatish Balay } 144657b952d6SSatish Balay PetscFree(rowlengths); 144757b952d6SSatish Balay 144857b952d6SSatish Balay /* determine max buffer needed and allocate it */ 144957b952d6SSatish Balay maxnz = 0; 145057b952d6SSatish Balay for ( i=0; i<size; i++ ) { 145157b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 145257b952d6SSatish Balay } 145357b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 145457b952d6SSatish Balay 145557b952d6SSatish Balay /* read in my part of the matrix column indices */ 145657b952d6SSatish Balay nz = procsnz[0]; 145757b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 145857b952d6SSatish Balay mycols = ibuf; 1459cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 146057b952d6SSatish Balay ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1461cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 1462cee3aa6bSSatish Balay 146357b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 146457b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 146557b952d6SSatish Balay nz = procsnz[i]; 146657b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 146757b952d6SSatish Balay MPI_Send(cols,nz,MPI_INT,i,tag,comm); 146857b952d6SSatish Balay } 146957b952d6SSatish Balay /* read in the stuff for the last proc */ 147057b952d6SSatish Balay if ( size != 1 ) { 147157b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 147257b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 147357b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 1474cee3aa6bSSatish Balay MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm); 147557b952d6SSatish Balay } 147657b952d6SSatish Balay PetscFree(cols); 147757b952d6SSatish Balay } 147857b952d6SSatish Balay else { 147957b952d6SSatish Balay /* determine buffer space needed for message */ 148057b952d6SSatish Balay nz = 0; 148157b952d6SSatish Balay for ( i=0; i<m; i++ ) { 148257b952d6SSatish Balay nz += locrowlens[i]; 148357b952d6SSatish Balay } 148457b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 148557b952d6SSatish Balay mycols = ibuf; 148657b952d6SSatish Balay /* receive message of column indices*/ 148757b952d6SSatish Balay MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 148857b952d6SSatish Balay MPI_Get_count(&status,MPI_INT,&maxnz); 1489cee3aa6bSSatish Balay if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file"); 149057b952d6SSatish Balay } 149157b952d6SSatish Balay 149257b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 1493cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 1494cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 149557b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 1496cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 149757b952d6SSatish Balay masked1 = mask + Mbs; 149857b952d6SSatish Balay masked2 = masked1 + Mbs; 149957b952d6SSatish Balay rowcount = 0; nzcount = 0; 150057b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 150157b952d6SSatish Balay dcount = 0; 150257b952d6SSatish Balay odcount = 0; 150357b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 150457b952d6SSatish Balay kmax = locrowlens[rowcount]; 150557b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 150657b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 150757b952d6SSatish Balay if (!mask[tmp]) { 150857b952d6SSatish Balay mask[tmp] = 1; 150957b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 151057b952d6SSatish Balay else masked1[dcount++] = tmp; 151157b952d6SSatish Balay } 151257b952d6SSatish Balay } 151357b952d6SSatish Balay rowcount++; 151457b952d6SSatish Balay } 1515cee3aa6bSSatish Balay 151657b952d6SSatish Balay dlens[i] = dcount; 151757b952d6SSatish Balay odlens[i] = odcount; 1518cee3aa6bSSatish Balay 151957b952d6SSatish Balay /* zero out the mask elements we set */ 152057b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 152157b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 152257b952d6SSatish Balay } 1523cee3aa6bSSatish Balay 152457b952d6SSatish Balay /* create our matrix */ 1525537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 1526537820f0SBarry Smith CHKERRQ(ierr); 152757b952d6SSatish Balay A = *newmat; 15286d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 152957b952d6SSatish Balay 153057b952d6SSatish Balay if (!rank) { 153157b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 153257b952d6SSatish Balay /* read in my part of the matrix numerical values */ 153357b952d6SSatish Balay nz = procsnz[0]; 153457b952d6SSatish Balay vals = buf; 1535cee3aa6bSSatish Balay mycols = ibuf; 1536cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 153757b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1538cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 1539537820f0SBarry Smith 154057b952d6SSatish Balay /* insert into matrix */ 154157b952d6SSatish Balay jj = rstart*bs; 154257b952d6SSatish Balay for ( i=0; i<m; i++ ) { 154357b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 154457b952d6SSatish Balay mycols += locrowlens[i]; 154557b952d6SSatish Balay vals += locrowlens[i]; 154657b952d6SSatish Balay jj++; 154757b952d6SSatish Balay } 154857b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 154957b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 155057b952d6SSatish Balay nz = procsnz[i]; 155157b952d6SSatish Balay vals = buf; 155257b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 155357b952d6SSatish Balay MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 155457b952d6SSatish Balay } 155557b952d6SSatish Balay /* the last proc */ 155657b952d6SSatish Balay if ( size != 1 ){ 155757b952d6SSatish Balay nz = procsnz[i] - extra_rows; 1558cee3aa6bSSatish Balay vals = buf; 155957b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 156057b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 1561cee3aa6bSSatish Balay MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm); 156257b952d6SSatish Balay } 156357b952d6SSatish Balay PetscFree(procsnz); 156457b952d6SSatish Balay } 156557b952d6SSatish Balay else { 156657b952d6SSatish Balay /* receive numeric values */ 156757b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 156857b952d6SSatish Balay 156957b952d6SSatish Balay /* receive message of values*/ 157057b952d6SSatish Balay vals = buf; 1571cee3aa6bSSatish Balay mycols = ibuf; 157257b952d6SSatish Balay MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 157357b952d6SSatish Balay MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1574cee3aa6bSSatish Balay if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file"); 157557b952d6SSatish Balay 157657b952d6SSatish Balay /* insert into matrix */ 157757b952d6SSatish Balay jj = rstart*bs; 1578cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 157957b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 158057b952d6SSatish Balay mycols += locrowlens[i]; 158157b952d6SSatish Balay vals += locrowlens[i]; 158257b952d6SSatish Balay jj++; 158357b952d6SSatish Balay } 158457b952d6SSatish Balay } 158557b952d6SSatish Balay PetscFree(locrowlens); 158657b952d6SSatish Balay PetscFree(buf); 158757b952d6SSatish Balay PetscFree(ibuf); 158857b952d6SSatish Balay PetscFree(rowners); 158957b952d6SSatish Balay PetscFree(dlens); 1590cee3aa6bSSatish Balay PetscFree(mask); 15916d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 15926d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 159357b952d6SSatish Balay return 0; 159457b952d6SSatish Balay } 159557b952d6SSatish Balay 159657b952d6SSatish Balay 1597