179bdfe76SSatish Balay #ifndef lint 2*bb5a7306SBarry Smith static char vcid[] = "$Id: mpibaij.c,v 1.44 1997/01/01 03:38:25 bsmith Exp bsmith $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 570f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 779bdfe76SSatish Balay 857b952d6SSatish Balay #include "draw.h" 957b952d6SSatish Balay #include "pinclude/pviewer.h" 1057b952d6SSatish Balay 1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 1593bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *); 1693bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *); 1793bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double); 1893bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec); 1993bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 2093bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec); 2193bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 2293bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *); 2357b952d6SSatish Balay 243b2fbd54SBarry Smith 25537820f0SBarry Smith /* 26537820f0SBarry Smith Local utility routine that creates a mapping from the global column 2757b952d6SSatish Balay number to the local number in the off-diagonal part of the local 2857b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 2957b952d6SSatish Balay length of colmap equals the global matrix length. 3057b952d6SSatish Balay */ 31026e39d0SSatish Balay #undef __FUNCTION__ 32026e39d0SSatish Balay #define __FUNCTION__ "CreateColmap_MPIBAIJ_Private" 3357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 3457b952d6SSatish Balay { 3557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3657b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 37928fc39bSSatish Balay int nbs = B->nbs,i,bs=B->bs;; 3857b952d6SSatish Balay 3957b952d6SSatish Balay baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap); 4057b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 4157b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 42928fc39bSSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 4357b952d6SSatish Balay return 0; 4457b952d6SSatish Balay } 4557b952d6SSatish Balay 46026e39d0SSatish Balay #undef __FUNCTION__ 47026e39d0SSatish Balay #define __FUNCTION__ "MatGetRowIJ_MPIBAIJ(" 483b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 493b2fbd54SBarry Smith PetscTruth *done) 5057b952d6SSatish Balay { 513b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 5257b952d6SSatish Balay int ierr; 533b2fbd54SBarry Smith if (aij->size == 1) { 543b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 55e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 563b2fbd54SBarry Smith return 0; 573b2fbd54SBarry Smith } 583b2fbd54SBarry Smith 59026e39d0SSatish Balay #undef __FUNCTION__ 60026e39d0SSatish Balay #define __FUNCTION__ "MatRestoreRowIJ_MPIBAIJ" 613b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 623b2fbd54SBarry Smith PetscTruth *done) 633b2fbd54SBarry Smith { 643b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 653b2fbd54SBarry Smith int ierr; 663b2fbd54SBarry Smith if (aij->size == 1) { 673b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 68e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 6957b952d6SSatish Balay return 0; 7057b952d6SSatish Balay } 7157b952d6SSatish Balay 72639f9d9dSBarry Smith extern int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 73026e39d0SSatish Balay #undef __FUNCTION__ 74026e39d0SSatish Balay #define __FUNCTION__ "MatSetValues_MPIBAIJ" 7557b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 7657b952d6SSatish Balay { 7757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 7857b952d6SSatish Balay Scalar value; 794fa0d573SSatish Balay int ierr,i,j,row,col; 804fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 814fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 824fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 8357b952d6SSatish Balay 842c3acbe9SBarry Smith #if defined(PETSC_BOPT_g) 8557b952d6SSatish Balay if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) { 86e3372554SBarry Smith SETERRQ(1,0,"Cannot mix inserts and adds"); 8757b952d6SSatish Balay } 882c3acbe9SBarry Smith #endif 8957b952d6SSatish Balay baij->insertmode = addv; 9057b952d6SSatish Balay for ( i=0; i<m; i++ ) { 91639f9d9dSBarry Smith #if defined(PETSC_BOPT_g) 92e3372554SBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 93e3372554SBarry Smith if (im[i] >= baij->M) SETERRQ(1,0,"Row too large"); 94639f9d9dSBarry Smith #endif 9557b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 9657b952d6SSatish Balay row = im[i] - rstart_orig; 9757b952d6SSatish Balay for ( j=0; j<n; j++ ) { 9857b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 9957b952d6SSatish Balay col = in[j] - cstart_orig; 10057b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 101639f9d9dSBarry Smith ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 10257b952d6SSatish Balay } 103639f9d9dSBarry Smith #if defined(PETSC_BOPT_g) 104e3372554SBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 105e3372554SBarry Smith else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");} 106639f9d9dSBarry Smith #endif 10757b952d6SSatish Balay else { 10857b952d6SSatish Balay if (mat->was_assembled) { 109905e6a2fSBarry Smith if (!baij->colmap) { 110905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 111905e6a2fSBarry Smith } 112905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 11357b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 11457b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 11557b952d6SSatish Balay col = in[j]; 11657b952d6SSatish Balay } 11757b952d6SSatish Balay } 11857b952d6SSatish Balay else col = in[j]; 11957b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 120639f9d9dSBarry Smith ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 12157b952d6SSatish Balay } 12257b952d6SSatish Balay } 12357b952d6SSatish Balay } 12457b952d6SSatish Balay else { 12590f02eecSBarry Smith if (roworiented && !baij->donotstash) { 12657b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 12757b952d6SSatish Balay } 12857b952d6SSatish Balay else { 12990f02eecSBarry Smith if (!baij->donotstash) { 13057b952d6SSatish Balay row = im[i]; 13157b952d6SSatish Balay for ( j=0; j<n; j++ ) { 13257b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 13357b952d6SSatish Balay } 13457b952d6SSatish Balay } 13557b952d6SSatish Balay } 13657b952d6SSatish Balay } 13790f02eecSBarry Smith } 13857b952d6SSatish Balay return 0; 13957b952d6SSatish Balay } 14057b952d6SSatish Balay 141026e39d0SSatish Balay #undef __FUNCTION__ 142026e39d0SSatish Balay #define __FUNCTION__ "MatGetValues_MPIBAIJ" 143d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 144d6de1c52SSatish Balay { 145d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 146d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 147d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 148d6de1c52SSatish Balay 149d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 150e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 151e3372554SBarry Smith if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large"); 152d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 153d6de1c52SSatish Balay row = idxm[i] - bsrstart; 154d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 155e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 156e3372554SBarry Smith if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large"); 157d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 158d6de1c52SSatish Balay col = idxn[j] - bscstart; 159d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 160d6de1c52SSatish Balay } 161d6de1c52SSatish Balay else { 162905e6a2fSBarry Smith if (!baij->colmap) { 163905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 164905e6a2fSBarry Smith } 165e60e1c95SSatish Balay if((baij->colmap[idxn[j]/bs]-1 < 0) || 166dcb20de4SSatish Balay (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 167d9d09a02SSatish Balay else { 168dcb20de4SSatish Balay col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 169d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 170d6de1c52SSatish Balay } 171d6de1c52SSatish Balay } 172d6de1c52SSatish Balay } 173d9d09a02SSatish Balay } 174d6de1c52SSatish Balay else { 175e3372554SBarry Smith SETERRQ(1,0,"Only local values currently supported"); 176d6de1c52SSatish Balay } 177d6de1c52SSatish Balay } 178d6de1c52SSatish Balay return 0; 179d6de1c52SSatish Balay } 180d6de1c52SSatish Balay 181026e39d0SSatish Balay #undef __FUNCTION__ 182026e39d0SSatish Balay #define __FUNCTION__ "MatNorm_MPIBAIJ" 183d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 184d6de1c52SSatish Balay { 185d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 186d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 187acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 188d6de1c52SSatish Balay double sum = 0.0; 189d6de1c52SSatish Balay Scalar *v; 190d6de1c52SSatish Balay 191d6de1c52SSatish Balay if (baij->size == 1) { 192d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 193d6de1c52SSatish Balay } else { 194d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 195d6de1c52SSatish Balay v = amat->a; 196d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 197d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 198d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 199d6de1c52SSatish Balay #else 200d6de1c52SSatish Balay sum += (*v)*(*v); v++; 201d6de1c52SSatish Balay #endif 202d6de1c52SSatish Balay } 203d6de1c52SSatish Balay v = bmat->a; 204d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 205d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 206d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 207d6de1c52SSatish Balay #else 208d6de1c52SSatish Balay sum += (*v)*(*v); v++; 209d6de1c52SSatish Balay #endif 210d6de1c52SSatish Balay } 211d6de1c52SSatish Balay MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 212d6de1c52SSatish Balay *norm = sqrt(*norm); 213d6de1c52SSatish Balay } 214acdf5bf4SSatish Balay else 215e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 216d6de1c52SSatish Balay } 217d6de1c52SSatish Balay return 0; 218d6de1c52SSatish Balay } 21957b952d6SSatish Balay 220026e39d0SSatish Balay #undef __FUNCTION__ 221026e39d0SSatish Balay #define __FUNCTION__ "MatAssemblyBegin_MPIBAIJ" 22257b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 22357b952d6SSatish Balay { 22457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 22557b952d6SSatish Balay MPI_Comm comm = mat->comm; 22657b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 22757b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 22857b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 22957b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 23057b952d6SSatish Balay InsertMode addv; 23157b952d6SSatish Balay Scalar *rvalues,*svalues; 23257b952d6SSatish Balay 23357b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 23457b952d6SSatish Balay MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 23557b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 236e3372554SBarry Smith SETERRQ(1,0,"Some processors inserted others added"); 23757b952d6SSatish Balay } 23857b952d6SSatish Balay baij->insertmode = addv; /* in case this processor had no cache */ 23957b952d6SSatish Balay 24057b952d6SSatish Balay /* first count number of contributors to each processor */ 24157b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 24257b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 24357b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 24457b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 24557b952d6SSatish Balay idx = baij->stash.idx[i]; 24657b952d6SSatish Balay for ( j=0; j<size; j++ ) { 24757b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 24857b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 24957b952d6SSatish Balay } 25057b952d6SSatish Balay } 25157b952d6SSatish Balay } 25257b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 25357b952d6SSatish Balay 25457b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 25557b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 25657b952d6SSatish Balay MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 25757b952d6SSatish Balay nreceives = work[rank]; 25857b952d6SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 25957b952d6SSatish Balay nmax = work[rank]; 26057b952d6SSatish Balay PetscFree(work); 26157b952d6SSatish Balay 26257b952d6SSatish Balay /* post receives: 26357b952d6SSatish Balay 1) each message will consist of ordered pairs 26457b952d6SSatish Balay (global index,value) we store the global index as a double 26557b952d6SSatish Balay to simplify the message passing. 26657b952d6SSatish Balay 2) since we don't know how long each individual message is we 26757b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 26857b952d6SSatish Balay this is a lot of wasted space. 26957b952d6SSatish Balay 27057b952d6SSatish Balay 27157b952d6SSatish Balay This could be done better. 27257b952d6SSatish Balay */ 27357b952d6SSatish Balay rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 27457b952d6SSatish Balay CHKPTRQ(rvalues); 27557b952d6SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 27657b952d6SSatish Balay CHKPTRQ(recv_waits); 27757b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 27857b952d6SSatish Balay MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 27957b952d6SSatish Balay comm,recv_waits+i); 28057b952d6SSatish Balay } 28157b952d6SSatish Balay 28257b952d6SSatish Balay /* do sends: 28357b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 28457b952d6SSatish Balay the ith processor 28557b952d6SSatish Balay */ 28657b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 28757b952d6SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 28857b952d6SSatish Balay CHKPTRQ(send_waits); 28957b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 29057b952d6SSatish Balay starts[0] = 0; 29157b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 29257b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 29357b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 29457b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 29557b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 29657b952d6SSatish Balay } 29757b952d6SSatish Balay PetscFree(owner); 29857b952d6SSatish Balay starts[0] = 0; 29957b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 30057b952d6SSatish Balay count = 0; 30157b952d6SSatish Balay for ( i=0; i<size; i++ ) { 30257b952d6SSatish Balay if (procs[i]) { 30357b952d6SSatish Balay MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 30457b952d6SSatish Balay comm,send_waits+count++); 30557b952d6SSatish Balay } 30657b952d6SSatish Balay } 30757b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 30857b952d6SSatish Balay 30957b952d6SSatish Balay /* Free cache space */ 310d2dc9b81SLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 31157b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 31257b952d6SSatish Balay 31357b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 31457b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 31557b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 31657b952d6SSatish Balay baij->rmax = nmax; 31757b952d6SSatish Balay 31857b952d6SSatish Balay return 0; 31957b952d6SSatish Balay } 32057b952d6SSatish Balay 32157b952d6SSatish Balay 322026e39d0SSatish Balay #undef __FUNCTION__ 323026e39d0SSatish Balay #define __FUNCTION__ "MatAssemblyEnd_MPIBAIJ" 32457b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 32557b952d6SSatish Balay { 32657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 32757b952d6SSatish Balay MPI_Status *send_status,recv_status; 32857b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 32957b952d6SSatish Balay int bs=baij->bs,row,col,other_disassembled; 33057b952d6SSatish Balay Scalar *values,val; 33157b952d6SSatish Balay InsertMode addv = baij->insertmode; 33257b952d6SSatish Balay 33357b952d6SSatish Balay /* wait on receives */ 33457b952d6SSatish Balay while (count) { 33557b952d6SSatish Balay MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 33657b952d6SSatish Balay /* unpack receives into our local space */ 33757b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 33857b952d6SSatish Balay MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 33957b952d6SSatish Balay n = n/3; 34057b952d6SSatish Balay for ( i=0; i<n; i++ ) { 34157b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 34257b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 34357b952d6SSatish Balay val = values[3*i+2]; 34457b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 34557b952d6SSatish Balay col -= baij->cstart*bs; 34657b952d6SSatish Balay MatSetValues(baij->A,1,&row,1,&col,&val,addv); 34757b952d6SSatish Balay } 34857b952d6SSatish Balay else { 34957b952d6SSatish Balay if (mat->was_assembled) { 350905e6a2fSBarry Smith if (!baij->colmap) { 351905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 352905e6a2fSBarry Smith } 353905e6a2fSBarry Smith col = (baij->colmap[col/bs]-1)*bs + col%bs; 35457b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 35557b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 35657b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 35757b952d6SSatish Balay } 35857b952d6SSatish Balay } 35957b952d6SSatish Balay MatSetValues(baij->B,1,&row,1,&col,&val,addv); 36057b952d6SSatish Balay } 36157b952d6SSatish Balay } 36257b952d6SSatish Balay count--; 36357b952d6SSatish Balay } 36457b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 36557b952d6SSatish Balay 36657b952d6SSatish Balay /* wait on sends */ 36757b952d6SSatish Balay if (baij->nsends) { 36857b952d6SSatish Balay send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 36957b952d6SSatish Balay CHKPTRQ(send_status); 37057b952d6SSatish Balay MPI_Waitall(baij->nsends,baij->send_waits,send_status); 37157b952d6SSatish Balay PetscFree(send_status); 37257b952d6SSatish Balay } 37357b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 37457b952d6SSatish Balay 37557b952d6SSatish Balay baij->insertmode = NOT_SET_VALUES; 37657b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 37757b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 37857b952d6SSatish Balay 37957b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 38057b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 38157b952d6SSatish Balay MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 38257b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 38357b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 38457b952d6SSatish Balay } 38557b952d6SSatish Balay 3866d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 38757b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 38857b952d6SSatish Balay } 38957b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 39057b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 39157b952d6SSatish Balay 39257b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 39357b952d6SSatish Balay return 0; 39457b952d6SSatish Balay } 39557b952d6SSatish Balay 396026e39d0SSatish Balay #undef __FUNCTION__ 397026e39d0SSatish Balay #define __FUNCTION__ "MatView_MPIBAIJ_Binary" 39857b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 39957b952d6SSatish Balay { 40057b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 40157b952d6SSatish Balay int ierr; 40257b952d6SSatish Balay 40357b952d6SSatish Balay if (baij->size == 1) { 40457b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 40557b952d6SSatish Balay } 406e3372554SBarry Smith else SETERRQ(1,0,"Only uniprocessor output supported"); 40757b952d6SSatish Balay return 0; 40857b952d6SSatish Balay } 40957b952d6SSatish Balay 410026e39d0SSatish Balay #undef __FUNCTION__ 411026e39d0SSatish Balay #define __FUNCTION__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 41257b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 41357b952d6SSatish Balay { 41457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 415cee3aa6bSSatish Balay int ierr, format,rank,bs = baij->bs; 41657b952d6SSatish Balay FILE *fd; 41757b952d6SSatish Balay ViewerType vtype; 41857b952d6SSatish Balay 41957b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 42057b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 42157b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 422639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 4234e220ebcSLois Curfman McInnes MatInfo info; 42457b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 42557b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 4264e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 42757b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 42857b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 4294e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 4304e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 4314e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 4324e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 4334e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 4344e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 43557b952d6SSatish Balay fflush(fd); 43657b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 43757b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 43857b952d6SSatish Balay return 0; 43957b952d6SSatish Balay } 440639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 441bcc3fcf6SBarry Smith PetscPrintf(mat->comm," block size is %d\n",bs); 44257b952d6SSatish Balay return 0; 44357b952d6SSatish Balay } 44457b952d6SSatish Balay } 44557b952d6SSatish Balay 44657b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 44757b952d6SSatish Balay Draw draw; 44857b952d6SSatish Balay PetscTruth isnull; 44957b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 45057b952d6SSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 45157b952d6SSatish Balay } 45257b952d6SSatish Balay 45357b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 45457b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 45557b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 45657b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 45757b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 45857b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 45957b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 46057b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 46157b952d6SSatish Balay fflush(fd); 46257b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 46357b952d6SSatish Balay } 46457b952d6SSatish Balay else { 46557b952d6SSatish Balay int size = baij->size; 46657b952d6SSatish Balay rank = baij->rank; 46757b952d6SSatish Balay if (size == 1) { 46857b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 46957b952d6SSatish Balay } 47057b952d6SSatish Balay else { 47157b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 47257b952d6SSatish Balay Mat A; 47357b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 47457b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 47557b952d6SSatish Balay int mbs=baij->mbs; 47657b952d6SSatish Balay Scalar *a; 47757b952d6SSatish Balay 47857b952d6SSatish Balay if (!rank) { 479cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 48057b952d6SSatish Balay CHKERRQ(ierr); 48157b952d6SSatish Balay } 48257b952d6SSatish Balay else { 483cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 48457b952d6SSatish Balay CHKERRQ(ierr); 48557b952d6SSatish Balay } 48657b952d6SSatish Balay PLogObjectParent(mat,A); 48757b952d6SSatish Balay 48857b952d6SSatish Balay /* copy over the A part */ 48957b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 49057b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 49157b952d6SSatish Balay row = baij->rstart; 49257b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 49357b952d6SSatish Balay 49457b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 49557b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 49657b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 49757b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 49857b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 49957b952d6SSatish Balay for (k=0; k<bs; k++ ) { 500cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 501cee3aa6bSSatish Balay col++; a += bs; 50257b952d6SSatish Balay } 50357b952d6SSatish Balay } 50457b952d6SSatish Balay } 50557b952d6SSatish Balay /* copy over the B part */ 50657b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 50757b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 50857b952d6SSatish Balay row = baij->rstart*bs; 50957b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 51057b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 51157b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 51257b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 51357b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 51457b952d6SSatish Balay for (k=0; k<bs; k++ ) { 515cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 516cee3aa6bSSatish Balay col++; a += bs; 51757b952d6SSatish Balay } 51857b952d6SSatish Balay } 51957b952d6SSatish Balay } 52057b952d6SSatish Balay PetscFree(rvals); 5216d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 5226d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 52357b952d6SSatish Balay if (!rank) { 52457b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 52557b952d6SSatish Balay } 52657b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 52757b952d6SSatish Balay } 52857b952d6SSatish Balay } 52957b952d6SSatish Balay return 0; 53057b952d6SSatish Balay } 53157b952d6SSatish Balay 53257b952d6SSatish Balay 53357b952d6SSatish Balay 534026e39d0SSatish Balay #undef __FUNCTION__ 535026e39d0SSatish Balay #define __FUNCTION__ "MatView_MPIBAIJ" 53657b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 53757b952d6SSatish Balay { 53857b952d6SSatish Balay Mat mat = (Mat) obj; 53957b952d6SSatish Balay int ierr; 54057b952d6SSatish Balay ViewerType vtype; 54157b952d6SSatish Balay 54257b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 54357b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 54457b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 54557b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 54657b952d6SSatish Balay } 54757b952d6SSatish Balay else if (vtype == BINARY_FILE_VIEWER) { 54857b952d6SSatish Balay return MatView_MPIBAIJ_Binary(mat,viewer); 54957b952d6SSatish Balay } 55057b952d6SSatish Balay return 0; 55157b952d6SSatish Balay } 55257b952d6SSatish Balay 553026e39d0SSatish Balay #undef __FUNCTION__ 554026e39d0SSatish Balay #define __FUNCTION__ "MatDestroy_MPIBAIJ" 55579bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj) 55679bdfe76SSatish Balay { 55779bdfe76SSatish Balay Mat mat = (Mat) obj; 55879bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 55979bdfe76SSatish Balay int ierr; 56079bdfe76SSatish Balay 56179bdfe76SSatish Balay #if defined(PETSC_LOG) 56279bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 56379bdfe76SSatish Balay #endif 56479bdfe76SSatish Balay 56579bdfe76SSatish Balay PetscFree(baij->rowners); 56679bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 56779bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 56879bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 56979bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 57079bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 57179bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 57279bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 57379bdfe76SSatish Balay PetscFree(baij); 57490f02eecSBarry Smith if (mat->mapping) { 57590f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 57690f02eecSBarry Smith } 57779bdfe76SSatish Balay PLogObjectDestroy(mat); 57879bdfe76SSatish Balay PetscHeaderDestroy(mat); 57979bdfe76SSatish Balay return 0; 58079bdfe76SSatish Balay } 58179bdfe76SSatish Balay 582026e39d0SSatish Balay #undef __FUNCTION__ 583026e39d0SSatish Balay #define __FUNCTION__ "MatMult_MPIBAIJ" 584cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 585cee3aa6bSSatish Balay { 586cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 58747b4a8eaSLois Curfman McInnes int ierr, nt; 588cee3aa6bSSatish Balay 589c16cb8f2SBarry Smith VecGetLocalSize_Fast(xx,nt); 59047b4a8eaSLois Curfman McInnes if (nt != a->n) { 591e3372554SBarry Smith SETERRQ(1,0,"Incompatible parition of A and xx"); 59247b4a8eaSLois Curfman McInnes } 593c16cb8f2SBarry Smith VecGetLocalSize_Fast(yy,nt); 59447b4a8eaSLois Curfman McInnes if (nt != a->m) { 595e3372554SBarry Smith SETERRQ(1,0,"Incompatible parition of A and yy"); 59647b4a8eaSLois Curfman McInnes } 59743a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 598cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 59943a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 600cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 60143a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 602cee3aa6bSSatish Balay return 0; 603cee3aa6bSSatish Balay } 604cee3aa6bSSatish Balay 605026e39d0SSatish Balay #undef __FUNCTION__ 606026e39d0SSatish Balay #define __FUNCTION__ "MatMultAdd_MPIBAIJ" 607cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 608cee3aa6bSSatish Balay { 609cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 610cee3aa6bSSatish Balay int ierr; 61143a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 612cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 61343a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 614cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 615cee3aa6bSSatish Balay return 0; 616cee3aa6bSSatish Balay } 617cee3aa6bSSatish Balay 618026e39d0SSatish Balay #undef __FUNCTION__ 619026e39d0SSatish Balay #define __FUNCTION__ "MatMultTrans_MPIBAIJ" 620cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 621cee3aa6bSSatish Balay { 622cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 623cee3aa6bSSatish Balay int ierr; 624cee3aa6bSSatish Balay 625cee3aa6bSSatish Balay /* do nondiagonal part */ 626cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 627cee3aa6bSSatish Balay /* send it on its way */ 628537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 629cee3aa6bSSatish Balay /* do local part */ 630cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 631cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 632cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 633cee3aa6bSSatish Balay /* but is not perhaps always true. */ 634639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 635cee3aa6bSSatish Balay return 0; 636cee3aa6bSSatish Balay } 637cee3aa6bSSatish Balay 638026e39d0SSatish Balay #undef __FUNCTION__ 639026e39d0SSatish Balay #define __FUNCTION__ "MatMultTransAdd_MPIBAIJ" 640cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 641cee3aa6bSSatish Balay { 642cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 643cee3aa6bSSatish Balay int ierr; 644cee3aa6bSSatish Balay 645cee3aa6bSSatish Balay /* do nondiagonal part */ 646cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 647cee3aa6bSSatish Balay /* send it on its way */ 648537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 649cee3aa6bSSatish Balay /* do local part */ 650cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 651cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 652cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 653cee3aa6bSSatish Balay /* but is not perhaps always true. */ 654537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 655cee3aa6bSSatish Balay return 0; 656cee3aa6bSSatish Balay } 657cee3aa6bSSatish Balay 658cee3aa6bSSatish Balay /* 659cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 660cee3aa6bSSatish Balay diagonal block 661cee3aa6bSSatish Balay */ 662026e39d0SSatish Balay #undef __FUNCTION__ 663026e39d0SSatish Balay #define __FUNCTION__ "MatGetDiagonal_MPIBAIJ" 664cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 665cee3aa6bSSatish Balay { 666cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 667cee3aa6bSSatish Balay if (a->M != a->N) 668e3372554SBarry Smith SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 669cee3aa6bSSatish Balay return MatGetDiagonal(a->A,v); 670cee3aa6bSSatish Balay } 671cee3aa6bSSatish Balay 672026e39d0SSatish Balay #undef __FUNCTION__ 673026e39d0SSatish Balay #define __FUNCTION__ "MatScale_MPIBAIJ" 674cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A) 675cee3aa6bSSatish Balay { 676cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 677cee3aa6bSSatish Balay int ierr; 678cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 679cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 680cee3aa6bSSatish Balay return 0; 681cee3aa6bSSatish Balay } 682026e39d0SSatish Balay 683026e39d0SSatish Balay #undef __FUNCTION__ 684026e39d0SSatish Balay #define __FUNCTION__ "MatGetSize_MPIBAIJ" 68557b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 68657b952d6SSatish Balay { 68757b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 68857b952d6SSatish Balay *m = mat->M; *n = mat->N; 68957b952d6SSatish Balay return 0; 69057b952d6SSatish Balay } 69157b952d6SSatish Balay 692026e39d0SSatish Balay #undef __FUNCTION__ 693026e39d0SSatish Balay #define __FUNCTION__ "MatGetLocalSize_MPIBAIJ" 69457b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 69557b952d6SSatish Balay { 69657b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 69757b952d6SSatish Balay *m = mat->m; *n = mat->N; 69857b952d6SSatish Balay return 0; 69957b952d6SSatish Balay } 70057b952d6SSatish Balay 701026e39d0SSatish Balay #undef __FUNCTION__ 702026e39d0SSatish Balay #define __FUNCTION__ "MatGetOwnershipRange_MPIBAIJ" 70357b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 70457b952d6SSatish Balay { 70557b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 70657b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 70757b952d6SSatish Balay return 0; 70857b952d6SSatish Balay } 70957b952d6SSatish Balay 710acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 711acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 712acdf5bf4SSatish Balay 713026e39d0SSatish Balay #undef __FUNCTION__ 714026e39d0SSatish Balay #define __FUNCTION__ "MatGetRow_MPIBAIJ" 715acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 716acdf5bf4SSatish Balay { 717acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 718acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 719acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 720d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 721d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 722acdf5bf4SSatish Balay 723e3372554SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 724acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 725acdf5bf4SSatish Balay 726acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 727acdf5bf4SSatish Balay /* 728acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 729acdf5bf4SSatish Balay */ 730acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 731bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 732bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 733acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 734acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 735acdf5bf4SSatish Balay } 736acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 737acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 738acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 739acdf5bf4SSatish Balay } 740acdf5bf4SSatish Balay 741acdf5bf4SSatish Balay 742e3372554SBarry Smith if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows") 743d9d09a02SSatish Balay lrow = row - brstart; 744acdf5bf4SSatish Balay 745acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 746acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 747acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 748acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 749acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 750acdf5bf4SSatish Balay nztot = nzA + nzB; 751acdf5bf4SSatish Balay 752acdf5bf4SSatish Balay cmap = mat->garray; 753acdf5bf4SSatish Balay if (v || idx) { 754acdf5bf4SSatish Balay if (nztot) { 755acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 756acdf5bf4SSatish Balay int imark = -1; 757acdf5bf4SSatish Balay if (v) { 758acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 759acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 760d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 761acdf5bf4SSatish Balay else break; 762acdf5bf4SSatish Balay } 763acdf5bf4SSatish Balay imark = i; 764acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 765acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 766acdf5bf4SSatish Balay } 767acdf5bf4SSatish Balay if (idx) { 768acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 769acdf5bf4SSatish Balay if (imark > -1) { 770acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 771bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 772acdf5bf4SSatish Balay } 773acdf5bf4SSatish Balay } else { 774acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 775d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 776d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 777acdf5bf4SSatish Balay else break; 778acdf5bf4SSatish Balay } 779acdf5bf4SSatish Balay imark = i; 780acdf5bf4SSatish Balay } 781d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 782d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 783acdf5bf4SSatish Balay } 784acdf5bf4SSatish Balay } 785d212a18eSSatish Balay else { 786d212a18eSSatish Balay if (idx) *idx = 0; 787d212a18eSSatish Balay if (v) *v = 0; 788d212a18eSSatish Balay } 789acdf5bf4SSatish Balay } 790acdf5bf4SSatish Balay *nz = nztot; 791acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 792acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 793acdf5bf4SSatish Balay return 0; 794acdf5bf4SSatish Balay } 795acdf5bf4SSatish Balay 796026e39d0SSatish Balay #undef __FUNCTION__ 797026e39d0SSatish Balay #define __FUNCTION__ "MatRestoreRow_MPIBAIJ" 798acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 799acdf5bf4SSatish Balay { 800acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 801acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 802e3372554SBarry Smith SETERRQ(1,0,"MatGetRow not called"); 803acdf5bf4SSatish Balay } 804acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 805acdf5bf4SSatish Balay return 0; 806acdf5bf4SSatish Balay } 807acdf5bf4SSatish Balay 808026e39d0SSatish Balay #undef __FUNCTION__ 809026e39d0SSatish Balay #define __FUNCTION__ "MatGetBlockSize_MPIBAIJ" 8105a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 8115a838052SSatish Balay { 8125a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 8135a838052SSatish Balay *bs = baij->bs; 8145a838052SSatish Balay return 0; 8155a838052SSatish Balay } 8165a838052SSatish Balay 817026e39d0SSatish Balay #undef __FUNCTION__ 818026e39d0SSatish Balay #define __FUNCTION__ "MatZeroEntries_MPIBAIJ" 81958667388SSatish Balay static int MatZeroEntries_MPIBAIJ(Mat A) 82058667388SSatish Balay { 82158667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 82258667388SSatish Balay int ierr; 82358667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 82458667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 82558667388SSatish Balay return 0; 82658667388SSatish Balay } 8270ac07820SSatish Balay 828026e39d0SSatish Balay #undef __FUNCTION__ 829026e39d0SSatish Balay #define __FUNCTION__ "MatGetInfo_MPIBAIJ" 8304e220ebcSLois Curfman McInnes static int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 8310ac07820SSatish Balay { 8324e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 8334e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 8347d57db60SLois Curfman McInnes int ierr; 8357d57db60SLois Curfman McInnes double isend[5], irecv[5]; 8360ac07820SSatish Balay 8374e220ebcSLois Curfman McInnes info->rows_global = (double)a->M; 8384e220ebcSLois Curfman McInnes info->columns_global = (double)a->N; 8394e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 8404e220ebcSLois Curfman McInnes info->columns_local = (double)a->N; 8414e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 8424e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 8434e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 8444e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 8454e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 8460ac07820SSatish Balay if (flag == MAT_LOCAL) { 8474e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 8484e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 8494e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 8504e220ebcSLois Curfman McInnes info->memory = isend[3]; 8514e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 8520ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 8530ac07820SSatish Balay MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,matin->comm); 8544e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 8554e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 8564e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 8574e220ebcSLois Curfman McInnes info->memory = irecv[3]; 8584e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 8590ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 8600ac07820SSatish Balay MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,matin->comm); 8614e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 8624e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 8634e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 8644e220ebcSLois Curfman McInnes info->memory = irecv[3]; 8654e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 8660ac07820SSatish Balay } 8674e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 8684e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 8694e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 8700ac07820SSatish Balay return 0; 8710ac07820SSatish Balay } 8720ac07820SSatish Balay 873026e39d0SSatish Balay #undef __FUNCTION__ 874026e39d0SSatish Balay #define __FUNCTION__ "MatSetOption_MPIBAIJ" 87558667388SSatish Balay static int MatSetOption_MPIBAIJ(Mat A,MatOption op) 87658667388SSatish Balay { 87758667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 87858667388SSatish Balay 87958667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 88058667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 8816da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 882b1fbbac0SLois Curfman McInnes op == MAT_COLUMNS_SORTED) { 883b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 884b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 885b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 886aeafbbfcSLois Curfman McInnes a->roworiented = 1; 88758667388SSatish Balay MatSetOption(a->A,op); 88858667388SSatish Balay MatSetOption(a->B,op); 889b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 8906da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 89158667388SSatish Balay op == MAT_SYMMETRIC || 89258667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 89358667388SSatish Balay op == MAT_YES_NEW_DIAGONALS) 89458667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 89558667388SSatish Balay else if (op == MAT_COLUMN_ORIENTED) { 89658667388SSatish Balay a->roworiented = 0; 89758667388SSatish Balay MatSetOption(a->A,op); 89858667388SSatish Balay MatSetOption(a->B,op); 89990f02eecSBarry Smith } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) { 90090f02eecSBarry Smith a->donotstash = 1; 90190f02eecSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) 902e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 90358667388SSatish Balay else 904e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 90558667388SSatish Balay return 0; 90658667388SSatish Balay } 90758667388SSatish Balay 908026e39d0SSatish Balay #undef __FUNCTION__ 909026e39d0SSatish Balay #define __FUNCTION__ "MatTranspose_MPIBAIJ(" 9100ac07820SSatish Balay static int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 9110ac07820SSatish Balay { 9120ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 9130ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 9140ac07820SSatish Balay Mat B; 9150ac07820SSatish Balay int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 9160ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 9170ac07820SSatish Balay Scalar *a; 9180ac07820SSatish Balay 9190ac07820SSatish Balay if (matout == PETSC_NULL && M != N) 920e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 9210ac07820SSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 9220ac07820SSatish Balay CHKERRQ(ierr); 9230ac07820SSatish Balay 9240ac07820SSatish Balay /* copy over the A part */ 9250ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 9260ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 9270ac07820SSatish Balay row = baij->rstart; 9280ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 9290ac07820SSatish Balay 9300ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 9310ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 9320ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 9330ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 9340ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 9350ac07820SSatish Balay for (k=0; k<bs; k++ ) { 9360ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 9370ac07820SSatish Balay col++; a += bs; 9380ac07820SSatish Balay } 9390ac07820SSatish Balay } 9400ac07820SSatish Balay } 9410ac07820SSatish Balay /* copy over the B part */ 9420ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 9430ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 9440ac07820SSatish Balay row = baij->rstart*bs; 9450ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 9460ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 9470ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 9480ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 9490ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 9500ac07820SSatish Balay for (k=0; k<bs; k++ ) { 9510ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 9520ac07820SSatish Balay col++; a += bs; 9530ac07820SSatish Balay } 9540ac07820SSatish Balay } 9550ac07820SSatish Balay } 9560ac07820SSatish Balay PetscFree(rvals); 9570ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 9580ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 9590ac07820SSatish Balay 9600ac07820SSatish Balay if (matout != PETSC_NULL) { 9610ac07820SSatish Balay *matout = B; 9620ac07820SSatish Balay } else { 9630ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 9640ac07820SSatish Balay PetscFree(baij->rowners); 9650ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 9660ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 9670ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 9680ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 9690ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 9700ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 9710ac07820SSatish Balay PetscFree(baij); 9720ac07820SSatish Balay PetscMemcpy(A,B,sizeof(struct _Mat)); 9730ac07820SSatish Balay PetscHeaderDestroy(B); 9740ac07820SSatish Balay } 9750ac07820SSatish Balay return 0; 9760ac07820SSatish Balay } 9770e95ebc0SSatish Balay 978026e39d0SSatish Balay #undef __FUNCTION__ 979026e39d0SSatish Balay #define __FUNCTION__ "MatDiagonalScale_MPIBAIJ" 9800e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 9810e95ebc0SSatish Balay { 9820e95ebc0SSatish Balay Mat a = ((Mat_MPIBAIJ *) A->data)->A; 9830e95ebc0SSatish Balay Mat b = ((Mat_MPIBAIJ *) A->data)->B; 9840e95ebc0SSatish Balay int ierr,s1,s2,s3; 9850e95ebc0SSatish Balay 9860e95ebc0SSatish Balay if (ll) { 9870e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 9880e95ebc0SSatish Balay ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 989e3372554SBarry Smith if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes"); 9900e95ebc0SSatish Balay ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 9910e95ebc0SSatish Balay ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 9920e95ebc0SSatish Balay } 993e3372554SBarry Smith if (rr) SETERRQ(1,0,"not supported for right vector"); 9940e95ebc0SSatish Balay return 0; 9950e95ebc0SSatish Balay } 9960e95ebc0SSatish Balay 9970ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the 9980ac07820SSatish Balay matrix is square and the column and row owerships are identical. 9990ac07820SSatish Balay This is a BUG. The only way to fix it seems to be to access 10000ac07820SSatish Balay baij->A and baij->B directly and not through the MatZeroRows() 10010ac07820SSatish Balay routine. 10020ac07820SSatish Balay */ 1003026e39d0SSatish Balay #undef __FUNCTION__ 1004026e39d0SSatish Balay #define __FUNCTION__ "MatZeroRows_MPIBAIJ" 10050ac07820SSatish Balay static int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 10060ac07820SSatish Balay { 10070ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 10080ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 10090ac07820SSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work; 10100ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 10110ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 10120ac07820SSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs; 10130ac07820SSatish Balay MPI_Comm comm = A->comm; 10140ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 10150ac07820SSatish Balay MPI_Status recv_status,*send_status; 10160ac07820SSatish Balay IS istmp; 10170ac07820SSatish Balay 10180ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 10190ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 10200ac07820SSatish Balay 10210ac07820SSatish Balay /* first count number of contributors to each processor */ 10220ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 10230ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 10240ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 10250ac07820SSatish Balay for ( i=0; i<N; i++ ) { 10260ac07820SSatish Balay idx = rows[i]; 10270ac07820SSatish Balay found = 0; 10280ac07820SSatish Balay for ( j=0; j<size; j++ ) { 10290ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 10300ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 10310ac07820SSatish Balay } 10320ac07820SSatish Balay } 1033e3372554SBarry Smith if (!found) SETERRQ(1,0,"Index out of range"); 10340ac07820SSatish Balay } 10350ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 10360ac07820SSatish Balay 10370ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 10380ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 10390ac07820SSatish Balay MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 10400ac07820SSatish Balay nrecvs = work[rank]; 10410ac07820SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 10420ac07820SSatish Balay nmax = work[rank]; 10430ac07820SSatish Balay PetscFree(work); 10440ac07820SSatish Balay 10450ac07820SSatish Balay /* post receives: */ 10460ac07820SSatish Balay rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 10470ac07820SSatish Balay CHKPTRQ(rvalues); 10480ac07820SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 10490ac07820SSatish Balay CHKPTRQ(recv_waits); 10500ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 10510ac07820SSatish Balay MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 10520ac07820SSatish Balay } 10530ac07820SSatish Balay 10540ac07820SSatish Balay /* do sends: 10550ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 10560ac07820SSatish Balay the ith processor 10570ac07820SSatish Balay */ 10580ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 10590ac07820SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 10600ac07820SSatish Balay CHKPTRQ(send_waits); 10610ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 10620ac07820SSatish Balay starts[0] = 0; 10630ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 10640ac07820SSatish Balay for ( i=0; i<N; i++ ) { 10650ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 10660ac07820SSatish Balay } 10670ac07820SSatish Balay ISRestoreIndices(is,&rows); 10680ac07820SSatish Balay 10690ac07820SSatish Balay starts[0] = 0; 10700ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 10710ac07820SSatish Balay count = 0; 10720ac07820SSatish Balay for ( i=0; i<size; i++ ) { 10730ac07820SSatish Balay if (procs[i]) { 10740ac07820SSatish Balay MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 10750ac07820SSatish Balay } 10760ac07820SSatish Balay } 10770ac07820SSatish Balay PetscFree(starts); 10780ac07820SSatish Balay 10790ac07820SSatish Balay base = owners[rank]*bs; 10800ac07820SSatish Balay 10810ac07820SSatish Balay /* wait on receives */ 10820ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 10830ac07820SSatish Balay source = lens + nrecvs; 10840ac07820SSatish Balay count = nrecvs; slen = 0; 10850ac07820SSatish Balay while (count) { 10860ac07820SSatish Balay MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 10870ac07820SSatish Balay /* unpack receives into our local space */ 10880ac07820SSatish Balay MPI_Get_count(&recv_status,MPI_INT,&n); 10890ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 10900ac07820SSatish Balay lens[imdex] = n; 10910ac07820SSatish Balay slen += n; 10920ac07820SSatish Balay count--; 10930ac07820SSatish Balay } 10940ac07820SSatish Balay PetscFree(recv_waits); 10950ac07820SSatish Balay 10960ac07820SSatish Balay /* move the data into the send scatter */ 10970ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 10980ac07820SSatish Balay count = 0; 10990ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 11000ac07820SSatish Balay values = rvalues + i*nmax; 11010ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 11020ac07820SSatish Balay lrows[count++] = values[j] - base; 11030ac07820SSatish Balay } 11040ac07820SSatish Balay } 11050ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 11060ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 11070ac07820SSatish Balay 11080ac07820SSatish Balay /* actually zap the local rows */ 1109537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 11100ac07820SSatish Balay PLogObjectParent(A,istmp); 11110ac07820SSatish Balay PetscFree(lrows); 11120ac07820SSatish Balay ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 11130ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 11140ac07820SSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 11150ac07820SSatish Balay 11160ac07820SSatish Balay /* wait on sends */ 11170ac07820SSatish Balay if (nsends) { 11180ac07820SSatish Balay send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 11190ac07820SSatish Balay CHKPTRQ(send_status); 11200ac07820SSatish Balay MPI_Waitall(nsends,send_waits,send_status); 11210ac07820SSatish Balay PetscFree(send_status); 11220ac07820SSatish Balay } 11230ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 11240ac07820SSatish Balay 11250ac07820SSatish Balay return 0; 11260ac07820SSatish Balay } 1127ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 1128026e39d0SSatish Balay #undef __FUNCTION__ 1129026e39d0SSatish Balay #define __FUNCTION__ "MatPrintHelp_MPIBAIJ" 1130ba4ca20aSSatish Balay static int MatPrintHelp_MPIBAIJ(Mat A) 1131ba4ca20aSSatish Balay { 1132ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1133ba4ca20aSSatish Balay 1134ba4ca20aSSatish Balay if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A); 1135ba4ca20aSSatish Balay else return 0; 1136ba4ca20aSSatish Balay } 11370ac07820SSatish Balay 1138*bb5a7306SBarry Smith #undef __FUNCTION__ 1139*bb5a7306SBarry Smith #define __FUNCTION__ "MatSetUnfactored_MPIBAIJ" 1140*bb5a7306SBarry Smith static int MatSetUnfactored_MPIBAIJ(Mat A) 1141*bb5a7306SBarry Smith { 1142*bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1143*bb5a7306SBarry Smith int ierr; 1144*bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1145*bb5a7306SBarry Smith return 0; 1146*bb5a7306SBarry Smith } 1147*bb5a7306SBarry Smith 11480ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 11490ac07820SSatish Balay 115079bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 115179bdfe76SSatish Balay static struct _MatOps MatOps = { 1152bd16c2feSSatish Balay MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 11534c50302cSBarry Smith MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 11544c50302cSBarry Smith 0,0,0,0, 11550ac07820SSatish Balay 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 11560e95ebc0SSatish Balay 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 115758667388SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 11584c50302cSBarry Smith MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 11594c50302cSBarry Smith 0,0,0,MatGetSize_MPIBAIJ, 11604c50302cSBarry Smith MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 1161905e6a2fSBarry Smith 0,0,0,MatConvertSameType_MPIBAIJ,0,0, 1162d212a18eSSatish Balay 0,0,0,MatGetSubMatrices_MPIBAIJ, 1163ba4ca20aSSatish Balay MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1164*bb5a7306SBarry Smith MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1165*bb5a7306SBarry Smith 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ}; 116679bdfe76SSatish Balay 116779bdfe76SSatish Balay 1168026e39d0SSatish Balay #undef __FUNCTION__ 1169026e39d0SSatish Balay #define __FUNCTION__ "MatCreateMPIBAIJ" 117079bdfe76SSatish Balay /*@C 117179bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 117279bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 117379bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 117479bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 117579bdfe76SSatish Balay performance can be increased by more than a factor of 50. 117679bdfe76SSatish Balay 117779bdfe76SSatish Balay Input Parameters: 117879bdfe76SSatish Balay . comm - MPI communicator 117979bdfe76SSatish Balay . bs - size of blockk 118079bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 118192e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 118292e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 118392e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 118492e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 118592e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 118679bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 118792e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 118879bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 118979bdfe76SSatish Balay submatrix (same for all local rows) 119092e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 119192e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 119292e8d321SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if 119392e8d321SLois Curfman McInnes it is zero. 119492e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 119579bdfe76SSatish Balay submatrix (same for all local rows). 119692e8d321SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various block rows of the 119792e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 119892e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 119979bdfe76SSatish Balay 120079bdfe76SSatish Balay Output Parameter: 120179bdfe76SSatish Balay . A - the matrix 120279bdfe76SSatish Balay 120379bdfe76SSatish Balay Notes: 120479bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 120579bdfe76SSatish Balay (possibly both). 120679bdfe76SSatish Balay 120779bdfe76SSatish Balay Storage Information: 120879bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 120979bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 121079bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 121179bdfe76SSatish Balay local matrix (a rectangular submatrix). 121279bdfe76SSatish Balay 121379bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 121479bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 121579bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 121679bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 121779bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 121879bdfe76SSatish Balay 121979bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 122079bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 122179bdfe76SSatish Balay 122279bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 122379bdfe76SSatish Balay $ ------------------- 122479bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 122579bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 122679bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 122779bdfe76SSatish Balay $ ------------------- 122879bdfe76SSatish Balay $ 122979bdfe76SSatish Balay 123079bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 123179bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 123279bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 123357b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 123479bdfe76SSatish Balay 123579bdfe76SSatish Balay Now d_nz should indicate the number of nonzeros per row in the d matrix, 123679bdfe76SSatish Balay and o_nz should indicate the number of nonzeros per row in the o matrix. 123779bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 123892e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 123992e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 12406da5968aSLois Curfman McInnes matrices. 124179bdfe76SSatish Balay 124292e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 124379bdfe76SSatish Balay 124479bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 124579bdfe76SSatish Balay @*/ 124679bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 124779bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 124879bdfe76SSatish Balay { 124979bdfe76SSatish Balay Mat B; 125079bdfe76SSatish Balay Mat_MPIBAIJ *b; 12514c50302cSBarry Smith int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 125279bdfe76SSatish Balay 1253e3372554SBarry Smith if (bs < 1) SETERRQ(1,0,"invalid block size specified"); 125479bdfe76SSatish Balay *A = 0; 125579bdfe76SSatish Balay PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 125679bdfe76SSatish Balay PLogObjectCreate(B); 125779bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 125879bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 125979bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 12604c50302cSBarry Smith MPI_Comm_size(comm,&size); 12614c50302cSBarry Smith if (size == 1) { 12624c50302cSBarry Smith B->ops.getrowij = MatGetRowIJ_MPIBAIJ; 12634c50302cSBarry Smith B->ops.restorerowij = MatRestoreRowIJ_MPIBAIJ; 12644c50302cSBarry Smith B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIBAIJ; 12654c50302cSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_MPIBAIJ; 12664c50302cSBarry Smith B->ops.lufactor = MatLUFactor_MPIBAIJ; 12674c50302cSBarry Smith B->ops.solve = MatSolve_MPIBAIJ; 12684c50302cSBarry Smith B->ops.solveadd = MatSolveAdd_MPIBAIJ; 12694c50302cSBarry Smith B->ops.solvetrans = MatSolveTrans_MPIBAIJ; 12704c50302cSBarry Smith B->ops.solvetransadd = MatSolveTransAdd_MPIBAIJ; 12714c50302cSBarry Smith B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ; 12724c50302cSBarry Smith } 12734c50302cSBarry Smith 127479bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 127579bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 127690f02eecSBarry Smith B->mapping = 0; 127779bdfe76SSatish Balay B->factor = 0; 127879bdfe76SSatish Balay B->assembled = PETSC_FALSE; 127979bdfe76SSatish Balay 128079bdfe76SSatish Balay b->insertmode = NOT_SET_VALUES; 128179bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 128279bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 128379bdfe76SSatish Balay 128479bdfe76SSatish Balay if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1285e3372554SBarry Smith SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1286e3372554SBarry Smith if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified"); 1287e3372554SBarry Smith if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified"); 1288cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1289cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 129079bdfe76SSatish Balay 129179bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 129279bdfe76SSatish Balay work[0] = m; work[1] = n; 129379bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 129479bdfe76SSatish Balay MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 129579bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 129679bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 129779bdfe76SSatish Balay } 129879bdfe76SSatish Balay if (m == PETSC_DECIDE) { 129979bdfe76SSatish Balay Mbs = M/bs; 1300e3372554SBarry Smith if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize"); 130179bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 130279bdfe76SSatish Balay m = mbs*bs; 130379bdfe76SSatish Balay } 130479bdfe76SSatish Balay if (n == PETSC_DECIDE) { 130579bdfe76SSatish Balay Nbs = N/bs; 1306e3372554SBarry Smith if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize"); 130779bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 130879bdfe76SSatish Balay n = nbs*bs; 130979bdfe76SSatish Balay } 1310e3372554SBarry Smith if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize"); 131179bdfe76SSatish Balay 131279bdfe76SSatish Balay b->m = m; B->m = m; 131379bdfe76SSatish Balay b->n = n; B->n = n; 131479bdfe76SSatish Balay b->N = N; B->N = N; 131579bdfe76SSatish Balay b->M = M; B->M = M; 131679bdfe76SSatish Balay b->bs = bs; 131779bdfe76SSatish Balay b->bs2 = bs*bs; 131879bdfe76SSatish Balay b->mbs = mbs; 131979bdfe76SSatish Balay b->nbs = nbs; 132079bdfe76SSatish Balay b->Mbs = Mbs; 132179bdfe76SSatish Balay b->Nbs = Nbs; 132279bdfe76SSatish Balay 132379bdfe76SSatish Balay /* build local table of row and column ownerships */ 132479bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 132579bdfe76SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 13260ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 132779bdfe76SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 132879bdfe76SSatish Balay b->rowners[0] = 0; 132979bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 133079bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 133179bdfe76SSatish Balay } 133279bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 133379bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 13344fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 13354fa0d573SSatish Balay b->rend_bs = b->rend * bs; 13364fa0d573SSatish Balay 133779bdfe76SSatish Balay MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 133879bdfe76SSatish Balay b->cowners[0] = 0; 133979bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 134079bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 134179bdfe76SSatish Balay } 134279bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 134379bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 13444fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 13454fa0d573SSatish Balay b->cend_bs = b->cend * bs; 134679bdfe76SSatish Balay 134779bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 134879bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 134979bdfe76SSatish Balay PLogObjectParent(B,b->A); 135079bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 135179bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 135279bdfe76SSatish Balay PLogObjectParent(B,b->B); 135379bdfe76SSatish Balay 135479bdfe76SSatish Balay /* build cache for off array entries formed */ 135579bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 135690f02eecSBarry Smith b->donotstash = 0; 135779bdfe76SSatish Balay b->colmap = 0; 135879bdfe76SSatish Balay b->garray = 0; 135979bdfe76SSatish Balay b->roworiented = 1; 136079bdfe76SSatish Balay 136179bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 136279bdfe76SSatish Balay b->lvec = 0; 136379bdfe76SSatish Balay b->Mvctx = 0; 136479bdfe76SSatish Balay 136579bdfe76SSatish Balay /* stuff for MatGetRow() */ 136679bdfe76SSatish Balay b->rowindices = 0; 136779bdfe76SSatish Balay b->rowvalues = 0; 136879bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 136979bdfe76SSatish Balay 137079bdfe76SSatish Balay *A = B; 137179bdfe76SSatish Balay return 0; 137279bdfe76SSatish Balay } 1373026e39d0SSatish Balay 1374026e39d0SSatish Balay #undef __FUNCTION__ 1375026e39d0SSatish Balay #define __FUNCTION__ "MatConvertSameType_MPIBAIJ" 13760ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 13770ac07820SSatish Balay { 13780ac07820SSatish Balay Mat mat; 13790ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 13800ac07820SSatish Balay int ierr, len=0, flg; 13810ac07820SSatish Balay 13820ac07820SSatish Balay *newmat = 0; 13830ac07820SSatish Balay PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm); 13840ac07820SSatish Balay PLogObjectCreate(mat); 13850ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 13860ac07820SSatish Balay PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 13870ac07820SSatish Balay mat->destroy = MatDestroy_MPIBAIJ; 13880ac07820SSatish Balay mat->view = MatView_MPIBAIJ; 13890ac07820SSatish Balay mat->factor = matin->factor; 13900ac07820SSatish Balay mat->assembled = PETSC_TRUE; 13910ac07820SSatish Balay 13920ac07820SSatish Balay a->m = mat->m = oldmat->m; 13930ac07820SSatish Balay a->n = mat->n = oldmat->n; 13940ac07820SSatish Balay a->M = mat->M = oldmat->M; 13950ac07820SSatish Balay a->N = mat->N = oldmat->N; 13960ac07820SSatish Balay 13970ac07820SSatish Balay a->bs = oldmat->bs; 13980ac07820SSatish Balay a->bs2 = oldmat->bs2; 13990ac07820SSatish Balay a->mbs = oldmat->mbs; 14000ac07820SSatish Balay a->nbs = oldmat->nbs; 14010ac07820SSatish Balay a->Mbs = oldmat->Mbs; 14020ac07820SSatish Balay a->Nbs = oldmat->Nbs; 14030ac07820SSatish Balay 14040ac07820SSatish Balay a->rstart = oldmat->rstart; 14050ac07820SSatish Balay a->rend = oldmat->rend; 14060ac07820SSatish Balay a->cstart = oldmat->cstart; 14070ac07820SSatish Balay a->cend = oldmat->cend; 14080ac07820SSatish Balay a->size = oldmat->size; 14090ac07820SSatish Balay a->rank = oldmat->rank; 14100ac07820SSatish Balay a->insertmode = NOT_SET_VALUES; 14110ac07820SSatish Balay a->rowvalues = 0; 14120ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 14130ac07820SSatish Balay 14140ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 14150ac07820SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 14160ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 14170ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 14180ac07820SSatish Balay ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 14190ac07820SSatish Balay if (oldmat->colmap) { 14200ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 14210ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 14220ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 14230ac07820SSatish Balay } else a->colmap = 0; 14240ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 14250ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 14260ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 14270ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 14280ac07820SSatish Balay } else a->garray = 0; 14290ac07820SSatish Balay 14300ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 14310ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 14320ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 14330ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 14340ac07820SSatish Balay ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 14350ac07820SSatish Balay PLogObjectParent(mat,a->A); 14360ac07820SSatish Balay ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 14370ac07820SSatish Balay PLogObjectParent(mat,a->B); 14380ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 14390ac07820SSatish Balay if (flg) { 14400ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 14410ac07820SSatish Balay } 14420ac07820SSatish Balay *newmat = mat; 14430ac07820SSatish Balay return 0; 14440ac07820SSatish Balay } 144557b952d6SSatish Balay 144657b952d6SSatish Balay #include "sys.h" 144757b952d6SSatish Balay 1448026e39d0SSatish Balay #undef __FUNCTION__ 1449026e39d0SSatish Balay #define __FUNCTION__ "MatLoad_MPIBAIJ" 145057b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 145157b952d6SSatish Balay { 145257b952d6SSatish Balay Mat A; 145357b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 145457b952d6SSatish Balay Scalar *vals,*buf; 145557b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 145657b952d6SSatish Balay MPI_Status status; 1457cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 145857b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 145957b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 146057b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 146157b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 146257b952d6SSatish Balay 146357b952d6SSatish Balay 146457b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 146557b952d6SSatish Balay bs2 = bs*bs; 146657b952d6SSatish Balay 146757b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 146857b952d6SSatish Balay if (!rank) { 146957b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 147057b952d6SSatish Balay ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1471e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 147257b952d6SSatish Balay } 147357b952d6SSatish Balay 147457b952d6SSatish Balay MPI_Bcast(header+1,3,MPI_INT,0,comm); 147557b952d6SSatish Balay M = header[1]; N = header[2]; 147657b952d6SSatish Balay 1477e3372554SBarry Smith if (M != N) SETERRQ(1,0,"Can only do square matrices"); 147857b952d6SSatish Balay 147957b952d6SSatish Balay /* 148057b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 148157b952d6SSatish Balay divisible by the blocksize 148257b952d6SSatish Balay */ 148357b952d6SSatish Balay Mbs = M/bs; 148457b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 148557b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 148657b952d6SSatish Balay else Mbs++; 148757b952d6SSatish Balay if (extra_rows &&!rank) { 1488b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 148957b952d6SSatish Balay } 1490537820f0SBarry Smith 149157b952d6SSatish Balay /* determine ownership of all rows */ 149257b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 149357b952d6SSatish Balay m = mbs * bs; 1494cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 1495cee3aa6bSSatish Balay browners = rowners + size + 1; 149657b952d6SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 149757b952d6SSatish Balay rowners[0] = 0; 1498cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 1499cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 150057b952d6SSatish Balay rstart = rowners[rank]; 150157b952d6SSatish Balay rend = rowners[rank+1]; 150257b952d6SSatish Balay 150357b952d6SSatish Balay /* distribute row lengths to all processors */ 150457b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 150557b952d6SSatish Balay if (!rank) { 150657b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 150757b952d6SSatish Balay ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 150857b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 150957b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1510cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 1511cee3aa6bSSatish Balay MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 151257b952d6SSatish Balay PetscFree(sndcounts); 151357b952d6SSatish Balay } 151457b952d6SSatish Balay else { 151557b952d6SSatish Balay MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 151657b952d6SSatish Balay } 151757b952d6SSatish Balay 151857b952d6SSatish Balay if (!rank) { 151957b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 152057b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 152157b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 152257b952d6SSatish Balay for ( i=0; i<size; i++ ) { 152357b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 152457b952d6SSatish Balay procsnz[i] += rowlengths[j]; 152557b952d6SSatish Balay } 152657b952d6SSatish Balay } 152757b952d6SSatish Balay PetscFree(rowlengths); 152857b952d6SSatish Balay 152957b952d6SSatish Balay /* determine max buffer needed and allocate it */ 153057b952d6SSatish Balay maxnz = 0; 153157b952d6SSatish Balay for ( i=0; i<size; i++ ) { 153257b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 153357b952d6SSatish Balay } 153457b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 153557b952d6SSatish Balay 153657b952d6SSatish Balay /* read in my part of the matrix column indices */ 153757b952d6SSatish Balay nz = procsnz[0]; 153857b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 153957b952d6SSatish Balay mycols = ibuf; 1540cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 154157b952d6SSatish Balay ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1542cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 1543cee3aa6bSSatish Balay 154457b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 154557b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 154657b952d6SSatish Balay nz = procsnz[i]; 154757b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 154857b952d6SSatish Balay MPI_Send(cols,nz,MPI_INT,i,tag,comm); 154957b952d6SSatish Balay } 155057b952d6SSatish Balay /* read in the stuff for the last proc */ 155157b952d6SSatish Balay if ( size != 1 ) { 155257b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 155357b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 155457b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 1555cee3aa6bSSatish Balay MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm); 155657b952d6SSatish Balay } 155757b952d6SSatish Balay PetscFree(cols); 155857b952d6SSatish Balay } 155957b952d6SSatish Balay else { 156057b952d6SSatish Balay /* determine buffer space needed for message */ 156157b952d6SSatish Balay nz = 0; 156257b952d6SSatish Balay for ( i=0; i<m; i++ ) { 156357b952d6SSatish Balay nz += locrowlens[i]; 156457b952d6SSatish Balay } 156557b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 156657b952d6SSatish Balay mycols = ibuf; 156757b952d6SSatish Balay /* receive message of column indices*/ 156857b952d6SSatish Balay MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 156957b952d6SSatish Balay MPI_Get_count(&status,MPI_INT,&maxnz); 1570e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 157157b952d6SSatish Balay } 157257b952d6SSatish Balay 157357b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 1574cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 1575cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 157657b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 1577cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 157857b952d6SSatish Balay masked1 = mask + Mbs; 157957b952d6SSatish Balay masked2 = masked1 + Mbs; 158057b952d6SSatish Balay rowcount = 0; nzcount = 0; 158157b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 158257b952d6SSatish Balay dcount = 0; 158357b952d6SSatish Balay odcount = 0; 158457b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 158557b952d6SSatish Balay kmax = locrowlens[rowcount]; 158657b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 158757b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 158857b952d6SSatish Balay if (!mask[tmp]) { 158957b952d6SSatish Balay mask[tmp] = 1; 159057b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 159157b952d6SSatish Balay else masked1[dcount++] = tmp; 159257b952d6SSatish Balay } 159357b952d6SSatish Balay } 159457b952d6SSatish Balay rowcount++; 159557b952d6SSatish Balay } 1596cee3aa6bSSatish Balay 159757b952d6SSatish Balay dlens[i] = dcount; 159857b952d6SSatish Balay odlens[i] = odcount; 1599cee3aa6bSSatish Balay 160057b952d6SSatish Balay /* zero out the mask elements we set */ 160157b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 160257b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 160357b952d6SSatish Balay } 1604cee3aa6bSSatish Balay 160557b952d6SSatish Balay /* create our matrix */ 1606537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 1607537820f0SBarry Smith CHKERRQ(ierr); 160857b952d6SSatish Balay A = *newmat; 16096d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 161057b952d6SSatish Balay 161157b952d6SSatish Balay if (!rank) { 161257b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 161357b952d6SSatish Balay /* read in my part of the matrix numerical values */ 161457b952d6SSatish Balay nz = procsnz[0]; 161557b952d6SSatish Balay vals = buf; 1616cee3aa6bSSatish Balay mycols = ibuf; 1617cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 161857b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1619cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 1620537820f0SBarry Smith 162157b952d6SSatish Balay /* insert into matrix */ 162257b952d6SSatish Balay jj = rstart*bs; 162357b952d6SSatish Balay for ( i=0; i<m; i++ ) { 162457b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 162557b952d6SSatish Balay mycols += locrowlens[i]; 162657b952d6SSatish Balay vals += locrowlens[i]; 162757b952d6SSatish Balay jj++; 162857b952d6SSatish Balay } 162957b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 163057b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 163157b952d6SSatish Balay nz = procsnz[i]; 163257b952d6SSatish Balay vals = buf; 163357b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 163457b952d6SSatish Balay MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 163557b952d6SSatish Balay } 163657b952d6SSatish Balay /* the last proc */ 163757b952d6SSatish Balay if ( size != 1 ){ 163857b952d6SSatish Balay nz = procsnz[i] - extra_rows; 1639cee3aa6bSSatish Balay vals = buf; 164057b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 164157b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 1642cee3aa6bSSatish Balay MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm); 164357b952d6SSatish Balay } 164457b952d6SSatish Balay PetscFree(procsnz); 164557b952d6SSatish Balay } 164657b952d6SSatish Balay else { 164757b952d6SSatish Balay /* receive numeric values */ 164857b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 164957b952d6SSatish Balay 165057b952d6SSatish Balay /* receive message of values*/ 165157b952d6SSatish Balay vals = buf; 1652cee3aa6bSSatish Balay mycols = ibuf; 165357b952d6SSatish Balay MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 165457b952d6SSatish Balay MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1655e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 165657b952d6SSatish Balay 165757b952d6SSatish Balay /* insert into matrix */ 165857b952d6SSatish Balay jj = rstart*bs; 1659cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 166057b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 166157b952d6SSatish Balay mycols += locrowlens[i]; 166257b952d6SSatish Balay vals += locrowlens[i]; 166357b952d6SSatish Balay jj++; 166457b952d6SSatish Balay } 166557b952d6SSatish Balay } 166657b952d6SSatish Balay PetscFree(locrowlens); 166757b952d6SSatish Balay PetscFree(buf); 166857b952d6SSatish Balay PetscFree(ibuf); 166957b952d6SSatish Balay PetscFree(rowners); 167057b952d6SSatish Balay PetscFree(dlens); 1671cee3aa6bSSatish Balay PetscFree(mask); 16726d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 16736d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 167457b952d6SSatish Balay return 0; 167557b952d6SSatish Balay } 167657b952d6SSatish Balay 167757b952d6SSatish Balay 1678