18016bdd1SSatish Balay #ifndef lint 2*537820f0SBarry Smith static char vcid[] = "$Id: mmbaij.c,v 1.6 1996/08/08 14:43:40 bsmith Exp bsmith $"; 38016bdd1SSatish Balay #endif 48016bdd1SSatish Balay 58016bdd1SSatish Balay 68016bdd1SSatish Balay /* 7d9653453SSatish Balay Support for the parallel BAIJ matrix vector multiply 88016bdd1SSatish Balay */ 970f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 108016bdd1SSatish Balay #include "src/vec/vecimpl.h" 118016bdd1SSatish Balay 12d9653453SSatish Balay int MatSetUpMultiply_MPIBAIJ(Mat mat) 138016bdd1SSatish Balay { 14d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 15d9653453SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data); 16d9653453SSatish Balay int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 17*537820f0SBarry Smith int col,bs = baij->bs,*tmp,*stmp; 188016bdd1SSatish Balay IS from,to; 198016bdd1SSatish Balay Vec gvec; 208016bdd1SSatish Balay 218016bdd1SSatish Balay /* For the first stab we make an array as long as the number of columns */ 22d9653453SSatish Balay /* mark those columns that are in baij->B */ 23d9653453SSatish Balay indices = (int *) PetscMalloc( Nbs*sizeof(int) ); CHKPTRQ(indices); 24d9653453SSatish Balay PetscMemzero(indices,Nbs*sizeof(int)); 25d9653453SSatish Balay for ( i=0; i<B->mbs; i++ ) { 268016bdd1SSatish Balay for ( j=0; j<B->ilen[i]; j++ ) { 27d9653453SSatish Balay if (!indices[aj[B->i[i] + j]]) ec++; 28d9653453SSatish Balay indices[aj[B->i[i] + j] ] = 1; 298016bdd1SSatish Balay } 308016bdd1SSatish Balay } 318016bdd1SSatish Balay 328016bdd1SSatish Balay /* form array of columns we need */ 338016bdd1SSatish Balay garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray); 34d9653453SSatish Balay tmp = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp) 358016bdd1SSatish Balay ec = 0; 36d9653453SSatish Balay for ( i=0; i<Nbs; i++ ) { 378016bdd1SSatish Balay if (indices[i]) garray[ec++] = i; 388016bdd1SSatish Balay } 398016bdd1SSatish Balay 408016bdd1SSatish Balay /* make indices now point into garray */ 418016bdd1SSatish Balay for ( i=0; i<ec; i++ ) { 42d9653453SSatish Balay indices[garray[i]] = i; 438016bdd1SSatish Balay } 448016bdd1SSatish Balay 458016bdd1SSatish Balay /* compact out the extra columns in B */ 46d9653453SSatish Balay for ( i=0; i<B->mbs; i++ ) { 478016bdd1SSatish Balay for ( j=0; j<B->ilen[i]; j++ ) { 48d9653453SSatish Balay aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 498016bdd1SSatish Balay } 508016bdd1SSatish Balay } 51d9653453SSatish Balay B->nbs = ec; 52d9653453SSatish Balay B->n = ec*B->bs; 538016bdd1SSatish Balay PetscFree(indices); 548016bdd1SSatish Balay 5557b952d6SSatish Balay for ( i=0,col=0; i<ec; i++ ) { 5657b952d6SSatish Balay for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j; 57d9653453SSatish Balay } 588016bdd1SSatish Balay /* create local vector that is used to scatter into */ 59d9653453SSatish Balay ierr = VecCreateSeq(MPI_COMM_SELF,ec*bs,&baij->lvec); CHKERRQ(ierr); 608016bdd1SSatish Balay 61c16cb8f2SBarry Smith /* create two temporary index sets for building scatter-gather */ 62c16cb8f2SBarry Smith 63*537820f0SBarry Smith /* ierr = ISCreateGeneral(MPI_COMM_SELF,ec*bs,tmp,&from); CHKERRQ(ierr); */ 64c16cb8f2SBarry Smith for ( i=0,col=0; i<ec; i++ ) { 65c16cb8f2SBarry Smith garray[i] = bs*garray[i]; 66c16cb8f2SBarry Smith } 67*537820f0SBarry Smith ierr = ISCreateBlock(MPI_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 68c16cb8f2SBarry Smith for ( i=0,col=0; i<ec; i++ ) { 69c16cb8f2SBarry Smith garray[i] = garray[i]/bs; 70c16cb8f2SBarry Smith } 71c16cb8f2SBarry Smith 72*537820f0SBarry Smith stmp = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(stmp); 73*537820f0SBarry Smith for ( i=0; i<ec; i++ ) { stmp[i] = bs*i; } 74*537820f0SBarry Smith ierr = ISCreateBlock(MPI_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 75*537820f0SBarry Smith PetscFree(stmp); 768016bdd1SSatish Balay 778016bdd1SSatish Balay /* create temporary global vector to generate scatter context */ 788016bdd1SSatish Balay /* this is inefficient, but otherwise we must do either 798016bdd1SSatish Balay 1) save garray until the first actual scatter when the vector is known or 808016bdd1SSatish Balay 2) have another way of generating a scatter context without a vector.*/ 81d9653453SSatish Balay ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec); CHKERRQ(ierr); 828016bdd1SSatish Balay 838016bdd1SSatish Balay /* gnerate the scatter context */ 84d9653453SSatish Balay ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 85d9653453SSatish Balay PLogObjectParent(mat,baij->Mvctx); 86d9653453SSatish Balay PLogObjectParent(mat,baij->lvec); 878016bdd1SSatish Balay PLogObjectParent(mat,from); 888016bdd1SSatish Balay PLogObjectParent(mat,to); 89d9653453SSatish Balay baij->garray = garray; 908016bdd1SSatish Balay PLogObjectMemory(mat,(ec+1)*sizeof(int)); 918016bdd1SSatish Balay ierr = ISDestroy(from); CHKERRQ(ierr); 928016bdd1SSatish Balay ierr = ISDestroy(to); CHKERRQ(ierr); 938016bdd1SSatish Balay ierr = VecDestroy(gvec); 94d9653453SSatish Balay PetscFree(tmp); 958016bdd1SSatish Balay return 0; 968016bdd1SSatish Balay } 978016bdd1SSatish Balay 988016bdd1SSatish Balay 998016bdd1SSatish Balay /* 100d9653453SSatish Balay Takes the local part of an already assembled MPIBAIJ matrix 1018016bdd1SSatish Balay and disassembles it. This is to allow new nonzeros into the matrix 1028016bdd1SSatish Balay that require more communication in the matrix vector multiply. 1038016bdd1SSatish Balay Thus certain data-structures must be rebuilt. 1048016bdd1SSatish Balay 1058016bdd1SSatish Balay Kind of slow! But that's what application programmers get when 1068016bdd1SSatish Balay they are sloppy. 1078016bdd1SSatish Balay */ 108d9653453SSatish Balay int DisAssemble_MPIBAIJ(Mat A) 1098016bdd1SSatish Balay { 110d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 111d9653453SSatish Balay Mat B = baij->B,Bnew; 112d9653453SSatish Balay Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 113d9653453SSatish Balay int ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray; 114d9653453SSatish Balay int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m; 115d9653453SSatish Balay Scalar *a=Bbaij->a; 1168016bdd1SSatish Balay 1178016bdd1SSatish Balay /* free stuff related to matrix-vec multiply */ 118d9653453SSatish Balay ierr = VecGetSize(baij->lvec,&ec); /* needed for PLogObjectMemory below */ 119d9653453SSatish Balay ierr = VecDestroy(baij->lvec); CHKERRQ(ierr); baij->lvec = 0; 120d9653453SSatish Balay ierr = VecScatterDestroy(baij->Mvctx); CHKERRQ(ierr); baij->Mvctx = 0; 121d9653453SSatish Balay if (baij->colmap) { 122d9653453SSatish Balay PetscFree(baij->colmap); baij->colmap = 0; 123d9653453SSatish Balay PLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 1248016bdd1SSatish Balay } 1258016bdd1SSatish Balay 1268016bdd1SSatish Balay /* make sure that B is assembled so we can access its values */ 1276d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1286d4a8577SBarry Smith MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1298016bdd1SSatish Balay 1308016bdd1SSatish Balay /* invent new B and copy stuff over */ 131d9653453SSatish Balay nz = (int *) PetscMalloc( mbs*sizeof(int) ); CHKPTRQ(nz); 132d9653453SSatish Balay for ( i=0; i<mbs; i++ ) { 133d9653453SSatish Balay nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 1348016bdd1SSatish Balay } 135d9653453SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,baij->bs,m,n,0,nz,&Bnew); CHKERRQ(ierr); 1368016bdd1SSatish Balay PetscFree(nz); 137d9653453SSatish Balay 138d9653453SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 139d9653453SSatish Balay for ( i=0; i<mbs; i++ ) { 140d9653453SSatish Balay rvals[0] = bs*i; 141d9653453SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 142d9653453SSatish Balay for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) { 14357b952d6SSatish Balay col = garray[Bbaij->j[j]]*bs; 144d9653453SSatish Balay for (k=0; k<bs; k++ ) { 145d9653453SSatish Balay ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,INSERT_VALUES);CHKERRQ(ierr); 146d9653453SSatish Balay col++; 1478016bdd1SSatish Balay } 1488016bdd1SSatish Balay } 149d9653453SSatish Balay } 150d9653453SSatish Balay PetscFree(baij->garray); baij->garray = 0; 15157b952d6SSatish Balay PetscFree(rvals); 1528016bdd1SSatish Balay PLogObjectMemory(A,-ec*sizeof(int)); 1538016bdd1SSatish Balay ierr = MatDestroy(B); CHKERRQ(ierr); 1548016bdd1SSatish Balay PLogObjectParent(A,Bnew); 155d9653453SSatish Balay baij->B = Bnew; 1568016bdd1SSatish Balay A->was_assembled = PETSC_FALSE; 1578016bdd1SSatish Balay return 0; 1588016bdd1SSatish Balay } 1598016bdd1SSatish Balay 1608016bdd1SSatish Balay 161