1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*0bdbc534SSatish Balay static char vcid[] = "$Id: mmbaij.c,v 1.16 1997/11/03 04:46:15 bsmith Exp balay $"; 38016bdd1SSatish Balay #endif 48016bdd1SSatish Balay 58016bdd1SSatish Balay 68016bdd1SSatish Balay /* 7d9653453SSatish Balay Support for the parallel BAIJ matrix vector multiply 88016bdd1SSatish Balay */ 970f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 108016bdd1SSatish Balay #include "src/vec/vecimpl.h" 118016bdd1SSatish Balay 125615d1e5SSatish Balay #undef __FUNC__ 135615d1e5SSatish Balay #define __FUNC__ "MatSetUpMultiply_MPIBAIJ" 14d9653453SSatish Balay int MatSetUpMultiply_MPIBAIJ(Mat mat) 158016bdd1SSatish Balay { 16d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 17d9653453SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data); 18d9653453SSatish Balay int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 19537820f0SBarry Smith int col,bs = baij->bs,*tmp,*stmp; 208016bdd1SSatish Balay IS from,to; 218016bdd1SSatish Balay Vec gvec; 228016bdd1SSatish Balay 233a40ed3dSBarry Smith PetscFunctionBegin; 248016bdd1SSatish Balay /* For the first stab we make an array as long as the number of columns */ 25d9653453SSatish Balay /* mark those columns that are in baij->B */ 269dbe0bc3SSatish Balay indices = (int *) PetscMalloc( (Nbs+1)*sizeof(int) ); CHKPTRQ(indices); 27d9653453SSatish Balay PetscMemzero(indices,Nbs*sizeof(int)); 28d9653453SSatish Balay for ( i=0; i<B->mbs; i++ ) { 298016bdd1SSatish Balay for ( j=0; j<B->ilen[i]; j++ ) { 30d9653453SSatish Balay if (!indices[aj[B->i[i] + j]]) ec++; 31d9653453SSatish Balay indices[aj[B->i[i] + j] ] = 1; 328016bdd1SSatish Balay } 338016bdd1SSatish Balay } 348016bdd1SSatish Balay 358016bdd1SSatish Balay /* form array of columns we need */ 368016bdd1SSatish Balay garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray); 37d9653453SSatish Balay tmp = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp) 388016bdd1SSatish Balay ec = 0; 39d9653453SSatish Balay for ( i=0; i<Nbs; i++ ) { 40*0bdbc534SSatish Balay if (indices[i]) { 41*0bdbc534SSatish Balay garray[ec++] = i; 42*0bdbc534SSatish Balay } 438016bdd1SSatish Balay } 448016bdd1SSatish Balay 458016bdd1SSatish Balay /* make indices now point into garray */ 468016bdd1SSatish Balay for ( i=0; i<ec; i++ ) { 47d9653453SSatish Balay indices[garray[i]] = i; 488016bdd1SSatish Balay } 498016bdd1SSatish Balay 508016bdd1SSatish Balay /* compact out the extra columns in B */ 51d9653453SSatish Balay for ( i=0; i<B->mbs; i++ ) { 528016bdd1SSatish Balay for ( j=0; j<B->ilen[i]; j++ ) { 53d9653453SSatish Balay aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 548016bdd1SSatish Balay } 558016bdd1SSatish Balay } 56d9653453SSatish Balay B->nbs = ec; 57d9653453SSatish Balay B->n = ec*B->bs; 588016bdd1SSatish Balay PetscFree(indices); 598016bdd1SSatish Balay 6057b952d6SSatish Balay for ( i=0,col=0; i<ec; i++ ) { 6157b952d6SSatish Balay for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j; 62d9653453SSatish Balay } 638016bdd1SSatish Balay /* create local vector that is used to scatter into */ 64029af93fSBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec); CHKERRQ(ierr); 658016bdd1SSatish Balay 66c16cb8f2SBarry Smith /* create two temporary index sets for building scatter-gather */ 67c16cb8f2SBarry Smith 68029af93fSBarry Smith /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from); CHKERRQ(ierr); */ 69c16cb8f2SBarry Smith for ( i=0,col=0; i<ec; i++ ) { 70c16cb8f2SBarry Smith garray[i] = bs*garray[i]; 71c16cb8f2SBarry Smith } 72029af93fSBarry Smith ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 73c16cb8f2SBarry Smith for ( i=0,col=0; i<ec; i++ ) { 74c16cb8f2SBarry Smith garray[i] = garray[i]/bs; 75c16cb8f2SBarry Smith } 76c16cb8f2SBarry Smith 77537820f0SBarry Smith stmp = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(stmp); 78537820f0SBarry Smith for ( i=0; i<ec; i++ ) { stmp[i] = bs*i; } 79029af93fSBarry Smith ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 80537820f0SBarry Smith PetscFree(stmp); 818016bdd1SSatish Balay 828016bdd1SSatish Balay /* create temporary global vector to generate scatter context */ 838016bdd1SSatish Balay /* this is inefficient, but otherwise we must do either 848016bdd1SSatish Balay 1) save garray until the first actual scatter when the vector is known or 858016bdd1SSatish Balay 2) have another way of generating a scatter context without a vector.*/ 86d9653453SSatish Balay ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec); CHKERRQ(ierr); 878016bdd1SSatish Balay 888016bdd1SSatish Balay /* gnerate the scatter context */ 89d9653453SSatish Balay ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 9090f02eecSBarry Smith 9190f02eecSBarry Smith /* 9290f02eecSBarry Smith Post the receives for the first matrix vector product. We sync-chronize after 9390f02eecSBarry Smith this on the chance that the user immediately calls MatMult() after assemblying 9490f02eecSBarry Smith the matrix. 9590f02eecSBarry Smith */ 9643a90d84SBarry Smith ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 97ca161407SBarry Smith ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 9890f02eecSBarry Smith 99d9653453SSatish Balay PLogObjectParent(mat,baij->Mvctx); 100d9653453SSatish Balay PLogObjectParent(mat,baij->lvec); 1018016bdd1SSatish Balay PLogObjectParent(mat,from); 1028016bdd1SSatish Balay PLogObjectParent(mat,to); 103d9653453SSatish Balay baij->garray = garray; 1048016bdd1SSatish Balay PLogObjectMemory(mat,(ec+1)*sizeof(int)); 1058016bdd1SSatish Balay ierr = ISDestroy(from); CHKERRQ(ierr); 1068016bdd1SSatish Balay ierr = ISDestroy(to); CHKERRQ(ierr); 1078016bdd1SSatish Balay ierr = VecDestroy(gvec); 108d9653453SSatish Balay PetscFree(tmp); 1093a40ed3dSBarry Smith PetscFunctionReturn(0); 1108016bdd1SSatish Balay } 1118016bdd1SSatish Balay 1128016bdd1SSatish Balay 1138016bdd1SSatish Balay /* 114d9653453SSatish Balay Takes the local part of an already assembled MPIBAIJ matrix 1158016bdd1SSatish Balay and disassembles it. This is to allow new nonzeros into the matrix 1168016bdd1SSatish Balay that require more communication in the matrix vector multiply. 1178016bdd1SSatish Balay Thus certain data-structures must be rebuilt. 1188016bdd1SSatish Balay 1198016bdd1SSatish Balay Kind of slow! But that's what application programmers get when 1208016bdd1SSatish Balay they are sloppy. 1218016bdd1SSatish Balay */ 1225615d1e5SSatish Balay #undef __FUNC__ 1235615d1e5SSatish Balay #define __FUNC__ "DisAssemble_MPIBAIJ" 124d9653453SSatish Balay int DisAssemble_MPIBAIJ(Mat A) 1258016bdd1SSatish Balay { 126d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 127d9653453SSatish Balay Mat B = baij->B,Bnew; 128d9653453SSatish Balay Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 129d9653453SSatish Balay int ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray; 130d9653453SSatish Balay int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m; 131d9653453SSatish Balay Scalar *a=Bbaij->a; 1328016bdd1SSatish Balay 1333a40ed3dSBarry Smith PetscFunctionBegin; 1348016bdd1SSatish Balay /* free stuff related to matrix-vec multiply */ 135d9653453SSatish Balay ierr = VecGetSize(baij->lvec,&ec); /* needed for PLogObjectMemory below */ 136d9653453SSatish Balay ierr = VecDestroy(baij->lvec); CHKERRQ(ierr); baij->lvec = 0; 137d9653453SSatish Balay ierr = VecScatterDestroy(baij->Mvctx); CHKERRQ(ierr); baij->Mvctx = 0; 138d9653453SSatish Balay if (baij->colmap) { 139d9653453SSatish Balay PetscFree(baij->colmap); baij->colmap = 0; 140d9653453SSatish Balay PLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 1418016bdd1SSatish Balay } 1428016bdd1SSatish Balay 1438016bdd1SSatish Balay /* make sure that B is assembled so we can access its values */ 1446d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1456d4a8577SBarry Smith MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1468016bdd1SSatish Balay 1478016bdd1SSatish Balay /* invent new B and copy stuff over */ 148d9653453SSatish Balay nz = (int *) PetscMalloc( mbs*sizeof(int) ); CHKPTRQ(nz); 149d9653453SSatish Balay for ( i=0; i<mbs; i++ ) { 150d9653453SSatish Balay nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 1518016bdd1SSatish Balay } 152029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew); CHKERRQ(ierr); 1538016bdd1SSatish Balay PetscFree(nz); 154d9653453SSatish Balay 155d9653453SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 156d9653453SSatish Balay for ( i=0; i<mbs; i++ ) { 157d9653453SSatish Balay rvals[0] = bs*i; 158d9653453SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 159d9653453SSatish Balay for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) { 16057b952d6SSatish Balay col = garray[Bbaij->j[j]]*bs; 161d9653453SSatish Balay for (k=0; k<bs; k++ ) { 162d9653453SSatish Balay ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,INSERT_VALUES);CHKERRQ(ierr); 163d9653453SSatish Balay col++; 1648016bdd1SSatish Balay } 1658016bdd1SSatish Balay } 166d9653453SSatish Balay } 167d9653453SSatish Balay PetscFree(baij->garray); baij->garray = 0; 16857b952d6SSatish Balay PetscFree(rvals); 1698016bdd1SSatish Balay PLogObjectMemory(A,-ec*sizeof(int)); 1708016bdd1SSatish Balay ierr = MatDestroy(B); CHKERRQ(ierr); 1718016bdd1SSatish Balay PLogObjectParent(A,Bnew); 172d9653453SSatish Balay baij->B = Bnew; 1738016bdd1SSatish Balay A->was_assembled = PETSC_FALSE; 1743a40ed3dSBarry Smith PetscFunctionReturn(0); 1758016bdd1SSatish Balay } 1768016bdd1SSatish Balay 1778016bdd1SSatish Balay 178