18016bdd1SSatish Balay #ifndef lint 2*57b952d6SSatish Balay static char vcid[] = "$Id: mmbaij.c,v 1.2 1996/06/18 16:10:55 balay Exp balay $"; 38016bdd1SSatish Balay #endif 48016bdd1SSatish Balay 58016bdd1SSatish Balay 68016bdd1SSatish Balay /* 7d9653453SSatish Balay Support for the parallel BAIJ matrix vector multiply 88016bdd1SSatish Balay */ 9d9653453SSatish Balay #include "mpibaij.h" 108016bdd1SSatish Balay #include "src/vec/vecimpl.h" 11d9653453SSatish Balay #include "../seq/baij.h" 128016bdd1SSatish Balay 13d9653453SSatish Balay int MatSetUpMultiply_MPIBAIJ(Mat mat) 148016bdd1SSatish Balay { 15d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 16d9653453SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data); 17d9653453SSatish Balay int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 18*57b952d6SSatish Balay int col,bs = baij->bs,*tmp; 198016bdd1SSatish Balay IS from,to; 208016bdd1SSatish Balay Vec gvec; 218016bdd1SSatish Balay 228016bdd1SSatish Balay /* For the first stab we make an array as long as the number of columns */ 23d9653453SSatish Balay /* mark those columns that are in baij->B */ 24d9653453SSatish Balay indices = (int *) PetscMalloc( Nbs*sizeof(int) ); CHKPTRQ(indices); 25d9653453SSatish Balay PetscMemzero(indices,Nbs*sizeof(int)); 26d9653453SSatish Balay for ( i=0; i<B->mbs; i++ ) { 278016bdd1SSatish Balay for ( j=0; j<B->ilen[i]; j++ ) { 28d9653453SSatish Balay if (!indices[aj[B->i[i] + j]]) ec++; 29d9653453SSatish Balay indices[aj[B->i[i] + j] ] = 1; 308016bdd1SSatish Balay } 318016bdd1SSatish Balay } 328016bdd1SSatish Balay 338016bdd1SSatish Balay /* form array of columns we need */ 348016bdd1SSatish Balay garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray); 35d9653453SSatish Balay tmp = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp) 368016bdd1SSatish Balay ec = 0; 37d9653453SSatish Balay for ( i=0; i<Nbs; i++ ) { 388016bdd1SSatish Balay if (indices[i]) garray[ec++] = i; 398016bdd1SSatish Balay } 408016bdd1SSatish Balay 418016bdd1SSatish Balay /* make indices now point into garray */ 428016bdd1SSatish Balay for ( i=0; i<ec; i++ ) { 43d9653453SSatish Balay indices[garray[i]] = i; 448016bdd1SSatish Balay } 458016bdd1SSatish Balay 468016bdd1SSatish Balay /* compact out the extra columns in B */ 47d9653453SSatish Balay for ( i=0; i<B->mbs; i++ ) { 488016bdd1SSatish Balay for ( j=0; j<B->ilen[i]; j++ ) { 49d9653453SSatish Balay aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 508016bdd1SSatish Balay } 518016bdd1SSatish Balay } 52d9653453SSatish Balay B->nbs = ec; 53d9653453SSatish Balay B->n = ec*B->bs; 548016bdd1SSatish Balay PetscFree(indices); 558016bdd1SSatish Balay 56*57b952d6SSatish Balay for ( i=0,col=0; i<ec; i++ ) { 57*57b952d6SSatish Balay for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j; 58d9653453SSatish Balay } 598016bdd1SSatish Balay /* create local vector that is used to scatter into */ 60d9653453SSatish Balay ierr = VecCreateSeq(MPI_COMM_SELF,ec*bs,&baij->lvec); CHKERRQ(ierr); 618016bdd1SSatish Balay 628016bdd1SSatish Balay /* create two temporary Index sets for build scatter gather */ 63d9653453SSatish Balay ierr = ISCreateSeq(MPI_COMM_SELF,ec*bs,tmp,&from); CHKERRQ(ierr); 64d9653453SSatish Balay ierr = ISCreateStrideSeq(MPI_COMM_SELF,ec*bs,0,1,&to); CHKERRQ(ierr); 658016bdd1SSatish Balay 668016bdd1SSatish Balay /* create temporary global vector to generate scatter context */ 678016bdd1SSatish Balay /* this is inefficient, but otherwise we must do either 688016bdd1SSatish Balay 1) save garray until the first actual scatter when the vector is known or 698016bdd1SSatish Balay 2) have another way of generating a scatter context without a vector.*/ 70d9653453SSatish Balay ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec); CHKERRQ(ierr); 718016bdd1SSatish Balay 728016bdd1SSatish Balay /* gnerate the scatter context */ 73d9653453SSatish Balay ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx); CHKERRQ(ierr); 74d9653453SSatish Balay PLogObjectParent(mat,baij->Mvctx); 75d9653453SSatish Balay PLogObjectParent(mat,baij->lvec); 768016bdd1SSatish Balay PLogObjectParent(mat,from); 778016bdd1SSatish Balay PLogObjectParent(mat,to); 78d9653453SSatish Balay baij->garray = garray; 798016bdd1SSatish Balay PLogObjectMemory(mat,(ec+1)*sizeof(int)); 808016bdd1SSatish Balay ierr = ISDestroy(from); CHKERRQ(ierr); 818016bdd1SSatish Balay ierr = ISDestroy(to); CHKERRQ(ierr); 828016bdd1SSatish Balay ierr = VecDestroy(gvec); 83d9653453SSatish Balay PetscFree(tmp); 848016bdd1SSatish Balay return 0; 858016bdd1SSatish Balay } 868016bdd1SSatish Balay 878016bdd1SSatish Balay 888016bdd1SSatish Balay /* 89d9653453SSatish Balay Takes the local part of an already assembled MPIBAIJ matrix 908016bdd1SSatish Balay and disassembles it. This is to allow new nonzeros into the matrix 918016bdd1SSatish Balay that require more communication in the matrix vector multiply. 928016bdd1SSatish Balay Thus certain data-structures must be rebuilt. 938016bdd1SSatish Balay 948016bdd1SSatish Balay Kind of slow! But that's what application programmers get when 958016bdd1SSatish Balay they are sloppy. 968016bdd1SSatish Balay */ 97d9653453SSatish Balay int DisAssemble_MPIBAIJ(Mat A) 988016bdd1SSatish Balay { 99d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 100d9653453SSatish Balay Mat B = baij->B,Bnew; 101d9653453SSatish Balay Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 102d9653453SSatish Balay int ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray; 103d9653453SSatish Balay int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m; 104d9653453SSatish Balay Scalar *a=Bbaij->a; 1058016bdd1SSatish Balay 1068016bdd1SSatish Balay /* free stuff related to matrix-vec multiply */ 107d9653453SSatish Balay ierr = VecGetSize(baij->lvec,&ec); /* needed for PLogObjectMemory below */ 108d9653453SSatish Balay ierr = VecDestroy(baij->lvec); CHKERRQ(ierr); baij->lvec = 0; 109d9653453SSatish Balay ierr = VecScatterDestroy(baij->Mvctx); CHKERRQ(ierr); baij->Mvctx = 0; 110d9653453SSatish Balay if (baij->colmap) { 111d9653453SSatish Balay PetscFree(baij->colmap); baij->colmap = 0; 112d9653453SSatish Balay PLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 1138016bdd1SSatish Balay } 1148016bdd1SSatish Balay 1158016bdd1SSatish Balay /* make sure that B is assembled so we can access its values */ 1168016bdd1SSatish Balay ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1178016bdd1SSatish Balay MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1188016bdd1SSatish Balay 1198016bdd1SSatish Balay /* invent new B and copy stuff over */ 120d9653453SSatish Balay nz = (int *) PetscMalloc( mbs*sizeof(int) ); CHKPTRQ(nz); 121d9653453SSatish Balay for ( i=0; i<mbs; i++ ) { 122d9653453SSatish Balay nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 1238016bdd1SSatish Balay } 124d9653453SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,baij->bs,m,n,0,nz,&Bnew); CHKERRQ(ierr); 1258016bdd1SSatish Balay PetscFree(nz); 126d9653453SSatish Balay 127d9653453SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 128d9653453SSatish Balay for ( i=0; i<mbs; i++ ) { 129d9653453SSatish Balay rvals[0] = bs*i; 130d9653453SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 131d9653453SSatish Balay for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) { 132*57b952d6SSatish Balay col = garray[Bbaij->j[j]]*bs; 133d9653453SSatish Balay for (k=0; k<bs; k++ ) { 134d9653453SSatish Balay ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,INSERT_VALUES);CHKERRQ(ierr); 135d9653453SSatish Balay col++; 1368016bdd1SSatish Balay } 1378016bdd1SSatish Balay } 138d9653453SSatish Balay } 139d9653453SSatish Balay PetscFree(baij->garray); baij->garray = 0; 140*57b952d6SSatish Balay PetscFree(rvals); 1418016bdd1SSatish Balay PLogObjectMemory(A,-ec*sizeof(int)); 1428016bdd1SSatish Balay ierr = MatDestroy(B); CHKERRQ(ierr); 1438016bdd1SSatish Balay PLogObjectParent(A,Bnew); 144d9653453SSatish Balay baij->B = Bnew; 1458016bdd1SSatish Balay A->was_assembled = PETSC_FALSE; 1468016bdd1SSatish Balay return 0; 1478016bdd1SSatish Balay } 1488016bdd1SSatish Balay 1498016bdd1SSatish Balay 150