1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*0064e2bbSSatish Balay static char vcid[] = "$Id: mmbaij.c,v 1.19 1999/01/08 21:03:17 balay Exp balay $"; 38016bdd1SSatish Balay #endif 48016bdd1SSatish Balay 58016bdd1SSatish Balay 68016bdd1SSatish Balay /* 7d9653453SSatish Balay Support for the parallel BAIJ matrix vector multiply 88016bdd1SSatish Balay */ 970f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 108016bdd1SSatish Balay #include "src/vec/vecimpl.h" 118016bdd1SSatish Balay 125615d1e5SSatish Balay #undef __FUNC__ 135615d1e5SSatish Balay #define __FUNC__ "MatSetUpMultiply_MPIBAIJ" 14d9653453SSatish Balay int MatSetUpMultiply_MPIBAIJ(Mat mat) 158016bdd1SSatish Balay { 16d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 17d9653453SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data); 18d9653453SSatish Balay int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 19537820f0SBarry Smith int col,bs = baij->bs,*tmp,*stmp; 208016bdd1SSatish Balay IS from,to; 218016bdd1SSatish Balay Vec gvec; 2273a2e727SSatish Balay #if defined (USE_CTABLE) 2373a2e727SSatish Balay Table gid1_lid1; 2473a2e727SSatish Balay CTablePos tpos; 2573a2e727SSatish Balay int gid, lid; 2673a2e727SSatish Balay #endif 278016bdd1SSatish Balay 283a40ed3dSBarry Smith PetscFunctionBegin; 2973a2e727SSatish Balay 3073a2e727SSatish Balay #if defined (USE_CTABLE) 3173a2e727SSatish Balay /* use a table - Mark Adams */ 3273a2e727SSatish Balay TableCreate( &gid1_lid1, B->mbs ); 3373a2e727SSatish Balay for ( i=0; i<B->mbs; i++ ) { 3473a2e727SSatish Balay for ( j=0; j<B->ilen[i]; j++ ) { 3573a2e727SSatish Balay int gid1 = aj[B->i[i] + j] + 1; 3673a2e727SSatish Balay if ( !TableFind( gid1_lid1, gid1 ) ){ 3773a2e727SSatish Balay /* one based table */ 3873a2e727SSatish Balay ierr = TableAdd( gid1_lid1, gid1, ++ec ); CHKERRQ(ierr); 3973a2e727SSatish Balay } 4073a2e727SSatish Balay } 4173a2e727SSatish Balay } 4273a2e727SSatish Balay /* form array of columns we need */ 4373a2e727SSatish Balay garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray); 4473a2e727SSatish Balay tmp = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp); 4573a2e727SSatish Balay ierr = TableGetHeadPosition( gid1_lid1, &tpos ); CHKERRQ(ierr); 4673a2e727SSatish Balay while( tpos ) { 4773a2e727SSatish Balay ierr = TableGetNext( gid1_lid1, &tpos, &gid, &lid ); CHKERRQ(ierr); 4873a2e727SSatish Balay gid--; lid--; 4973a2e727SSatish Balay garray[lid] = gid; 5073a2e727SSatish Balay } 51*0064e2bbSSatish Balay ierr = PetscSortInt(ec,garray); CHKERRQ(ierr); 52*0064e2bbSSatish Balay /* qsort( garray, ec, sizeof(int), intcomparcarc ); */ 5373a2e727SSatish Balay TableRemoveAll( gid1_lid1 ); 5473a2e727SSatish Balay for ( i=0; i<ec; i++ ) { 5573a2e727SSatish Balay ierr = TableAdd( gid1_lid1, garray[i] + 1, i + 1 ); CHKERRQ(ierr); 5673a2e727SSatish Balay } 5773a2e727SSatish Balay /* compact out the extra columns in B */ 5873a2e727SSatish Balay for ( i=0; i<B->mbs; i++ ) { 5973a2e727SSatish Balay for ( j=0; j<B->ilen[i]; j++ ) { 6073a2e727SSatish Balay int gid1 = aj[B->i[i] + j] + 1; 6173a2e727SSatish Balay lid = TableFind( gid1_lid1, gid1 ) - 1; 6273a2e727SSatish Balay aj[B->i[i] + j] = lid; 6373a2e727SSatish Balay } 6473a2e727SSatish Balay } 6573a2e727SSatish Balay B->nbs = ec; 6673a2e727SSatish Balay B->n = ec*B->bs; 6773a2e727SSatish Balay TableDelete(gid1_lid1); 6873a2e727SSatish Balay /* Mark Adams */ 6973a2e727SSatish Balay #else 708016bdd1SSatish Balay /* For the first stab we make an array as long as the number of columns */ 71d9653453SSatish Balay /* mark those columns that are in baij->B */ 729dbe0bc3SSatish Balay indices = (int *) PetscMalloc( (Nbs+1)*sizeof(int) ); CHKPTRQ(indices); 73d9653453SSatish Balay PetscMemzero(indices,Nbs*sizeof(int)); 74d9653453SSatish Balay for ( i=0; i<B->mbs; i++ ) { 758016bdd1SSatish Balay for ( j=0; j<B->ilen[i]; j++ ) { 76d9653453SSatish Balay if (!indices[aj[B->i[i] + j]]) ec++; 77d9653453SSatish Balay indices[aj[B->i[i] + j] ] = 1; 788016bdd1SSatish Balay } 798016bdd1SSatish Balay } 808016bdd1SSatish Balay 818016bdd1SSatish Balay /* form array of columns we need */ 828016bdd1SSatish Balay garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray); 83d9653453SSatish Balay tmp = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp) 848016bdd1SSatish Balay ec = 0; 85d9653453SSatish Balay for ( i=0; i<Nbs; i++ ) { 860bdbc534SSatish Balay if (indices[i]) { 870bdbc534SSatish Balay garray[ec++] = i; 880bdbc534SSatish Balay } 898016bdd1SSatish Balay } 908016bdd1SSatish Balay 918016bdd1SSatish Balay /* make indices now point into garray */ 928016bdd1SSatish Balay for ( i=0; i<ec; i++ ) { 93d9653453SSatish Balay indices[garray[i]] = i; 948016bdd1SSatish Balay } 958016bdd1SSatish Balay 968016bdd1SSatish Balay /* compact out the extra columns in B */ 97d9653453SSatish Balay for ( i=0; i<B->mbs; i++ ) { 988016bdd1SSatish Balay for ( j=0; j<B->ilen[i]; j++ ) { 99d9653453SSatish Balay aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 1008016bdd1SSatish Balay } 1018016bdd1SSatish Balay } 102d9653453SSatish Balay B->nbs = ec; 103d9653453SSatish Balay B->n = ec*B->bs; 1048016bdd1SSatish Balay PetscFree(indices); 10573a2e727SSatish Balay #endif 1068016bdd1SSatish Balay 10757b952d6SSatish Balay for ( i=0,col=0; i<ec; i++ ) { 10857b952d6SSatish Balay for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j; 109d9653453SSatish Balay } 1108016bdd1SSatish Balay /* create local vector that is used to scatter into */ 111029af93fSBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec); CHKERRQ(ierr); 1128016bdd1SSatish Balay 113c16cb8f2SBarry Smith /* create two temporary index sets for building scatter-gather */ 114c16cb8f2SBarry Smith 115029af93fSBarry Smith /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from); CHKERRQ(ierr); */ 116c16cb8f2SBarry Smith for ( i=0,col=0; i<ec; i++ ) { 117c16cb8f2SBarry Smith garray[i] = bs*garray[i]; 118c16cb8f2SBarry Smith } 119029af93fSBarry Smith ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 120c16cb8f2SBarry Smith for ( i=0,col=0; i<ec; i++ ) { 121c16cb8f2SBarry Smith garray[i] = garray[i]/bs; 122c16cb8f2SBarry Smith } 123c16cb8f2SBarry Smith 124537820f0SBarry Smith stmp = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(stmp); 125537820f0SBarry Smith for ( i=0; i<ec; i++ ) { stmp[i] = bs*i; } 126029af93fSBarry Smith ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 127537820f0SBarry Smith PetscFree(stmp); 1288016bdd1SSatish Balay 1298016bdd1SSatish Balay /* create temporary global vector to generate scatter context */ 1308016bdd1SSatish Balay /* this is inefficient, but otherwise we must do either 1318016bdd1SSatish Balay 1) save garray until the first actual scatter when the vector is known or 1328016bdd1SSatish Balay 2) have another way of generating a scatter context without a vector.*/ 133d9653453SSatish Balay ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec); CHKERRQ(ierr); 1348016bdd1SSatish Balay 1358016bdd1SSatish Balay /* gnerate the scatter context */ 136d9653453SSatish Balay ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 13790f02eecSBarry Smith 13890f02eecSBarry Smith /* 13990f02eecSBarry Smith Post the receives for the first matrix vector product. We sync-chronize after 14090f02eecSBarry Smith this on the chance that the user immediately calls MatMult() after assemblying 14190f02eecSBarry Smith the matrix. 14290f02eecSBarry Smith */ 14343a90d84SBarry Smith ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 144ca161407SBarry Smith ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 14590f02eecSBarry Smith 146d9653453SSatish Balay PLogObjectParent(mat,baij->Mvctx); 147d9653453SSatish Balay PLogObjectParent(mat,baij->lvec); 1488016bdd1SSatish Balay PLogObjectParent(mat,from); 1498016bdd1SSatish Balay PLogObjectParent(mat,to); 150d9653453SSatish Balay baij->garray = garray; 1518016bdd1SSatish Balay PLogObjectMemory(mat,(ec+1)*sizeof(int)); 1528016bdd1SSatish Balay ierr = ISDestroy(from); CHKERRQ(ierr); 1538016bdd1SSatish Balay ierr = ISDestroy(to); CHKERRQ(ierr); 1548016bdd1SSatish Balay ierr = VecDestroy(gvec); 155d9653453SSatish Balay PetscFree(tmp); 1563a40ed3dSBarry Smith PetscFunctionReturn(0); 1578016bdd1SSatish Balay } 1588016bdd1SSatish Balay 1598016bdd1SSatish Balay 1608016bdd1SSatish Balay /* 161d9653453SSatish Balay Takes the local part of an already assembled MPIBAIJ matrix 1628016bdd1SSatish Balay and disassembles it. This is to allow new nonzeros into the matrix 1638016bdd1SSatish Balay that require more communication in the matrix vector multiply. 1648016bdd1SSatish Balay Thus certain data-structures must be rebuilt. 1658016bdd1SSatish Balay 1668016bdd1SSatish Balay Kind of slow! But that's what application programmers get when 1678016bdd1SSatish Balay they are sloppy. 1688016bdd1SSatish Balay */ 1695615d1e5SSatish Balay #undef __FUNC__ 1705615d1e5SSatish Balay #define __FUNC__ "DisAssemble_MPIBAIJ" 171d9653453SSatish Balay int DisAssemble_MPIBAIJ(Mat A) 1728016bdd1SSatish Balay { 173d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 174d9653453SSatish Balay Mat B = baij->B,Bnew; 175d9653453SSatish Balay Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 176d9653453SSatish Balay int ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray; 177d9653453SSatish Balay int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m; 178d9653453SSatish Balay Scalar *a=Bbaij->a; 1798016bdd1SSatish Balay 1803a40ed3dSBarry Smith PetscFunctionBegin; 1818016bdd1SSatish Balay /* free stuff related to matrix-vec multiply */ 182d9653453SSatish Balay ierr = VecGetSize(baij->lvec,&ec); /* needed for PLogObjectMemory below */ 183d9653453SSatish Balay ierr = VecDestroy(baij->lvec); CHKERRQ(ierr); baij->lvec = 0; 184d9653453SSatish Balay ierr = VecScatterDestroy(baij->Mvctx); CHKERRQ(ierr); baij->Mvctx = 0; 185d9653453SSatish Balay if (baij->colmap) { 18648e59246SSatish Balay #if defined (USE_CTABLE) 18748e59246SSatish Balay TableDelete(baij->colmap); baij->colmap = 0; 18848e59246SSatish Balay #else 189d9653453SSatish Balay PetscFree(baij->colmap); baij->colmap = 0; 190d9653453SSatish Balay PLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 19148e59246SSatish Balay #endif 1928016bdd1SSatish Balay } 1938016bdd1SSatish Balay 1948016bdd1SSatish Balay /* make sure that B is assembled so we can access its values */ 1956d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1966d4a8577SBarry Smith MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1978016bdd1SSatish Balay 1988016bdd1SSatish Balay /* invent new B and copy stuff over */ 199d9653453SSatish Balay nz = (int *) PetscMalloc( mbs*sizeof(int) ); CHKPTRQ(nz); 200d9653453SSatish Balay for ( i=0; i<mbs; i++ ) { 201d9653453SSatish Balay nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 2028016bdd1SSatish Balay } 203029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew); CHKERRQ(ierr); 2048016bdd1SSatish Balay PetscFree(nz); 205d9653453SSatish Balay 206d9653453SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 207d9653453SSatish Balay for ( i=0; i<mbs; i++ ) { 208d9653453SSatish Balay rvals[0] = bs*i; 209d9653453SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 210d9653453SSatish Balay for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) { 21157b952d6SSatish Balay col = garray[Bbaij->j[j]]*bs; 212d9653453SSatish Balay for (k=0; k<bs; k++ ) { 213d9653453SSatish Balay ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,INSERT_VALUES);CHKERRQ(ierr); 214d9653453SSatish Balay col++; 2158016bdd1SSatish Balay } 2168016bdd1SSatish Balay } 217d9653453SSatish Balay } 218d9653453SSatish Balay PetscFree(baij->garray); baij->garray = 0; 21957b952d6SSatish Balay PetscFree(rvals); 2208016bdd1SSatish Balay PLogObjectMemory(A,-ec*sizeof(int)); 2218016bdd1SSatish Balay ierr = MatDestroy(B); CHKERRQ(ierr); 2228016bdd1SSatish Balay PLogObjectParent(A,Bnew); 223d9653453SSatish Balay baij->B = Bnew; 2248016bdd1SSatish Balay A->was_assembled = PETSC_FALSE; 2253a40ed3dSBarry Smith PetscFunctionReturn(0); 2268016bdd1SSatish Balay } 2278016bdd1SSatish Balay 2288016bdd1SSatish Balay 229