1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*73a2e727SSatish Balay static char vcid[] = "$Id: mmbaij.c,v 1.18 1999/01/08 18:18:13 balay Exp balay $"; 38016bdd1SSatish Balay #endif 48016bdd1SSatish Balay 58016bdd1SSatish Balay 68016bdd1SSatish Balay /* 7d9653453SSatish Balay Support for the parallel BAIJ matrix vector multiply 88016bdd1SSatish Balay */ 970f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 108016bdd1SSatish Balay #include "src/vec/vecimpl.h" 118016bdd1SSatish Balay 125615d1e5SSatish Balay #undef __FUNC__ 135615d1e5SSatish Balay #define __FUNC__ "MatSetUpMultiply_MPIBAIJ" 14d9653453SSatish Balay int MatSetUpMultiply_MPIBAIJ(Mat mat) 158016bdd1SSatish Balay { 16d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 17d9653453SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data); 18d9653453SSatish Balay int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 19537820f0SBarry Smith int col,bs = baij->bs,*tmp,*stmp; 208016bdd1SSatish Balay IS from,to; 218016bdd1SSatish Balay Vec gvec; 22*73a2e727SSatish Balay #if defined (USE_CTABLE) 23*73a2e727SSatish Balay Table gid1_lid1; 24*73a2e727SSatish Balay CTablePos tpos; 25*73a2e727SSatish Balay int gid, lid; 26*73a2e727SSatish Balay #endif 278016bdd1SSatish Balay 283a40ed3dSBarry Smith PetscFunctionBegin; 29*73a2e727SSatish Balay 30*73a2e727SSatish Balay #if defined (USE_CTABLE) 31*73a2e727SSatish Balay /* use a table - Mark Adams */ 32*73a2e727SSatish Balay TableCreate( &gid1_lid1, B->mbs ); 33*73a2e727SSatish Balay for ( i=0; i<B->mbs; i++ ) { 34*73a2e727SSatish Balay for ( j=0; j<B->ilen[i]; j++ ) { 35*73a2e727SSatish Balay int gid1 = aj[B->i[i] + j] + 1; 36*73a2e727SSatish Balay if ( !TableFind( gid1_lid1, gid1 ) ){ 37*73a2e727SSatish Balay /* one based table */ 38*73a2e727SSatish Balay ierr = TableAdd( gid1_lid1, gid1, ++ec ); CHKERRQ(ierr); 39*73a2e727SSatish Balay } 40*73a2e727SSatish Balay } 41*73a2e727SSatish Balay } 42*73a2e727SSatish Balay /* form array of columns we need */ 43*73a2e727SSatish Balay garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray); 44*73a2e727SSatish Balay tmp = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp); 45*73a2e727SSatish Balay ierr = TableGetHeadPosition( gid1_lid1, &tpos ); CHKERRQ(ierr); 46*73a2e727SSatish Balay while( tpos ) { 47*73a2e727SSatish Balay ierr = TableGetNext( gid1_lid1, &tpos, &gid, &lid ); CHKERRQ(ierr); 48*73a2e727SSatish Balay gid--; lid--; 49*73a2e727SSatish Balay garray[lid] = gid; 50*73a2e727SSatish Balay } 51*73a2e727SSatish Balay qsort( garray, ec, sizeof(int), intcomparc ); /* sort */ 52*73a2e727SSatish Balay TableRemoveAll( gid1_lid1 ); 53*73a2e727SSatish Balay for ( i=0; i<ec; i++ ) { 54*73a2e727SSatish Balay ierr = TableAdd( gid1_lid1, garray[i] + 1, i + 1 ); CHKERRQ(ierr); 55*73a2e727SSatish Balay } 56*73a2e727SSatish Balay /* compact out the extra columns in B */ 57*73a2e727SSatish Balay for ( i=0; i<B->mbs; i++ ) { 58*73a2e727SSatish Balay for ( j=0; j<B->ilen[i]; j++ ) { 59*73a2e727SSatish Balay int gid1 = aj[B->i[i] + j] + 1; 60*73a2e727SSatish Balay lid = TableFind( gid1_lid1, gid1 ) - 1; 61*73a2e727SSatish Balay aj[B->i[i] + j] = lid; 62*73a2e727SSatish Balay } 63*73a2e727SSatish Balay } 64*73a2e727SSatish Balay B->nbs = ec; 65*73a2e727SSatish Balay B->n = ec*B->bs; 66*73a2e727SSatish Balay TableDelete(gid1_lid1); 67*73a2e727SSatish Balay /* Mark Adams */ 68*73a2e727SSatish Balay #else 698016bdd1SSatish Balay /* For the first stab we make an array as long as the number of columns */ 70d9653453SSatish Balay /* mark those columns that are in baij->B */ 719dbe0bc3SSatish Balay indices = (int *) PetscMalloc( (Nbs+1)*sizeof(int) ); CHKPTRQ(indices); 72d9653453SSatish Balay PetscMemzero(indices,Nbs*sizeof(int)); 73d9653453SSatish Balay for ( i=0; i<B->mbs; i++ ) { 748016bdd1SSatish Balay for ( j=0; j<B->ilen[i]; j++ ) { 75d9653453SSatish Balay if (!indices[aj[B->i[i] + j]]) ec++; 76d9653453SSatish Balay indices[aj[B->i[i] + j] ] = 1; 778016bdd1SSatish Balay } 788016bdd1SSatish Balay } 798016bdd1SSatish Balay 808016bdd1SSatish Balay /* form array of columns we need */ 818016bdd1SSatish Balay garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray); 82d9653453SSatish Balay tmp = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp) 838016bdd1SSatish Balay ec = 0; 84d9653453SSatish Balay for ( i=0; i<Nbs; i++ ) { 850bdbc534SSatish Balay if (indices[i]) { 860bdbc534SSatish Balay garray[ec++] = i; 870bdbc534SSatish Balay } 888016bdd1SSatish Balay } 898016bdd1SSatish Balay 908016bdd1SSatish Balay /* make indices now point into garray */ 918016bdd1SSatish Balay for ( i=0; i<ec; i++ ) { 92d9653453SSatish Balay indices[garray[i]] = i; 938016bdd1SSatish Balay } 948016bdd1SSatish Balay 958016bdd1SSatish Balay /* compact out the extra columns in B */ 96d9653453SSatish Balay for ( i=0; i<B->mbs; i++ ) { 978016bdd1SSatish Balay for ( j=0; j<B->ilen[i]; j++ ) { 98d9653453SSatish Balay aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 998016bdd1SSatish Balay } 1008016bdd1SSatish Balay } 101d9653453SSatish Balay B->nbs = ec; 102d9653453SSatish Balay B->n = ec*B->bs; 1038016bdd1SSatish Balay PetscFree(indices); 104*73a2e727SSatish Balay #endif 1058016bdd1SSatish Balay 10657b952d6SSatish Balay for ( i=0,col=0; i<ec; i++ ) { 10757b952d6SSatish Balay for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j; 108d9653453SSatish Balay } 1098016bdd1SSatish Balay /* create local vector that is used to scatter into */ 110029af93fSBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec); CHKERRQ(ierr); 1118016bdd1SSatish Balay 112c16cb8f2SBarry Smith /* create two temporary index sets for building scatter-gather */ 113c16cb8f2SBarry Smith 114029af93fSBarry Smith /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from); CHKERRQ(ierr); */ 115c16cb8f2SBarry Smith for ( i=0,col=0; i<ec; i++ ) { 116c16cb8f2SBarry Smith garray[i] = bs*garray[i]; 117c16cb8f2SBarry Smith } 118029af93fSBarry Smith ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 119c16cb8f2SBarry Smith for ( i=0,col=0; i<ec; i++ ) { 120c16cb8f2SBarry Smith garray[i] = garray[i]/bs; 121c16cb8f2SBarry Smith } 122c16cb8f2SBarry Smith 123537820f0SBarry Smith stmp = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(stmp); 124537820f0SBarry Smith for ( i=0; i<ec; i++ ) { stmp[i] = bs*i; } 125029af93fSBarry Smith ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 126537820f0SBarry Smith PetscFree(stmp); 1278016bdd1SSatish Balay 1288016bdd1SSatish Balay /* create temporary global vector to generate scatter context */ 1298016bdd1SSatish Balay /* this is inefficient, but otherwise we must do either 1308016bdd1SSatish Balay 1) save garray until the first actual scatter when the vector is known or 1318016bdd1SSatish Balay 2) have another way of generating a scatter context without a vector.*/ 132d9653453SSatish Balay ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec); CHKERRQ(ierr); 1338016bdd1SSatish Balay 1348016bdd1SSatish Balay /* gnerate the scatter context */ 135d9653453SSatish Balay ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 13690f02eecSBarry Smith 13790f02eecSBarry Smith /* 13890f02eecSBarry Smith Post the receives for the first matrix vector product. We sync-chronize after 13990f02eecSBarry Smith this on the chance that the user immediately calls MatMult() after assemblying 14090f02eecSBarry Smith the matrix. 14190f02eecSBarry Smith */ 14243a90d84SBarry Smith ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 143ca161407SBarry Smith ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 14490f02eecSBarry Smith 145d9653453SSatish Balay PLogObjectParent(mat,baij->Mvctx); 146d9653453SSatish Balay PLogObjectParent(mat,baij->lvec); 1478016bdd1SSatish Balay PLogObjectParent(mat,from); 1488016bdd1SSatish Balay PLogObjectParent(mat,to); 149d9653453SSatish Balay baij->garray = garray; 1508016bdd1SSatish Balay PLogObjectMemory(mat,(ec+1)*sizeof(int)); 1518016bdd1SSatish Balay ierr = ISDestroy(from); CHKERRQ(ierr); 1528016bdd1SSatish Balay ierr = ISDestroy(to); CHKERRQ(ierr); 1538016bdd1SSatish Balay ierr = VecDestroy(gvec); 154d9653453SSatish Balay PetscFree(tmp); 1553a40ed3dSBarry Smith PetscFunctionReturn(0); 1568016bdd1SSatish Balay } 1578016bdd1SSatish Balay 1588016bdd1SSatish Balay 1598016bdd1SSatish Balay /* 160d9653453SSatish Balay Takes the local part of an already assembled MPIBAIJ matrix 1618016bdd1SSatish Balay and disassembles it. This is to allow new nonzeros into the matrix 1628016bdd1SSatish Balay that require more communication in the matrix vector multiply. 1638016bdd1SSatish Balay Thus certain data-structures must be rebuilt. 1648016bdd1SSatish Balay 1658016bdd1SSatish Balay Kind of slow! But that's what application programmers get when 1668016bdd1SSatish Balay they are sloppy. 1678016bdd1SSatish Balay */ 1685615d1e5SSatish Balay #undef __FUNC__ 1695615d1e5SSatish Balay #define __FUNC__ "DisAssemble_MPIBAIJ" 170d9653453SSatish Balay int DisAssemble_MPIBAIJ(Mat A) 1718016bdd1SSatish Balay { 172d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 173d9653453SSatish Balay Mat B = baij->B,Bnew; 174d9653453SSatish Balay Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 175d9653453SSatish Balay int ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray; 176d9653453SSatish Balay int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m; 177d9653453SSatish Balay Scalar *a=Bbaij->a; 1788016bdd1SSatish Balay 1793a40ed3dSBarry Smith PetscFunctionBegin; 1808016bdd1SSatish Balay /* free stuff related to matrix-vec multiply */ 181d9653453SSatish Balay ierr = VecGetSize(baij->lvec,&ec); /* needed for PLogObjectMemory below */ 182d9653453SSatish Balay ierr = VecDestroy(baij->lvec); CHKERRQ(ierr); baij->lvec = 0; 183d9653453SSatish Balay ierr = VecScatterDestroy(baij->Mvctx); CHKERRQ(ierr); baij->Mvctx = 0; 184d9653453SSatish Balay if (baij->colmap) { 18548e59246SSatish Balay #if defined (USE_CTABLE) 18648e59246SSatish Balay TableDelete(baij->colmap); baij->colmap = 0; 18748e59246SSatish Balay #else 188d9653453SSatish Balay PetscFree(baij->colmap); baij->colmap = 0; 189d9653453SSatish Balay PLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 19048e59246SSatish Balay #endif 1918016bdd1SSatish Balay } 1928016bdd1SSatish Balay 1938016bdd1SSatish Balay /* make sure that B is assembled so we can access its values */ 1946d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1956d4a8577SBarry Smith MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1968016bdd1SSatish Balay 1978016bdd1SSatish Balay /* invent new B and copy stuff over */ 198d9653453SSatish Balay nz = (int *) PetscMalloc( mbs*sizeof(int) ); CHKPTRQ(nz); 199d9653453SSatish Balay for ( i=0; i<mbs; i++ ) { 200d9653453SSatish Balay nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 2018016bdd1SSatish Balay } 202029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew); CHKERRQ(ierr); 2038016bdd1SSatish Balay PetscFree(nz); 204d9653453SSatish Balay 205d9653453SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 206d9653453SSatish Balay for ( i=0; i<mbs; i++ ) { 207d9653453SSatish Balay rvals[0] = bs*i; 208d9653453SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 209d9653453SSatish Balay for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) { 21057b952d6SSatish Balay col = garray[Bbaij->j[j]]*bs; 211d9653453SSatish Balay for (k=0; k<bs; k++ ) { 212d9653453SSatish Balay ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,INSERT_VALUES);CHKERRQ(ierr); 213d9653453SSatish Balay col++; 2148016bdd1SSatish Balay } 2158016bdd1SSatish Balay } 216d9653453SSatish Balay } 217d9653453SSatish Balay PetscFree(baij->garray); baij->garray = 0; 21857b952d6SSatish Balay PetscFree(rvals); 2198016bdd1SSatish Balay PLogObjectMemory(A,-ec*sizeof(int)); 2208016bdd1SSatish Balay ierr = MatDestroy(B); CHKERRQ(ierr); 2218016bdd1SSatish Balay PLogObjectParent(A,Bnew); 222d9653453SSatish Balay baij->B = Bnew; 2238016bdd1SSatish Balay A->was_assembled = PETSC_FALSE; 2243a40ed3dSBarry Smith PetscFunctionReturn(0); 2258016bdd1SSatish Balay } 2268016bdd1SSatish Balay 2278016bdd1SSatish Balay 228