1*5010ffefSSatish Balay /*$Id: mmbaij.c,v 1.43 2001/08/07 03:02:58 balay Exp balay $*/ 28016bdd1SSatish Balay 38016bdd1SSatish Balay /* 4d9653453SSatish Balay Support for the parallel BAIJ matrix vector multiply 58016bdd1SSatish Balay */ 670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 78016bdd1SSatish Balay #include "src/vec/vecimpl.h" 887828ca2SBarry Smith EXTERN int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 98016bdd1SSatish Balay 104a2ae208SSatish Balay #undef __FUNCT__ 114a2ae208SSatish Balay #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ" 12d9653453SSatish Balay int MatSetUpMultiply_MPIBAIJ(Mat mat) 138016bdd1SSatish Balay { 14d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 15d9653453SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 16d9653453SSatish Balay int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 173d3cf644SBarry Smith int bs = baij->bs,*stmp; 188016bdd1SSatish Balay IS from,to; 198016bdd1SSatish Balay Vec gvec; 20aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 210f5bd95cSBarry Smith PetscTable gid1_lid1; 220f5bd95cSBarry Smith PetscTablePosition tpos; 2373a2e727SSatish Balay int gid,lid; 2473a2e727SSatish Balay #endif 258016bdd1SSatish Balay 263a40ed3dSBarry Smith PetscFunctionBegin; 2773a2e727SSatish Balay 28aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 2973a2e727SSatish Balay /* use a table - Mark Adams */ 30273d9f13SBarry Smith ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr); 3173a2e727SSatish Balay for (i=0; i<B->mbs; i++) { 3273a2e727SSatish Balay for (j=0; j<B->ilen[i]; j++) { 33fa46199cSSatish Balay int data,gid1 = aj[B->i[i]+j] + 1; 340f5bd95cSBarry Smith ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr); 35fa46199cSSatish Balay if (!data) { 3673a2e727SSatish Balay /* one based table */ 370f5bd95cSBarry Smith ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 3873a2e727SSatish Balay } 3973a2e727SSatish Balay } 4073a2e727SSatish Balay } 4173a2e727SSatish Balay /* form array of columns we need */ 42b0a32e0cSBarry Smith ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 430f5bd95cSBarry Smith ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 4473a2e727SSatish Balay while (tpos) { 450f5bd95cSBarry Smith ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 4673a2e727SSatish Balay gid--; lid--; 4773a2e727SSatish Balay garray[lid] = gid; 4873a2e727SSatish Balay } 490064e2bbSSatish Balay ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 500064e2bbSSatish Balay /* qsort(garray, ec, sizeof(int), intcomparcarc); */ 510f5bd95cSBarry Smith ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 5273a2e727SSatish Balay for (i=0; i<ec; i++) { 530f5bd95cSBarry Smith ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 5473a2e727SSatish Balay } 5573a2e727SSatish Balay /* compact out the extra columns in B */ 5673a2e727SSatish Balay for (i=0; i<B->mbs; i++) { 5773a2e727SSatish Balay for (j=0; j<B->ilen[i]; j++) { 5873a2e727SSatish Balay int gid1 = aj[B->i[i] + j] + 1; 590f5bd95cSBarry Smith ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 60fa46199cSSatish Balay lid --; 6173a2e727SSatish Balay aj[B->i[i]+j] = lid; 6273a2e727SSatish Balay } 6373a2e727SSatish Balay } 6473a2e727SSatish Balay B->nbs = ec; 65273d9f13SBarry Smith baij->B->n = ec*B->bs; 660f5bd95cSBarry Smith ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 6773a2e727SSatish Balay /* Mark Adams */ 6873a2e727SSatish Balay #else 69435da068SBarry Smith /* Make an array as long as the number of columns */ 70d9653453SSatish Balay /* mark those columns that are in baij->B */ 71b0a32e0cSBarry Smith ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 72549d3d68SSatish Balay ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 73d9653453SSatish Balay for (i=0; i<B->mbs; i++) { 748016bdd1SSatish Balay for (j=0; j<B->ilen[i]; j++) { 75d9653453SSatish Balay if (!indices[aj[B->i[i] + j]]) ec++; 76d9653453SSatish Balay indices[aj[B->i[i] + j]] = 1; 778016bdd1SSatish Balay } 788016bdd1SSatish Balay } 798016bdd1SSatish Balay 808016bdd1SSatish Balay /* form array of columns we need */ 81b0a32e0cSBarry Smith ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 828016bdd1SSatish Balay ec = 0; 83d9653453SSatish Balay for (i=0; i<Nbs; i++) { 840bdbc534SSatish Balay if (indices[i]) { 850bdbc534SSatish Balay garray[ec++] = i; 860bdbc534SSatish Balay } 878016bdd1SSatish Balay } 888016bdd1SSatish Balay 898016bdd1SSatish Balay /* make indices now point into garray */ 908016bdd1SSatish Balay for (i=0; i<ec; i++) { 91d9653453SSatish Balay indices[garray[i]] = i; 928016bdd1SSatish Balay } 938016bdd1SSatish Balay 948016bdd1SSatish Balay /* compact out the extra columns in B */ 95d9653453SSatish Balay for (i=0; i<B->mbs; i++) { 968016bdd1SSatish Balay for (j=0; j<B->ilen[i]; j++) { 97d9653453SSatish Balay aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 988016bdd1SSatish Balay } 998016bdd1SSatish Balay } 100d9653453SSatish Balay B->nbs = ec; 101273d9f13SBarry Smith baij->B->n = ec*B->bs; 102606d414cSSatish Balay ierr = PetscFree(indices);CHKERRQ(ierr); 10373a2e727SSatish Balay #endif 1048016bdd1SSatish Balay 1058016bdd1SSatish Balay /* create local vector that is used to scatter into */ 106029af93fSBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 1078016bdd1SSatish Balay 108c16cb8f2SBarry Smith /* create two temporary index sets for building scatter-gather */ 1093d3cf644SBarry Smith for (i=0; i<ec; i++) { 110c16cb8f2SBarry Smith garray[i] = bs*garray[i]; 111c16cb8f2SBarry Smith } 112029af93fSBarry Smith ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 1133d3cf644SBarry Smith for (i=0; i<ec; i++) { 114c16cb8f2SBarry Smith garray[i] = garray[i]/bs; 115c16cb8f2SBarry Smith } 116c16cb8f2SBarry Smith 117b0a32e0cSBarry Smith ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 118537820f0SBarry Smith for (i=0; i<ec; i++) { stmp[i] = bs*i; } 119029af93fSBarry Smith ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 120606d414cSSatish Balay ierr = PetscFree(stmp);CHKERRQ(ierr); 1218016bdd1SSatish Balay 1228016bdd1SSatish Balay /* create temporary global vector to generate scatter context */ 1238016bdd1SSatish Balay /* this is inefficient, but otherwise we must do either 1248016bdd1SSatish Balay 1) save garray until the first actual scatter when the vector is known or 1258016bdd1SSatish Balay 2) have another way of generating a scatter context without a vector.*/ 126273d9f13SBarry Smith ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 1278016bdd1SSatish Balay 1288016bdd1SSatish Balay /* gnerate the scatter context */ 129d9653453SSatish Balay ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 13090f02eecSBarry Smith 13190f02eecSBarry Smith /* 13290f02eecSBarry Smith Post the receives for the first matrix vector product. We sync-chronize after 13390f02eecSBarry Smith this on the chance that the user immediately calls MatMult() after assemblying 13490f02eecSBarry Smith the matrix. 13590f02eecSBarry Smith */ 13643a90d84SBarry Smith ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 137ca161407SBarry Smith ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 13890f02eecSBarry Smith 139b0a32e0cSBarry Smith PetscLogObjectParent(mat,baij->Mvctx); 140b0a32e0cSBarry Smith PetscLogObjectParent(mat,baij->lvec); 141b0a32e0cSBarry Smith PetscLogObjectParent(mat,from); 142b0a32e0cSBarry Smith PetscLogObjectParent(mat,to); 143d9653453SSatish Balay baij->garray = garray; 144b0a32e0cSBarry Smith PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 1458016bdd1SSatish Balay ierr = ISDestroy(from);CHKERRQ(ierr); 1468016bdd1SSatish Balay ierr = ISDestroy(to);CHKERRQ(ierr); 147888f2ed8SSatish Balay ierr = VecDestroy(gvec);CHKERRQ(ierr); 1483a40ed3dSBarry Smith PetscFunctionReturn(0); 1498016bdd1SSatish Balay } 1508016bdd1SSatish Balay 1518016bdd1SSatish Balay 1528016bdd1SSatish Balay /* 153d9653453SSatish Balay Takes the local part of an already assembled MPIBAIJ matrix 1548016bdd1SSatish Balay and disassembles it. This is to allow new nonzeros into the matrix 1558016bdd1SSatish Balay that require more communication in the matrix vector multiply. 1568016bdd1SSatish Balay Thus certain data-structures must be rebuilt. 1578016bdd1SSatish Balay 1588016bdd1SSatish Balay Kind of slow! But that's what application programmers get when 1598016bdd1SSatish Balay they are sloppy. 1608016bdd1SSatish Balay */ 1614a2ae208SSatish Balay #undef __FUNCT__ 1624a2ae208SSatish Balay #define __FUNCT__ "DisAssemble_MPIBAIJ" 163d9653453SSatish Balay int DisAssemble_MPIBAIJ(Mat A) 1648016bdd1SSatish Balay { 165d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 166d9653453SSatish Balay Mat B = baij->B,Bnew; 167d9653453SSatish Balay Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 168273d9f13SBarry Smith int ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray; 169273d9f13SBarry Smith int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m = A->m; 1703eda8832SBarry Smith MatScalar *a = Bbaij->a; 17187828ca2SBarry Smith PetscScalar *atmp; 17282502324SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 17382502324SSatish Balay int l; 1743eda8832SBarry Smith #endif 1758016bdd1SSatish Balay 1763a40ed3dSBarry Smith PetscFunctionBegin; 1778016bdd1SSatish Balay /* free stuff related to matrix-vec multiply */ 178b0a32e0cSBarry Smith ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 179d9653453SSatish Balay ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 180d9653453SSatish Balay ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 181d9653453SSatish Balay if (baij->colmap) { 182aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 1830f5bd95cSBarry Smith ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 18448e59246SSatish Balay #else 185606d414cSSatish Balay ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 186606d414cSSatish Balay baij->colmap = 0; 187b0a32e0cSBarry Smith PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 18848e59246SSatish Balay #endif 1898016bdd1SSatish Balay } 1908016bdd1SSatish Balay 1918016bdd1SSatish Balay /* make sure that B is assembled so we can access its values */ 1926d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1933eda8832SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1948016bdd1SSatish Balay 1958016bdd1SSatish Balay /* invent new B and copy stuff over */ 19682502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr); 197d9653453SSatish Balay for (i=0; i<mbs; i++) { 198d9653453SSatish Balay nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 1998016bdd1SSatish Balay } 200029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr); 201*5010ffefSSatish Balay ierr = MatSetOption(Bnew,MAT_COLUMN_ORIENTED);CHKERRQ(ierr); 202d9653453SSatish Balay 2033eda8832SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 204*5010ffefSSatish Balay ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr); 205*5010ffefSSatish Balay #endif 206*5010ffefSSatish Balay for (i=0; i<mbs; i++) { 207*5010ffefSSatish Balay for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 208*5010ffefSSatish Balay col = garray[Bbaij->j[j]]; 209*5010ffefSSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 210*5010ffefSSatish Balay for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k]; 2113eda8832SBarry Smith #else 2123eda8832SBarry Smith atmp = a + j*bs2; 2133eda8832SBarry Smith #endif 214*5010ffefSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 2158016bdd1SSatish Balay } 2168016bdd1SSatish Balay } 217*5010ffefSSatish Balay ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED);CHKERRQ(ierr); 218*5010ffefSSatish Balay 2193eda8832SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 2203eda8832SBarry Smith ierr = PetscFree(atmp);CHKERRQ(ierr); 2213eda8832SBarry Smith #endif 222*5010ffefSSatish Balay 223*5010ffefSSatish Balay ierr = PetscFree(nz);CHKERRQ(ierr); 224606d414cSSatish Balay ierr = PetscFree(baij->garray);CHKERRQ(ierr); 225606d414cSSatish Balay baij->garray = 0; 226b0a32e0cSBarry Smith PetscLogObjectMemory(A,-ec*sizeof(int)); 2278016bdd1SSatish Balay ierr = MatDestroy(B);CHKERRQ(ierr); 228b0a32e0cSBarry Smith PetscLogObjectParent(A,Bnew); 229d9653453SSatish Balay baij->B = Bnew; 2308016bdd1SSatish Balay A->was_assembled = PETSC_FALSE; 2313a40ed3dSBarry Smith PetscFunctionReturn(0); 2328016bdd1SSatish Balay } 2338016bdd1SSatish Balay 2348016bdd1SSatish Balay 2353eda8832SBarry Smith 236