173f4d377SMatthew Knepley /*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/ 2a30f8f8cSSatish Balay 3a30f8f8cSSatish Balay /* 4a30f8f8cSSatish Balay Support for the parallel SBAIJ matrix vector multiply 5a30f8f8cSSatish Balay */ 62896befeSSatish Balay #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 7a30f8f8cSSatish Balay #include "src/vec/vecimpl.h" 887828ca2SBarry Smith extern int MatSetValues_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 9a30f8f8cSSatish Balay 104a2ae208SSatish Balay #undef __FUNCT__ 114a2ae208SSatish Balay #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ" 12a30f8f8cSSatish Balay int MatSetUpMultiply_MPISBAIJ(Mat mat) 13a30f8f8cSSatish Balay { 142896befeSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 15a30f8f8cSSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 16a30f8f8cSSatish Balay int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 1786e15631SSatish Balay int bs = baij->bs,*stmp; 18a30f8f8cSSatish Balay IS from,to; 19a30f8f8cSSatish Balay Vec gvec; 20a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 21a30f8f8cSSatish Balay PetscTable gid1_lid1; 22a30f8f8cSSatish Balay PetscTablePosition tpos; 23a30f8f8cSSatish Balay int gid,lid; 24a30f8f8cSSatish Balay #endif 25a30f8f8cSSatish Balay 26a30f8f8cSSatish Balay PetscFunctionBegin; 27a30f8f8cSSatish Balay 28a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 29a30f8f8cSSatish Balay /* use a table - Mark Adams */ 30a30f8f8cSSatish Balay PetscTableCreate(B->mbs,&gid1_lid1); 31a30f8f8cSSatish Balay for (i=0; i<B->mbs; i++) { 32a30f8f8cSSatish Balay for (j=0; j<B->ilen[i]; j++) { 33a30f8f8cSSatish Balay int data,gid1 = aj[B->i[i]+j] + 1; 34a30f8f8cSSatish Balay ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr); 35a30f8f8cSSatish Balay if (!data) { 36a30f8f8cSSatish Balay /* one based table */ 37a30f8f8cSSatish Balay ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 38a30f8f8cSSatish Balay } 39a30f8f8cSSatish Balay } 40a30f8f8cSSatish Balay } 41a30f8f8cSSatish Balay /* form array of columns we need */ 42b0a32e0cSBarry Smith ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 43a30f8f8cSSatish Balay ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 44a30f8f8cSSatish Balay while (tpos) { 45a30f8f8cSSatish Balay ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 46a30f8f8cSSatish Balay gid--; lid--; 47a30f8f8cSSatish Balay garray[lid] = gid; 48a30f8f8cSSatish Balay } 49a30f8f8cSSatish Balay ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 50a30f8f8cSSatish Balay /* qsort(garray, ec, sizeof(int), intcomparcarc); */ 51a30f8f8cSSatish Balay ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 52a30f8f8cSSatish Balay for (i=0; i<ec; i++) { 53a30f8f8cSSatish Balay ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 54a30f8f8cSSatish Balay } 55a30f8f8cSSatish Balay /* compact out the extra columns in B */ 56a30f8f8cSSatish Balay for (i=0; i<B->mbs; i++) { 57a30f8f8cSSatish Balay for (j=0; j<B->ilen[i]; j++) { 58a30f8f8cSSatish Balay int gid1 = aj[B->i[i] + j] + 1; 59a30f8f8cSSatish Balay ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 60a30f8f8cSSatish Balay lid --; 61a30f8f8cSSatish Balay aj[B->i[i]+j] = lid; 62a30f8f8cSSatish Balay } 63a30f8f8cSSatish Balay } 64a30f8f8cSSatish Balay B->nbs = ec; 65273d9f13SBarry Smith baij->B->n = ec*B->bs; 66a30f8f8cSSatish Balay ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 67a30f8f8cSSatish Balay /* Mark Adams */ 68a30f8f8cSSatish Balay #else 69a30f8f8cSSatish Balay /* For the first stab we make an array as long as the number of columns */ 70a30f8f8cSSatish Balay /* mark those columns that are in baij->B */ 71b0a32e0cSBarry Smith ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 72a30f8f8cSSatish Balay ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 73a30f8f8cSSatish Balay for (i=0; i<B->mbs; i++) { 74a30f8f8cSSatish Balay for (j=0; j<B->ilen[i]; j++) { 75a30f8f8cSSatish Balay if (!indices[aj[B->i[i] + j]]) ec++; 76a30f8f8cSSatish Balay indices[aj[B->i[i] + j] ] = 1; 77a30f8f8cSSatish Balay } 78a30f8f8cSSatish Balay } 79a30f8f8cSSatish Balay 80a30f8f8cSSatish Balay /* form array of columns we need */ 81b0a32e0cSBarry Smith ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 82f3ef73ceSBarry Smith ec = 0; 83f3ef73ceSBarry Smith for (i=0; i<Nbs; i++) { 84f3ef73ceSBarry Smith if (indices[i]) { 85f3ef73ceSBarry Smith garray[ec++] = i; 86f3ef73ceSBarry Smith } 87f3ef73ceSBarry Smith } 88a30f8f8cSSatish Balay 89a30f8f8cSSatish Balay /* make indices now point into garray */ 90a30f8f8cSSatish Balay for (i=0; i<ec; i++) { 91a30f8f8cSSatish Balay indices[garray[i]] = i; 92a30f8f8cSSatish Balay } 93a30f8f8cSSatish Balay 94a30f8f8cSSatish Balay /* compact out the extra columns in B */ 95a30f8f8cSSatish Balay for (i=0; i<B->mbs; i++) { 96a30f8f8cSSatish Balay for (j=0; j<B->ilen[i]; j++) { 97a30f8f8cSSatish Balay aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 98a30f8f8cSSatish Balay } 99a30f8f8cSSatish Balay } 100a30f8f8cSSatish Balay B->nbs = ec; 101273d9f13SBarry Smith baij->B->n = ec*B->bs; 102a30f8f8cSSatish Balay ierr = PetscFree(indices);CHKERRQ(ierr); 103a30f8f8cSSatish Balay #endif 104*633e10c7SHong Zhang 105a30f8f8cSSatish Balay /* create local vector that is used to scatter into */ 106a30f8f8cSSatish Balay ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 107a30f8f8cSSatish Balay 108a30f8f8cSSatish Balay /* create two temporary index sets for building scatter-gather */ 109eb7adc28SSatish Balay for (i=0; i<ec; i++) { 110a30f8f8cSSatish Balay garray[i] = bs*garray[i]; 111a30f8f8cSSatish Balay } 112a30f8f8cSSatish Balay ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 11386e15631SSatish Balay for (i=0; i<ec; i++) { 114a30f8f8cSSatish Balay garray[i] = garray[i]/bs; 115a30f8f8cSSatish Balay } 116a30f8f8cSSatish Balay 117b0a32e0cSBarry Smith ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 118a30f8f8cSSatish Balay for (i=0; i<ec; i++) { stmp[i] = bs*i; } 119a30f8f8cSSatish Balay ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 120a30f8f8cSSatish Balay ierr = PetscFree(stmp);CHKERRQ(ierr); 121a30f8f8cSSatish Balay 122a30f8f8cSSatish Balay /* create temporary global vector to generate scatter context */ 123a30f8f8cSSatish Balay /* this is inefficient, but otherwise we must do either 124a30f8f8cSSatish Balay 1) save garray until the first actual scatter when the vector is known or 125a30f8f8cSSatish Balay 2) have another way of generating a scatter context without a vector.*/ 126273d9f13SBarry Smith ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 127a30f8f8cSSatish Balay 128a30f8f8cSSatish Balay /* gnerate the scatter context */ 129a30f8f8cSSatish Balay ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 130a30f8f8cSSatish Balay 131a30f8f8cSSatish Balay /* 132a30f8f8cSSatish Balay Post the receives for the first matrix vector product. We sync-chronize after 133a30f8f8cSSatish Balay this on the chance that the user immediately calls MatMult() after assemblying 134a30f8f8cSSatish Balay the matrix. 135a30f8f8cSSatish Balay */ 136a30f8f8cSSatish Balay ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 137a30f8f8cSSatish Balay ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 138a30f8f8cSSatish Balay 139b0a32e0cSBarry Smith PetscLogObjectParent(mat,baij->Mvctx); 140b0a32e0cSBarry Smith PetscLogObjectParent(mat,baij->lvec); 141b0a32e0cSBarry Smith PetscLogObjectParent(mat,from); 142b0a32e0cSBarry Smith PetscLogObjectParent(mat,to); 143a30f8f8cSSatish Balay baij->garray = garray; 144b0a32e0cSBarry Smith PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 145a30f8f8cSSatish Balay ierr = ISDestroy(from);CHKERRQ(ierr); 146a30f8f8cSSatish Balay ierr = ISDestroy(to);CHKERRQ(ierr); 147a30f8f8cSSatish Balay ierr = VecDestroy(gvec);CHKERRQ(ierr); 148a30f8f8cSSatish Balay PetscFunctionReturn(0); 149a30f8f8cSSatish Balay } 150a30f8f8cSSatish Balay 151a30f8f8cSSatish Balay 152a30f8f8cSSatish Balay /* 153a30f8f8cSSatish Balay Takes the local part of an already assembled MPIBAIJ matrix 154a30f8f8cSSatish Balay and disassembles it. This is to allow new nonzeros into the matrix 155a30f8f8cSSatish Balay that require more communication in the matrix vector multiply. 156a30f8f8cSSatish Balay Thus certain data-structures must be rebuilt. 157a30f8f8cSSatish Balay 158a30f8f8cSSatish Balay Kind of slow! But that's what application programmers get when 159a30f8f8cSSatish Balay they are sloppy. 160a30f8f8cSSatish Balay */ 1614a2ae208SSatish Balay #undef __FUNCT__ 1624a2ae208SSatish Balay #define __FUNCT__ "DisAssemble_MPISBAIJ" 163a30f8f8cSSatish Balay int DisAssemble_MPISBAIJ(Mat A) 164a30f8f8cSSatish Balay { 1652896befeSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 166a30f8f8cSSatish Balay Mat B = baij->B,Bnew; 167a30f8f8cSSatish Balay Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 168273d9f13SBarry Smith int ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray; 169273d9f13SBarry Smith int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m; 170a30f8f8cSSatish Balay MatScalar *a = Bbaij->a; 17187828ca2SBarry Smith PetscScalar *atmp; 17282502324SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 17382502324SSatish Balay int l; 174a30f8f8cSSatish Balay #endif 175a30f8f8cSSatish Balay 176a30f8f8cSSatish Balay PetscFunctionBegin; 17782502324SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 17887828ca2SBarry Smith ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp); 17982502324SSatish Balay #endif 180a30f8f8cSSatish Balay /* free stuff related to matrix-vec multiply */ 181b0a32e0cSBarry Smith ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 182a30f8f8cSSatish Balay ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 183a30f8f8cSSatish Balay ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 184a30f8f8cSSatish Balay if (baij->colmap) { 185a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 186a30f8f8cSSatish Balay ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 187a30f8f8cSSatish Balay #else 188a30f8f8cSSatish Balay ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 189a30f8f8cSSatish Balay baij->colmap = 0; 190b0a32e0cSBarry Smith PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 191a30f8f8cSSatish Balay #endif 192a30f8f8cSSatish Balay } 193a30f8f8cSSatish Balay 194a30f8f8cSSatish Balay /* make sure that B is assembled so we can access its values */ 195a30f8f8cSSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 196a30f8f8cSSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 197a30f8f8cSSatish Balay 198a30f8f8cSSatish Balay /* invent new B and copy stuff over */ 19982502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr); 200a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 201a30f8f8cSSatish Balay nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 202a30f8f8cSSatish Balay } 203a30f8f8cSSatish Balay ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr); 204a30f8f8cSSatish Balay ierr = PetscFree(nz);CHKERRQ(ierr); 205a30f8f8cSSatish Balay 20682502324SSatish Balay ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 207a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 208a30f8f8cSSatish Balay rvals[0] = bs*i; 209a30f8f8cSSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 210a30f8f8cSSatish Balay for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 211a30f8f8cSSatish Balay col = garray[Bbaij->j[j]]*bs; 212a30f8f8cSSatish Balay for (k=0; k<bs; k++) { 213a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 214a30f8f8cSSatish Balay for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 215a30f8f8cSSatish Balay #else 21671730473SSatish Balay atmp = a+j*bs2 + k*bs; 217a30f8f8cSSatish Balay #endif 218c8407628SSatish Balay ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 219a30f8f8cSSatish Balay col++; 220a30f8f8cSSatish Balay } 221a30f8f8cSSatish Balay } 222a30f8f8cSSatish Balay } 223a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 224a30f8f8cSSatish Balay ierr = PetscFree(atmp);CHKERRQ(ierr); 225a30f8f8cSSatish Balay #endif 226a30f8f8cSSatish Balay ierr = PetscFree(baij->garray);CHKERRQ(ierr); 227a30f8f8cSSatish Balay baij->garray = 0; 228a30f8f8cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 229b0a32e0cSBarry Smith PetscLogObjectMemory(A,-ec*sizeof(int)); 230a30f8f8cSSatish Balay ierr = MatDestroy(B);CHKERRQ(ierr); 231b0a32e0cSBarry Smith PetscLogObjectParent(A,Bnew); 232a30f8f8cSSatish Balay baij->B = Bnew; 233a30f8f8cSSatish Balay A->was_assembled = PETSC_FALSE; 234a30f8f8cSSatish Balay PetscFunctionReturn(0); 235a30f8f8cSSatish Balay } 236a30f8f8cSSatish Balay 237a30f8f8cSSatish Balay 238a30f8f8cSSatish Balay 239