173f4d377SMatthew Knepley /*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/ 2a30f8f8cSSatish Balay 3a30f8f8cSSatish Balay /* 4a30f8f8cSSatish Balay Support for the parallel SBAIJ matrix vector multiply 5a30f8f8cSSatish Balay */ 62896befeSSatish Balay #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 7a30f8f8cSSatish Balay #include "src/vec/vecimpl.h" 887828ca2SBarry Smith extern int MatSetValues_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 9a30f8f8cSSatish Balay 104a2ae208SSatish Balay #undef __FUNCT__ 114a2ae208SSatish Balay #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ" 12a30f8f8cSSatish Balay int MatSetUpMultiply_MPISBAIJ(Mat mat) 13a30f8f8cSSatish Balay { 142896befeSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 15a30f8f8cSSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 16a30f8f8cSSatish Balay int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 17*86e15631SSatish Balay int bs = baij->bs,*stmp; 18a30f8f8cSSatish Balay IS from,to; 19a30f8f8cSSatish Balay Vec gvec; 20a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 21a30f8f8cSSatish Balay PetscTable gid1_lid1; 22a30f8f8cSSatish Balay PetscTablePosition tpos; 23a30f8f8cSSatish Balay int gid,lid; 24a30f8f8cSSatish Balay #endif 25a30f8f8cSSatish Balay 26a30f8f8cSSatish Balay PetscFunctionBegin; 27a30f8f8cSSatish Balay 28a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 29a30f8f8cSSatish Balay /* use a table - Mark Adams */ 30a30f8f8cSSatish Balay PetscTableCreate(B->mbs,&gid1_lid1); 31a30f8f8cSSatish Balay for (i=0; i<B->mbs; i++) { 32a30f8f8cSSatish Balay for (j=0; j<B->ilen[i]; j++) { 33a30f8f8cSSatish Balay int data,gid1 = aj[B->i[i]+j] + 1; 34a30f8f8cSSatish Balay ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr); 35a30f8f8cSSatish Balay if (!data) { 36a30f8f8cSSatish Balay /* one based table */ 37a30f8f8cSSatish Balay ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 38a30f8f8cSSatish Balay } 39a30f8f8cSSatish Balay } 40a30f8f8cSSatish Balay } 41a30f8f8cSSatish Balay /* form array of columns we need */ 42b0a32e0cSBarry Smith ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 436dcce596SHong Zhang /* ierr = PetscMalloc((ec*bs+1)*sizeof(int),&tmp);CHKERRQ(ierr); */ 44a30f8f8cSSatish Balay ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 45a30f8f8cSSatish Balay while (tpos) { 46a30f8f8cSSatish Balay ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 47a30f8f8cSSatish Balay gid--; lid--; 48a30f8f8cSSatish Balay garray[lid] = gid; 49a30f8f8cSSatish Balay } 50a30f8f8cSSatish Balay ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 51a30f8f8cSSatish Balay /* qsort(garray, ec, sizeof(int), intcomparcarc); */ 52a30f8f8cSSatish Balay ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 53a30f8f8cSSatish Balay for (i=0; i<ec; i++) { 54a30f8f8cSSatish Balay ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 55a30f8f8cSSatish Balay } 56a30f8f8cSSatish Balay /* compact out the extra columns in B */ 57a30f8f8cSSatish Balay for (i=0; i<B->mbs; i++) { 58a30f8f8cSSatish Balay for (j=0; j<B->ilen[i]; j++) { 59a30f8f8cSSatish Balay int gid1 = aj[B->i[i] + j] + 1; 60a30f8f8cSSatish Balay ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 61a30f8f8cSSatish Balay lid --; 62a30f8f8cSSatish Balay aj[B->i[i]+j] = lid; 63a30f8f8cSSatish Balay } 64a30f8f8cSSatish Balay } 65a30f8f8cSSatish Balay B->nbs = ec; 66273d9f13SBarry Smith baij->B->n = ec*B->bs; 67a30f8f8cSSatish Balay ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 68a30f8f8cSSatish Balay /* Mark Adams */ 69a30f8f8cSSatish Balay #else 70a30f8f8cSSatish Balay /* For the first stab we make an array as long as the number of columns */ 71a30f8f8cSSatish Balay /* mark those columns that are in baij->B */ 72b0a32e0cSBarry Smith ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 73a30f8f8cSSatish Balay ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 74a30f8f8cSSatish Balay for (i=0; i<B->mbs; i++) { 75a30f8f8cSSatish Balay for (j=0; j<B->ilen[i]; j++) { 76a30f8f8cSSatish Balay if (!indices[aj[B->i[i] + j]]) ec++; 77a30f8f8cSSatish Balay indices[aj[B->i[i] + j] ] = 1; 78a30f8f8cSSatish Balay } 79a30f8f8cSSatish Balay } 80a30f8f8cSSatish Balay 81a30f8f8cSSatish Balay /* form array of columns we need */ 82b0a32e0cSBarry Smith ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 836dcce596SHong Zhang /* ierr = PetscMalloc((ec*bs+1)*sizeof(int),&tmp);CHKERRQ(ierr); */ 84f3ef73ceSBarry Smith ec = 0; 85f3ef73ceSBarry Smith for (i=0; i<Nbs; i++) { 86f3ef73ceSBarry Smith if (indices[i]) { 87f3ef73ceSBarry Smith garray[ec++] = i; 88f3ef73ceSBarry Smith } 89f3ef73ceSBarry Smith } 90a30f8f8cSSatish Balay 91a30f8f8cSSatish Balay /* make indices now point into garray */ 92a30f8f8cSSatish Balay for (i=0; i<ec; i++) { 93a30f8f8cSSatish Balay indices[garray[i]] = i; 94a30f8f8cSSatish Balay } 95a30f8f8cSSatish Balay 96a30f8f8cSSatish Balay /* compact out the extra columns in B */ 97a30f8f8cSSatish Balay for (i=0; i<B->mbs; i++) { 98a30f8f8cSSatish Balay for (j=0; j<B->ilen[i]; j++) { 99a30f8f8cSSatish Balay aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 100a30f8f8cSSatish Balay } 101a30f8f8cSSatish Balay } 102a30f8f8cSSatish Balay B->nbs = ec; 103273d9f13SBarry Smith baij->B->n = ec*B->bs; 104a30f8f8cSSatish Balay ierr = PetscFree(indices);CHKERRQ(ierr); 105a30f8f8cSSatish Balay #endif 1066dcce596SHong Zhang /* 107a30f8f8cSSatish Balay for (i=0,col=0; i<ec; i++) { 108a30f8f8cSSatish Balay for (j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j; 109a30f8f8cSSatish Balay } 1106dcce596SHong Zhang */ 111a30f8f8cSSatish Balay /* create local vector that is used to scatter into */ 112a30f8f8cSSatish Balay ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 113a30f8f8cSSatish Balay 114a30f8f8cSSatish Balay /* create two temporary index sets for building scatter-gather */ 115a30f8f8cSSatish Balay 116a30f8f8cSSatish Balay /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from);CHKERRQ(ierr); */ 117eb7adc28SSatish Balay for (i=0; i<ec; i++) { 118a30f8f8cSSatish Balay garray[i] = bs*garray[i]; 119a30f8f8cSSatish Balay } 120a30f8f8cSSatish Balay ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 121*86e15631SSatish Balay for (i=0; i<ec; i++) { 122a30f8f8cSSatish Balay garray[i] = garray[i]/bs; 123a30f8f8cSSatish Balay } 124a30f8f8cSSatish Balay 125b0a32e0cSBarry Smith ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 126a30f8f8cSSatish Balay for (i=0; i<ec; i++) { stmp[i] = bs*i; } 127a30f8f8cSSatish Balay ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 128a30f8f8cSSatish Balay ierr = PetscFree(stmp);CHKERRQ(ierr); 129a30f8f8cSSatish Balay 130a30f8f8cSSatish Balay /* create temporary global vector to generate scatter context */ 131a30f8f8cSSatish Balay /* this is inefficient, but otherwise we must do either 132a30f8f8cSSatish Balay 1) save garray until the first actual scatter when the vector is known or 133a30f8f8cSSatish Balay 2) have another way of generating a scatter context without a vector.*/ 134273d9f13SBarry Smith ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 135a30f8f8cSSatish Balay 136a30f8f8cSSatish Balay /* gnerate the scatter context */ 137a30f8f8cSSatish Balay ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 138a30f8f8cSSatish Balay 139a30f8f8cSSatish Balay /* 140a30f8f8cSSatish Balay Post the receives for the first matrix vector product. We sync-chronize after 141a30f8f8cSSatish Balay this on the chance that the user immediately calls MatMult() after assemblying 142a30f8f8cSSatish Balay the matrix. 143a30f8f8cSSatish Balay */ 144a30f8f8cSSatish Balay ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 145a30f8f8cSSatish Balay ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 146a30f8f8cSSatish Balay 147b0a32e0cSBarry Smith PetscLogObjectParent(mat,baij->Mvctx); 148b0a32e0cSBarry Smith PetscLogObjectParent(mat,baij->lvec); 149b0a32e0cSBarry Smith PetscLogObjectParent(mat,from); 150b0a32e0cSBarry Smith PetscLogObjectParent(mat,to); 151a30f8f8cSSatish Balay baij->garray = garray; 152b0a32e0cSBarry Smith PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 153a30f8f8cSSatish Balay ierr = ISDestroy(from);CHKERRQ(ierr); 154a30f8f8cSSatish Balay ierr = ISDestroy(to);CHKERRQ(ierr); 155a30f8f8cSSatish Balay ierr = VecDestroy(gvec);CHKERRQ(ierr); 1566dcce596SHong Zhang /* ierr = PetscFree(tmp);CHKERRQ(ierr); */ 157a30f8f8cSSatish Balay PetscFunctionReturn(0); 158a30f8f8cSSatish Balay } 159a30f8f8cSSatish Balay 160a30f8f8cSSatish Balay 161a30f8f8cSSatish Balay /* 162a30f8f8cSSatish Balay Takes the local part of an already assembled MPIBAIJ matrix 163a30f8f8cSSatish Balay and disassembles it. This is to allow new nonzeros into the matrix 164a30f8f8cSSatish Balay that require more communication in the matrix vector multiply. 165a30f8f8cSSatish Balay Thus certain data-structures must be rebuilt. 166a30f8f8cSSatish Balay 167a30f8f8cSSatish Balay Kind of slow! But that's what application programmers get when 168a30f8f8cSSatish Balay they are sloppy. 169a30f8f8cSSatish Balay */ 1704a2ae208SSatish Balay #undef __FUNCT__ 1714a2ae208SSatish Balay #define __FUNCT__ "DisAssemble_MPISBAIJ" 172a30f8f8cSSatish Balay int DisAssemble_MPISBAIJ(Mat A) 173a30f8f8cSSatish Balay { 1742896befeSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 175a30f8f8cSSatish Balay Mat B = baij->B,Bnew; 176a30f8f8cSSatish Balay Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 177273d9f13SBarry Smith int ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray; 178273d9f13SBarry Smith int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m; 179a30f8f8cSSatish Balay MatScalar *a = Bbaij->a; 18087828ca2SBarry Smith PetscScalar *atmp; 18182502324SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 18282502324SSatish Balay int l; 183a30f8f8cSSatish Balay #endif 184a30f8f8cSSatish Balay 185a30f8f8cSSatish Balay PetscFunctionBegin; 18682502324SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 18787828ca2SBarry Smith ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp); 18882502324SSatish Balay #endif 189a30f8f8cSSatish Balay /* free stuff related to matrix-vec multiply */ 190b0a32e0cSBarry Smith ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 191a30f8f8cSSatish Balay ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 192a30f8f8cSSatish Balay ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 193a30f8f8cSSatish Balay if (baij->colmap) { 194a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 195a30f8f8cSSatish Balay ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 196a30f8f8cSSatish Balay #else 197a30f8f8cSSatish Balay ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 198a30f8f8cSSatish Balay baij->colmap = 0; 199b0a32e0cSBarry Smith PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 200a30f8f8cSSatish Balay #endif 201a30f8f8cSSatish Balay } 202a30f8f8cSSatish Balay 203a30f8f8cSSatish Balay /* make sure that B is assembled so we can access its values */ 204a30f8f8cSSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 205a30f8f8cSSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 206a30f8f8cSSatish Balay 207a30f8f8cSSatish Balay /* invent new B and copy stuff over */ 20882502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr); 209a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 210a30f8f8cSSatish Balay nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 211a30f8f8cSSatish Balay } 212a30f8f8cSSatish Balay ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr); 213a30f8f8cSSatish Balay ierr = PetscFree(nz);CHKERRQ(ierr); 214a30f8f8cSSatish Balay 21582502324SSatish Balay ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 216a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 217a30f8f8cSSatish Balay rvals[0] = bs*i; 218a30f8f8cSSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 219a30f8f8cSSatish Balay for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 220a30f8f8cSSatish Balay col = garray[Bbaij->j[j]]*bs; 221a30f8f8cSSatish Balay for (k=0; k<bs; k++) { 222a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 223a30f8f8cSSatish Balay for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 224a30f8f8cSSatish Balay #else 22571730473SSatish Balay atmp = a+j*bs2 + k*bs; 226a30f8f8cSSatish Balay #endif 227c8407628SSatish Balay ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 228a30f8f8cSSatish Balay col++; 229a30f8f8cSSatish Balay } 230a30f8f8cSSatish Balay } 231a30f8f8cSSatish Balay } 232a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 233a30f8f8cSSatish Balay ierr = PetscFree(atmp);CHKERRQ(ierr); 234a30f8f8cSSatish Balay #endif 235a30f8f8cSSatish Balay ierr = PetscFree(baij->garray);CHKERRQ(ierr); 236a30f8f8cSSatish Balay baij->garray = 0; 237a30f8f8cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 238b0a32e0cSBarry Smith PetscLogObjectMemory(A,-ec*sizeof(int)); 239a30f8f8cSSatish Balay ierr = MatDestroy(B);CHKERRQ(ierr); 240b0a32e0cSBarry Smith PetscLogObjectParent(A,Bnew); 241a30f8f8cSSatish Balay baij->B = Bnew; 242a30f8f8cSSatish Balay A->was_assembled = PETSC_FALSE; 243a30f8f8cSSatish Balay PetscFunctionReturn(0); 244a30f8f8cSSatish Balay } 245a30f8f8cSSatish Balay 246a30f8f8cSSatish Balay 247a30f8f8cSSatish Balay 248