1*a30f8f8cSSatish Balay /*$Id: mmsbaij.c,v 1.32 2000/04/12 04:23:40 bsmith Exp $*/ 2*a30f8f8cSSatish Balay 3*a30f8f8cSSatish Balay /* 4*a30f8f8cSSatish Balay Support for the parallel SBAIJ matrix vector multiply 5*a30f8f8cSSatish Balay */ 6*a30f8f8cSSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h" 7*a30f8f8cSSatish Balay #include "src/vec/vecimpl.h" 8*a30f8f8cSSatish Balay extern int MatSetValues_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 9*a30f8f8cSSatish Balay 10*a30f8f8cSSatish Balay #undef __FUNC__ 11*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatSetUpMultiply_MPISBAIJ" 12*a30f8f8cSSatish Balay int MatSetUpMultiply_MPISBAIJ(Mat mat) 13*a30f8f8cSSatish Balay { 14*a30f8f8cSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 15*a30f8f8cSSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 16*a30f8f8cSSatish Balay int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 17*a30f8f8cSSatish Balay int col,bs = baij->bs,*tmp,*stmp; 18*a30f8f8cSSatish Balay IS from,to; 19*a30f8f8cSSatish Balay Vec gvec; 20*a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 21*a30f8f8cSSatish Balay PetscTable gid1_lid1; 22*a30f8f8cSSatish Balay PetscTablePosition tpos; 23*a30f8f8cSSatish Balay int gid,lid; 24*a30f8f8cSSatish Balay #endif 25*a30f8f8cSSatish Balay 26*a30f8f8cSSatish Balay PetscFunctionBegin; 27*a30f8f8cSSatish Balay 28*a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 29*a30f8f8cSSatish Balay /* use a table - Mark Adams */ 30*a30f8f8cSSatish Balay PetscTableCreate(B->mbs,&gid1_lid1); 31*a30f8f8cSSatish Balay for (i=0; i<B->mbs; i++) { 32*a30f8f8cSSatish Balay for (j=0; j<B->ilen[i]; j++) { 33*a30f8f8cSSatish Balay int data,gid1 = aj[B->i[i]+j] + 1; 34*a30f8f8cSSatish Balay ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr); 35*a30f8f8cSSatish Balay if (!data) { 36*a30f8f8cSSatish Balay /* one based table */ 37*a30f8f8cSSatish Balay ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 38*a30f8f8cSSatish Balay } 39*a30f8f8cSSatish Balay } 40*a30f8f8cSSatish Balay } 41*a30f8f8cSSatish Balay /* form array of columns we need */ 42*a30f8f8cSSatish Balay garray = (int *)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray); 43*a30f8f8cSSatish Balay tmp = (int *)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp); 44*a30f8f8cSSatish Balay ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 45*a30f8f8cSSatish Balay while (tpos) { 46*a30f8f8cSSatish Balay ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 47*a30f8f8cSSatish Balay gid--; lid--; 48*a30f8f8cSSatish Balay garray[lid] = gid; 49*a30f8f8cSSatish Balay } 50*a30f8f8cSSatish Balay ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 51*a30f8f8cSSatish Balay /* qsort(garray, ec, sizeof(int), intcomparcarc); */ 52*a30f8f8cSSatish Balay ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 53*a30f8f8cSSatish Balay for (i=0; i<ec; i++) { 54*a30f8f8cSSatish Balay ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 55*a30f8f8cSSatish Balay } 56*a30f8f8cSSatish Balay /* compact out the extra columns in B */ 57*a30f8f8cSSatish Balay for (i=0; i<B->mbs; i++) { 58*a30f8f8cSSatish Balay for (j=0; j<B->ilen[i]; j++) { 59*a30f8f8cSSatish Balay int gid1 = aj[B->i[i] + j] + 1; 60*a30f8f8cSSatish Balay ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 61*a30f8f8cSSatish Balay lid --; 62*a30f8f8cSSatish Balay aj[B->i[i]+j] = lid; 63*a30f8f8cSSatish Balay } 64*a30f8f8cSSatish Balay } 65*a30f8f8cSSatish Balay B->nbs = ec; 66*a30f8f8cSSatish Balay B->n = ec*B->bs; 67*a30f8f8cSSatish Balay ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 68*a30f8f8cSSatish Balay /* Mark Adams */ 69*a30f8f8cSSatish Balay #else 70*a30f8f8cSSatish Balay /* For the first stab we make an array as long as the number of columns */ 71*a30f8f8cSSatish Balay /* mark those columns that are in baij->B */ 72*a30f8f8cSSatish Balay indices = (int*)PetscMalloc((Nbs+1)*sizeof(int));CHKPTRQ(indices); 73*a30f8f8cSSatish Balay ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 74*a30f8f8cSSatish Balay for (i=0; i<B->mbs; i++) { 75*a30f8f8cSSatish Balay for (j=0; j<B->ilen[i]; j++) { 76*a30f8f8cSSatish Balay if (!indices[aj[B->i[i] + j]]) ec++; 77*a30f8f8cSSatish Balay indices[aj[B->i[i] + j] ] = 1; 78*a30f8f8cSSatish Balay } 79*a30f8f8cSSatish Balay } 80*a30f8f8cSSatish Balay 81*a30f8f8cSSatish Balay /* form array of columns we need */ 82*a30f8f8cSSatish Balay garray = (int*)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray); 83*a30f8f8cSSatish Balay tmp = (int*)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp) 84*a30f8f8cSSatish Balay ec = 0; 85*a30f8f8cSSatish Balay for (i=0; i<Nbs; i++) { 86*a30f8f8cSSatish Balay if (indices[i]) { 87*a30f8f8cSSatish Balay garray[ec++] = i; 88*a30f8f8cSSatish Balay } 89*a30f8f8cSSatish Balay } 90*a30f8f8cSSatish Balay 91*a30f8f8cSSatish Balay /* make indices now point into garray */ 92*a30f8f8cSSatish Balay for (i=0; i<ec; i++) { 93*a30f8f8cSSatish Balay indices[garray[i]] = i; 94*a30f8f8cSSatish Balay } 95*a30f8f8cSSatish Balay 96*a30f8f8cSSatish Balay /* compact out the extra columns in B */ 97*a30f8f8cSSatish Balay for (i=0; i<B->mbs; i++) { 98*a30f8f8cSSatish Balay for (j=0; j<B->ilen[i]; j++) { 99*a30f8f8cSSatish Balay aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 100*a30f8f8cSSatish Balay } 101*a30f8f8cSSatish Balay } 102*a30f8f8cSSatish Balay B->nbs = ec; 103*a30f8f8cSSatish Balay B->n = ec*B->bs; 104*a30f8f8cSSatish Balay ierr = PetscFree(indices);CHKERRQ(ierr); 105*a30f8f8cSSatish Balay #endif 106*a30f8f8cSSatish Balay 107*a30f8f8cSSatish Balay for (i=0,col=0; i<ec; i++) { 108*a30f8f8cSSatish Balay for (j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j; 109*a30f8f8cSSatish Balay } 110*a30f8f8cSSatish Balay /* create local vector that is used to scatter into */ 111*a30f8f8cSSatish Balay ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 112*a30f8f8cSSatish Balay 113*a30f8f8cSSatish Balay /* create two temporary index sets for building scatter-gather */ 114*a30f8f8cSSatish Balay 115*a30f8f8cSSatish Balay /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from);CHKERRQ(ierr); */ 116*a30f8f8cSSatish Balay for (i=0,col=0; i<ec; i++) { 117*a30f8f8cSSatish Balay garray[i] = bs*garray[i]; 118*a30f8f8cSSatish Balay } 119*a30f8f8cSSatish Balay ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 120*a30f8f8cSSatish Balay for (i=0,col=0; i<ec; i++) { 121*a30f8f8cSSatish Balay garray[i] = garray[i]/bs; 122*a30f8f8cSSatish Balay } 123*a30f8f8cSSatish Balay 124*a30f8f8cSSatish Balay stmp = (int*)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(stmp); 125*a30f8f8cSSatish Balay for (i=0; i<ec; i++) { stmp[i] = bs*i; } 126*a30f8f8cSSatish Balay ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 127*a30f8f8cSSatish Balay ierr = PetscFree(stmp);CHKERRQ(ierr); 128*a30f8f8cSSatish Balay 129*a30f8f8cSSatish Balay /* create temporary global vector to generate scatter context */ 130*a30f8f8cSSatish Balay /* this is inefficient, but otherwise we must do either 131*a30f8f8cSSatish Balay 1) save garray until the first actual scatter when the vector is known or 132*a30f8f8cSSatish Balay 2) have another way of generating a scatter context without a vector.*/ 133*a30f8f8cSSatish Balay ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec);CHKERRQ(ierr); 134*a30f8f8cSSatish Balay 135*a30f8f8cSSatish Balay /* gnerate the scatter context */ 136*a30f8f8cSSatish Balay ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 137*a30f8f8cSSatish Balay 138*a30f8f8cSSatish Balay /* 139*a30f8f8cSSatish Balay Post the receives for the first matrix vector product. We sync-chronize after 140*a30f8f8cSSatish Balay this on the chance that the user immediately calls MatMult() after assemblying 141*a30f8f8cSSatish Balay the matrix. 142*a30f8f8cSSatish Balay */ 143*a30f8f8cSSatish Balay ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 144*a30f8f8cSSatish Balay ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 145*a30f8f8cSSatish Balay 146*a30f8f8cSSatish Balay PLogObjectParent(mat,baij->Mvctx); 147*a30f8f8cSSatish Balay PLogObjectParent(mat,baij->lvec); 148*a30f8f8cSSatish Balay PLogObjectParent(mat,from); 149*a30f8f8cSSatish Balay PLogObjectParent(mat,to); 150*a30f8f8cSSatish Balay baij->garray = garray; 151*a30f8f8cSSatish Balay PLogObjectMemory(mat,(ec+1)*sizeof(int)); 152*a30f8f8cSSatish Balay ierr = ISDestroy(from);CHKERRQ(ierr); 153*a30f8f8cSSatish Balay ierr = ISDestroy(to);CHKERRQ(ierr); 154*a30f8f8cSSatish Balay ierr = VecDestroy(gvec);CHKERRQ(ierr); 155*a30f8f8cSSatish Balay ierr = PetscFree(tmp);CHKERRQ(ierr); 156*a30f8f8cSSatish Balay PetscFunctionReturn(0); 157*a30f8f8cSSatish Balay } 158*a30f8f8cSSatish Balay 159*a30f8f8cSSatish Balay 160*a30f8f8cSSatish Balay /* 161*a30f8f8cSSatish Balay Takes the local part of an already assembled MPIBAIJ matrix 162*a30f8f8cSSatish Balay and disassembles it. This is to allow new nonzeros into the matrix 163*a30f8f8cSSatish Balay that require more communication in the matrix vector multiply. 164*a30f8f8cSSatish Balay Thus certain data-structures must be rebuilt. 165*a30f8f8cSSatish Balay 166*a30f8f8cSSatish Balay Kind of slow! But that's what application programmers get when 167*a30f8f8cSSatish Balay they are sloppy. 168*a30f8f8cSSatish Balay */ 169*a30f8f8cSSatish Balay #undef __FUNC__ 170*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"DisAssemble_MPISBAIJ" 171*a30f8f8cSSatish Balay int DisAssemble_MPISBAIJ(Mat A) 172*a30f8f8cSSatish Balay { 173*a30f8f8cSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 174*a30f8f8cSSatish Balay Mat B = baij->B,Bnew; 175*a30f8f8cSSatish Balay Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 176*a30f8f8cSSatish Balay int ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray; 177*a30f8f8cSSatish Balay int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m; 178*a30f8f8cSSatish Balay MatScalar *a = Bbaij->a; 179*a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 180*a30f8f8cSSatish Balay Scalar *atmp = (Scalar*)PetscMalloc(baij->bs*sizeof(Scalar)); 181*a30f8f8cSSatish Balay int l; 182*a30f8f8cSSatish Balay #else 183*a30f8f8cSSatish Balay Scalar *atmp; 184*a30f8f8cSSatish Balay #endif 185*a30f8f8cSSatish Balay 186*a30f8f8cSSatish Balay PetscFunctionBegin; 187*a30f8f8cSSatish Balay /* free stuff related to matrix-vec multiply */ 188*a30f8f8cSSatish Balay ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PLogObjectMemory below */ 189*a30f8f8cSSatish Balay ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 190*a30f8f8cSSatish Balay ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 191*a30f8f8cSSatish Balay if (baij->colmap) { 192*a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 193*a30f8f8cSSatish Balay ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 194*a30f8f8cSSatish Balay #else 195*a30f8f8cSSatish Balay ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 196*a30f8f8cSSatish Balay baij->colmap = 0; 197*a30f8f8cSSatish Balay PLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 198*a30f8f8cSSatish Balay #endif 199*a30f8f8cSSatish Balay } 200*a30f8f8cSSatish Balay 201*a30f8f8cSSatish Balay /* make sure that B is assembled so we can access its values */ 202*a30f8f8cSSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 203*a30f8f8cSSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 204*a30f8f8cSSatish Balay 205*a30f8f8cSSatish Balay /* invent new B and copy stuff over */ 206*a30f8f8cSSatish Balay nz = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(nz); 207*a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 208*a30f8f8cSSatish Balay nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 209*a30f8f8cSSatish Balay } 210*a30f8f8cSSatish Balay ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr); 211*a30f8f8cSSatish Balay ierr = PetscFree(nz);CHKERRQ(ierr); 212*a30f8f8cSSatish Balay 213*a30f8f8cSSatish Balay rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 214*a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 215*a30f8f8cSSatish Balay rvals[0] = bs*i; 216*a30f8f8cSSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 217*a30f8f8cSSatish Balay for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 218*a30f8f8cSSatish Balay col = garray[Bbaij->j[j]]*bs; 219*a30f8f8cSSatish Balay for (k=0; k<bs; k++) { 220*a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 221*a30f8f8cSSatish Balay for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 222*a30f8f8cSSatish Balay #else 223*a30f8f8cSSatish Balay atmp = a+j*bs2; 224*a30f8f8cSSatish Balay #endif 225*a30f8f8cSSatish Balay ierr = MatSetValues_SeqBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 226*a30f8f8cSSatish Balay col++; 227*a30f8f8cSSatish Balay } 228*a30f8f8cSSatish Balay } 229*a30f8f8cSSatish Balay } 230*a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 231*a30f8f8cSSatish Balay ierr = PetscFree(atmp);CHKERRQ(ierr); 232*a30f8f8cSSatish Balay #endif 233*a30f8f8cSSatish Balay ierr = PetscFree(baij->garray);CHKERRQ(ierr); 234*a30f8f8cSSatish Balay baij->garray = 0; 235*a30f8f8cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 236*a30f8f8cSSatish Balay PLogObjectMemory(A,-ec*sizeof(int)); 237*a30f8f8cSSatish Balay ierr = MatDestroy(B);CHKERRQ(ierr); 238*a30f8f8cSSatish Balay PLogObjectParent(A,Bnew); 239*a30f8f8cSSatish Balay baij->B = Bnew; 240*a30f8f8cSSatish Balay A->was_assembled = PETSC_FALSE; 241*a30f8f8cSSatish Balay PetscFunctionReturn(0); 242*a30f8f8cSSatish Balay } 243*a30f8f8cSSatish Balay 244*a30f8f8cSSatish Balay 245*a30f8f8cSSatish Balay 246