173f4d377SMatthew Knepley /*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/ 2a30f8f8cSSatish Balay 3a30f8f8cSSatish Balay /* 4a30f8f8cSSatish Balay Support for the parallel SBAIJ matrix vector multiply 5a30f8f8cSSatish Balay */ 62896befeSSatish Balay #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 7a30f8f8cSSatish Balay #include "src/vec/vecimpl.h" 887828ca2SBarry Smith extern int MatSetValues_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 9a30f8f8cSSatish Balay 104a2ae208SSatish Balay #undef __FUNCT__ 114a2ae208SSatish Balay #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ" 12a30f8f8cSSatish Balay int MatSetUpMultiply_MPISBAIJ(Mat mat) 13a30f8f8cSSatish Balay { 14*40781036SHong Zhang Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 15*40781036SHong Zhang Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 16*40781036SHong Zhang int Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray,*sgarray; 17*40781036SHong Zhang int bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 18*40781036SHong Zhang IS from,to; 19*40781036SHong Zhang Vec gvec; 20*40781036SHong Zhang int rank=sbaij->rank,lsize,size=sbaij->size; 21*40781036SHong Zhang int *owners=sbaij->rowners,*sowners,*ec_owner,prank=100,k; 22*40781036SHong Zhang PetscMap vecmap; 23*40781036SHong Zhang PetscScalar *ptr; 24*40781036SHong Zhang 25*40781036SHong Zhang PetscFunctionBegin; 26*40781036SHong Zhang /* 27*40781036SHong Zhang PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], MatSetUpMultiply_MPISBAIJ is called ...\n",rank); 28*40781036SHong Zhang PetscSynchronizedFlush(PETSC_COMM_WORLD); 29*40781036SHong Zhang */ 30*40781036SHong Zhang ierr = PetscOptionsGetInt(PETSC_NULL,"-prank",&prank,PETSC_NULL);CHKERRQ(ierr); 31*40781036SHong Zhang if (sbaij->lvec) { 32*40781036SHong Zhang ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr); 33*40781036SHong Zhang sbaij->lvec = 0; 34*40781036SHong Zhang } 35*40781036SHong Zhang if (sbaij->Mvctx) { 36*40781036SHong Zhang ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr); 37*40781036SHong Zhang sbaij->Mvctx = 0; 38*40781036SHong Zhang } 39*40781036SHong Zhang 40*40781036SHong Zhang /* For the first stab we make an array as long as the number of columns */ 41*40781036SHong Zhang /* mark those columns that are in sbaij->B */ 42*40781036SHong Zhang ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 43*40781036SHong Zhang ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 44*40781036SHong Zhang for (i=0; i<mbs; i++) { 45*40781036SHong Zhang for (j=0; j<B->ilen[i]; j++) { 46*40781036SHong Zhang if (!indices[aj[B->i[i] + j]]) ec++; 47*40781036SHong Zhang indices[aj[B->i[i] + j] ] = 1; 48*40781036SHong Zhang } 49*40781036SHong Zhang } 50*40781036SHong Zhang 51*40781036SHong Zhang /* form arrays of columns we need */ 52*40781036SHong Zhang ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 53*40781036SHong Zhang ierr = PetscMalloc((3*ec+1)*sizeof(int),&sgarray);CHKERRQ(ierr); 54*40781036SHong Zhang ec_owner = sgarray + 2*ec; 55*40781036SHong Zhang 56*40781036SHong Zhang ec = 0; 57*40781036SHong Zhang for (j=0; j<size; j++){ 58*40781036SHong Zhang for (i=owners[j]; i<owners[j+1]; i++){ 59*40781036SHong Zhang if (indices[i]) { 60*40781036SHong Zhang garray[ec] = i; 61*40781036SHong Zhang ec_owner[ec] = j; 62*40781036SHong Zhang ec++; 63*40781036SHong Zhang } 64*40781036SHong Zhang } 65*40781036SHong Zhang } 66*40781036SHong Zhang 67*40781036SHong Zhang if (rank == prank){ 68*40781036SHong Zhang printf("proc[%d]: \n",rank); 69*40781036SHong Zhang for (i=0; i<ec;i++) printf("[%d]: garray = %d, ec_owner = %d\n", i,garray[i], ec_owner[i]); 70*40781036SHong Zhang } 71*40781036SHong Zhang 72*40781036SHong Zhang /* make indices now point into garray */ 73*40781036SHong Zhang for (i=0; i<ec; i++) { 74*40781036SHong Zhang indices[garray[i]] = i; 75*40781036SHong Zhang } 76*40781036SHong Zhang 77*40781036SHong Zhang /* compact out the extra columns in B */ 78*40781036SHong Zhang for (i=0; i<mbs; i++) { 79*40781036SHong Zhang for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 80*40781036SHong Zhang } 81*40781036SHong Zhang B->nbs = ec; 82*40781036SHong Zhang sbaij->B->n = ec*B->bs; 83*40781036SHong Zhang ierr = PetscFree(indices);CHKERRQ(ierr); 84*40781036SHong Zhang 85*40781036SHong Zhang /* create local vector that is used to scatter into */ 86*40781036SHong Zhang ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 87*40781036SHong Zhang 88*40781036SHong Zhang /* create two temporary index sets for building scatter-gather */ 89*40781036SHong Zhang ierr = PetscMalloc((2*ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 90*40781036SHong Zhang for (i=0; i<ec; i++) stmp[i] = bs*garray[i]; 91*40781036SHong Zhang ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr); 92*40781036SHong Zhang 93*40781036SHong Zhang for (i=0; i<ec; i++) { stmp[i] = bs*i; } 94*40781036SHong Zhang ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 95*40781036SHong Zhang 96*40781036SHong Zhang if (rank == prank){ 97*40781036SHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF," from: ");CHKERRQ(ierr); 98*40781036SHong Zhang ierr = ISView(from,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 99*40781036SHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF," to:");CHKERRQ(ierr); 100*40781036SHong Zhang ierr = ISView(to,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 101*40781036SHong Zhang } 102*40781036SHong Zhang 103*40781036SHong Zhang /* generate the scatter context */ 104*40781036SHong Zhang ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 105*40781036SHong Zhang ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 106*40781036SHong Zhang ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr); 107*40781036SHong Zhang 108*40781036SHong Zhang sbaij->garray = garray; 109*40781036SHong Zhang PetscLogObjectParent(mat,sbaij->Mvctx); 110*40781036SHong Zhang PetscLogObjectParent(mat,sbaij->lvec); 111*40781036SHong Zhang PetscLogObjectParent(mat,from); 112*40781036SHong Zhang PetscLogObjectParent(mat,to); 113*40781036SHong Zhang 114*40781036SHong Zhang ierr = ISDestroy(from);CHKERRQ(ierr); 115*40781036SHong Zhang ierr = ISDestroy(to);CHKERRQ(ierr); 116*40781036SHong Zhang 117*40781036SHong Zhang /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 118*40781036SHong Zhang lsize = (mbs + ec)*bs; 119*40781036SHong Zhang ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 120*40781036SHong Zhang ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 121*40781036SHong Zhang ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 122*40781036SHong Zhang 123*40781036SHong Zhang ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr); 124*40781036SHong Zhang ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr); 125*40781036SHong Zhang /* for (i=0; i<=size;i++) sowners[i] /= bs; */ 126*40781036SHong Zhang 127*40781036SHong Zhang if (rank==prank){ 128*40781036SHong Zhang printf("length of slvec0: \n", vec_size); 129*40781036SHong Zhang for (i=0; i<=size;i++) printf("[%d]: owners = %d, sowners/bs = %d\n", i,owners[i], sowners[i]/bs); 130*40781036SHong Zhang } 131*40781036SHong Zhang 132*40781036SHong Zhang /* x index in the IS sfrom */ 133*40781036SHong Zhang for (i=0; i<ec; i++) { 134*40781036SHong Zhang j = ec_owner[i]; 135*40781036SHong Zhang sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 136*40781036SHong Zhang } 137*40781036SHong Zhang /* b index in the IS sfrom */ 138*40781036SHong Zhang k = sowners[rank]/bs + mbs; 139*40781036SHong Zhang for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 140*40781036SHong Zhang 141*40781036SHong Zhang for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i]; 142*40781036SHong Zhang ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr); 143*40781036SHong Zhang 144*40781036SHong Zhang /* x index in the IS sto */ 145*40781036SHong Zhang k = sowners[rank]/bs + mbs; 146*40781036SHong Zhang for (i=0; i<ec; i++) stmp[i] = bs*(k + i); 147*40781036SHong Zhang /* b index in the IS sto */ 148*40781036SHong Zhang for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec]; 149*40781036SHong Zhang 150*40781036SHong Zhang ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr); 151*40781036SHong Zhang 152*40781036SHong Zhang if (rank == prank){ 153*40781036SHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF," sfrom: ");CHKERRQ(ierr); 154*40781036SHong Zhang ierr = ISView(from,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 155*40781036SHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF," sto:");CHKERRQ(ierr); 156*40781036SHong Zhang ierr = ISView(to,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 157*40781036SHong Zhang } 158*40781036SHong Zhang 159*40781036SHong Zhang /* gnerate the SBAIJ scatter context */ 160*40781036SHong Zhang ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 161*40781036SHong Zhang 162*40781036SHong Zhang /* 163*40781036SHong Zhang Post the receives for the first matrix vector product. We sync-chronize after 164*40781036SHong Zhang this on the chance that the user immediately calls MatMult() after assemblying 165*40781036SHong Zhang the matrix. 166*40781036SHong Zhang */ 167*40781036SHong Zhang ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr); 168*40781036SHong Zhang 169*40781036SHong Zhang ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 170*40781036SHong Zhang ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 171*40781036SHong Zhang ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 172*40781036SHong Zhang ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 173*40781036SHong Zhang ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 174*40781036SHong Zhang 175*40781036SHong Zhang ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 176*40781036SHong Zhang ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 177*40781036SHong Zhang ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 178*40781036SHong Zhang 179*40781036SHong Zhang ierr = PetscFree(stmp);CHKERRQ(ierr); 180*40781036SHong Zhang ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 181*40781036SHong Zhang 182*40781036SHong Zhang PetscLogObjectParent(mat,sbaij->sMvctx); 183*40781036SHong Zhang PetscLogObjectParent(mat,sbaij->slvec0); 184*40781036SHong Zhang PetscLogObjectParent(mat,sbaij->slvec1); 185*40781036SHong Zhang PetscLogObjectParent(mat,sbaij->slvec0b); 186*40781036SHong Zhang PetscLogObjectParent(mat,sbaij->slvec1a); 187*40781036SHong Zhang PetscLogObjectParent(mat,sbaij->slvec1b); 188*40781036SHong Zhang PetscLogObjectParent(mat,from); 189*40781036SHong Zhang PetscLogObjectParent(mat,to); 190*40781036SHong Zhang 191*40781036SHong Zhang PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 192*40781036SHong Zhang ierr = ISDestroy(from);CHKERRQ(ierr); 193*40781036SHong Zhang ierr = ISDestroy(to);CHKERRQ(ierr); 194*40781036SHong Zhang ierr = VecDestroy(gvec);CHKERRQ(ierr); 195*40781036SHong Zhang ierr = PetscFree(sgarray);CHKERRQ(ierr); 196*40781036SHong Zhang 197*40781036SHong Zhang PetscFunctionReturn(0); 198*40781036SHong Zhang } 199*40781036SHong Zhang 200*40781036SHong Zhang #undef __FUNCT__ 201*40781036SHong Zhang #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm" 202*40781036SHong Zhang int MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 203*40781036SHong Zhang { 2042896befeSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 205a30f8f8cSSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 206a30f8f8cSSatish Balay int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 20786e15631SSatish Balay int bs = baij->bs,*stmp; 208a30f8f8cSSatish Balay IS from,to; 209a30f8f8cSSatish Balay Vec gvec; 210a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 211a30f8f8cSSatish Balay PetscTable gid1_lid1; 212a30f8f8cSSatish Balay PetscTablePosition tpos; 213a30f8f8cSSatish Balay int gid,lid; 214a30f8f8cSSatish Balay #endif 215a30f8f8cSSatish Balay 216a30f8f8cSSatish Balay PetscFunctionBegin; 217a30f8f8cSSatish Balay 218a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 219a30f8f8cSSatish Balay /* use a table - Mark Adams */ 220a30f8f8cSSatish Balay PetscTableCreate(B->mbs,&gid1_lid1); 221a30f8f8cSSatish Balay for (i=0; i<B->mbs; i++) { 222a30f8f8cSSatish Balay for (j=0; j<B->ilen[i]; j++) { 223a30f8f8cSSatish Balay int data,gid1 = aj[B->i[i]+j] + 1; 224a30f8f8cSSatish Balay ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr); 225a30f8f8cSSatish Balay if (!data) { 226a30f8f8cSSatish Balay /* one based table */ 227a30f8f8cSSatish Balay ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 228a30f8f8cSSatish Balay } 229a30f8f8cSSatish Balay } 230a30f8f8cSSatish Balay } 231a30f8f8cSSatish Balay /* form array of columns we need */ 232b0a32e0cSBarry Smith ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 233a30f8f8cSSatish Balay ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 234a30f8f8cSSatish Balay while (tpos) { 235a30f8f8cSSatish Balay ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 236a30f8f8cSSatish Balay gid--; lid--; 237a30f8f8cSSatish Balay garray[lid] = gid; 238a30f8f8cSSatish Balay } 239a30f8f8cSSatish Balay ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 240a30f8f8cSSatish Balay /* qsort(garray, ec, sizeof(int), intcomparcarc); */ 241a30f8f8cSSatish Balay ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 242a30f8f8cSSatish Balay for (i=0; i<ec; i++) { 243a30f8f8cSSatish Balay ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 244a30f8f8cSSatish Balay } 245a30f8f8cSSatish Balay /* compact out the extra columns in B */ 246a30f8f8cSSatish Balay for (i=0; i<B->mbs; i++) { 247a30f8f8cSSatish Balay for (j=0; j<B->ilen[i]; j++) { 248a30f8f8cSSatish Balay int gid1 = aj[B->i[i] + j] + 1; 249a30f8f8cSSatish Balay ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 250a30f8f8cSSatish Balay lid --; 251a30f8f8cSSatish Balay aj[B->i[i]+j] = lid; 252a30f8f8cSSatish Balay } 253a30f8f8cSSatish Balay } 254a30f8f8cSSatish Balay B->nbs = ec; 255273d9f13SBarry Smith baij->B->n = ec*B->bs; 256a30f8f8cSSatish Balay ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 257a30f8f8cSSatish Balay /* Mark Adams */ 258a30f8f8cSSatish Balay #else 259a30f8f8cSSatish Balay /* For the first stab we make an array as long as the number of columns */ 260a30f8f8cSSatish Balay /* mark those columns that are in baij->B */ 261b0a32e0cSBarry Smith ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 262a30f8f8cSSatish Balay ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 263a30f8f8cSSatish Balay for (i=0; i<B->mbs; i++) { 264a30f8f8cSSatish Balay for (j=0; j<B->ilen[i]; j++) { 265a30f8f8cSSatish Balay if (!indices[aj[B->i[i] + j]]) ec++; 266a30f8f8cSSatish Balay indices[aj[B->i[i] + j] ] = 1; 267a30f8f8cSSatish Balay } 268a30f8f8cSSatish Balay } 269a30f8f8cSSatish Balay 270a30f8f8cSSatish Balay /* form array of columns we need */ 271b0a32e0cSBarry Smith ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 272f3ef73ceSBarry Smith ec = 0; 273f3ef73ceSBarry Smith for (i=0; i<Nbs; i++) { 274f3ef73ceSBarry Smith if (indices[i]) { 275f3ef73ceSBarry Smith garray[ec++] = i; 276f3ef73ceSBarry Smith } 277f3ef73ceSBarry Smith } 278a30f8f8cSSatish Balay 279a30f8f8cSSatish Balay /* make indices now point into garray */ 280a30f8f8cSSatish Balay for (i=0; i<ec; i++) { 281a30f8f8cSSatish Balay indices[garray[i]] = i; 282a30f8f8cSSatish Balay } 283a30f8f8cSSatish Balay 284a30f8f8cSSatish Balay /* compact out the extra columns in B */ 285a30f8f8cSSatish Balay for (i=0; i<B->mbs; i++) { 286a30f8f8cSSatish Balay for (j=0; j<B->ilen[i]; j++) { 287a30f8f8cSSatish Balay aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 288a30f8f8cSSatish Balay } 289a30f8f8cSSatish Balay } 290a30f8f8cSSatish Balay B->nbs = ec; 291273d9f13SBarry Smith baij->B->n = ec*B->bs; 292a30f8f8cSSatish Balay ierr = PetscFree(indices);CHKERRQ(ierr); 293a30f8f8cSSatish Balay #endif 294633e10c7SHong Zhang 295a30f8f8cSSatish Balay /* create local vector that is used to scatter into */ 296a30f8f8cSSatish Balay ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 297a30f8f8cSSatish Balay 298a30f8f8cSSatish Balay /* create two temporary index sets for building scatter-gather */ 299eb7adc28SSatish Balay for (i=0; i<ec; i++) { 300a30f8f8cSSatish Balay garray[i] = bs*garray[i]; 301a30f8f8cSSatish Balay } 302a30f8f8cSSatish Balay ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 30386e15631SSatish Balay for (i=0; i<ec; i++) { 304a30f8f8cSSatish Balay garray[i] = garray[i]/bs; 305a30f8f8cSSatish Balay } 306a30f8f8cSSatish Balay 307b0a32e0cSBarry Smith ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 308a30f8f8cSSatish Balay for (i=0; i<ec; i++) { stmp[i] = bs*i; } 309a30f8f8cSSatish Balay ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 310a30f8f8cSSatish Balay ierr = PetscFree(stmp);CHKERRQ(ierr); 311a30f8f8cSSatish Balay 312a30f8f8cSSatish Balay /* create temporary global vector to generate scatter context */ 313a30f8f8cSSatish Balay /* this is inefficient, but otherwise we must do either 314a30f8f8cSSatish Balay 1) save garray until the first actual scatter when the vector is known or 315a30f8f8cSSatish Balay 2) have another way of generating a scatter context without a vector.*/ 316273d9f13SBarry Smith ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 317a30f8f8cSSatish Balay 318a30f8f8cSSatish Balay /* gnerate the scatter context */ 319a30f8f8cSSatish Balay ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 320a30f8f8cSSatish Balay 321a30f8f8cSSatish Balay /* 322a30f8f8cSSatish Balay Post the receives for the first matrix vector product. We sync-chronize after 323a30f8f8cSSatish Balay this on the chance that the user immediately calls MatMult() after assemblying 324a30f8f8cSSatish Balay the matrix. 325a30f8f8cSSatish Balay */ 326a30f8f8cSSatish Balay ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 327a30f8f8cSSatish Balay ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 328a30f8f8cSSatish Balay 329b0a32e0cSBarry Smith PetscLogObjectParent(mat,baij->Mvctx); 330b0a32e0cSBarry Smith PetscLogObjectParent(mat,baij->lvec); 331b0a32e0cSBarry Smith PetscLogObjectParent(mat,from); 332b0a32e0cSBarry Smith PetscLogObjectParent(mat,to); 333a30f8f8cSSatish Balay baij->garray = garray; 334b0a32e0cSBarry Smith PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 335a30f8f8cSSatish Balay ierr = ISDestroy(from);CHKERRQ(ierr); 336a30f8f8cSSatish Balay ierr = ISDestroy(to);CHKERRQ(ierr); 337a30f8f8cSSatish Balay ierr = VecDestroy(gvec);CHKERRQ(ierr); 338a30f8f8cSSatish Balay PetscFunctionReturn(0); 339a30f8f8cSSatish Balay } 340a30f8f8cSSatish Balay 341a30f8f8cSSatish Balay 342a30f8f8cSSatish Balay /* 343a30f8f8cSSatish Balay Takes the local part of an already assembled MPIBAIJ matrix 344a30f8f8cSSatish Balay and disassembles it. This is to allow new nonzeros into the matrix 345a30f8f8cSSatish Balay that require more communication in the matrix vector multiply. 346a30f8f8cSSatish Balay Thus certain data-structures must be rebuilt. 347a30f8f8cSSatish Balay 348a30f8f8cSSatish Balay Kind of slow! But that's what application programmers get when 349a30f8f8cSSatish Balay they are sloppy. 350a30f8f8cSSatish Balay */ 3514a2ae208SSatish Balay #undef __FUNCT__ 3524a2ae208SSatish Balay #define __FUNCT__ "DisAssemble_MPISBAIJ" 353a30f8f8cSSatish Balay int DisAssemble_MPISBAIJ(Mat A) 354a30f8f8cSSatish Balay { 3552896befeSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 356a30f8f8cSSatish Balay Mat B = baij->B,Bnew; 357a30f8f8cSSatish Balay Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 358273d9f13SBarry Smith int ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray; 359273d9f13SBarry Smith int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m; 360a30f8f8cSSatish Balay MatScalar *a = Bbaij->a; 36187828ca2SBarry Smith PetscScalar *atmp; 36282502324SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 36382502324SSatish Balay int l; 364a30f8f8cSSatish Balay #endif 365a30f8f8cSSatish Balay 366a30f8f8cSSatish Balay PetscFunctionBegin; 36782502324SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 36887828ca2SBarry Smith ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp); 36982502324SSatish Balay #endif 370a30f8f8cSSatish Balay /* free stuff related to matrix-vec multiply */ 371b0a32e0cSBarry Smith ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 372a30f8f8cSSatish Balay ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 373a30f8f8cSSatish Balay ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 374a30f8f8cSSatish Balay if (baij->colmap) { 375a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 376a30f8f8cSSatish Balay ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 377a30f8f8cSSatish Balay #else 378a30f8f8cSSatish Balay ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 379a30f8f8cSSatish Balay baij->colmap = 0; 380b0a32e0cSBarry Smith PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 381a30f8f8cSSatish Balay #endif 382a30f8f8cSSatish Balay } 383a30f8f8cSSatish Balay 384a30f8f8cSSatish Balay /* make sure that B is assembled so we can access its values */ 385a30f8f8cSSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 386a30f8f8cSSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 387a30f8f8cSSatish Balay 388a30f8f8cSSatish Balay /* invent new B and copy stuff over */ 38982502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr); 390a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 391a30f8f8cSSatish Balay nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 392a30f8f8cSSatish Balay } 393a30f8f8cSSatish Balay ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr); 394a30f8f8cSSatish Balay ierr = PetscFree(nz);CHKERRQ(ierr); 395a30f8f8cSSatish Balay 39682502324SSatish Balay ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 397a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 398a30f8f8cSSatish Balay rvals[0] = bs*i; 399a30f8f8cSSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 400a30f8f8cSSatish Balay for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 401a30f8f8cSSatish Balay col = garray[Bbaij->j[j]]*bs; 402a30f8f8cSSatish Balay for (k=0; k<bs; k++) { 403a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 404a30f8f8cSSatish Balay for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 405a30f8f8cSSatish Balay #else 40671730473SSatish Balay atmp = a+j*bs2 + k*bs; 407a30f8f8cSSatish Balay #endif 408c8407628SSatish Balay ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 409a30f8f8cSSatish Balay col++; 410a30f8f8cSSatish Balay } 411a30f8f8cSSatish Balay } 412a30f8f8cSSatish Balay } 413a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 414a30f8f8cSSatish Balay ierr = PetscFree(atmp);CHKERRQ(ierr); 415a30f8f8cSSatish Balay #endif 416a30f8f8cSSatish Balay ierr = PetscFree(baij->garray);CHKERRQ(ierr); 417a30f8f8cSSatish Balay baij->garray = 0; 418a30f8f8cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 419b0a32e0cSBarry Smith PetscLogObjectMemory(A,-ec*sizeof(int)); 420a30f8f8cSSatish Balay ierr = MatDestroy(B);CHKERRQ(ierr); 421b0a32e0cSBarry Smith PetscLogObjectParent(A,Bnew); 422a30f8f8cSSatish Balay baij->B = Bnew; 423a30f8f8cSSatish Balay A->was_assembled = PETSC_FALSE; 424a30f8f8cSSatish Balay PetscFunctionReturn(0); 425a30f8f8cSSatish Balay } 426a30f8f8cSSatish Balay 427a30f8f8cSSatish Balay 428a30f8f8cSSatish Balay 429