xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision be1d678a52e6eff2808b2fa31ae986cdbf03c9fe)
1*be1d678aSKris Buschelman #define PETSCMAT_DLL
2a30f8f8cSSatish Balay 
3a30f8f8cSSatish Balay /*
4a30f8f8cSSatish Balay    Support for the parallel SBAIJ matrix vector multiply
5a30f8f8cSSatish Balay */
62896befeSSatish Balay #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
77cff1425SSatish Balay 
813f74950SBarry Smith EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
97cff1425SSatish Balay 
10a30f8f8cSSatish Balay 
114a2ae208SSatish Balay #undef __FUNCT__
124a2ae208SSatish Balay #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
13dfbe8321SBarry Smith PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
14a30f8f8cSSatish Balay {
1540781036SHong Zhang   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
1640781036SHong Zhang   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(sbaij->B->data);
176849ba73SBarry Smith   PetscErrorCode ierr;
1813f74950SBarry Smith   PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
1913f74950SBarry Smith   PetscInt       bs = mat->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
2040781036SHong Zhang   IS             from,to;
2140781036SHong Zhang   Vec            gvec;
2213f74950SBarry Smith   PetscMPIInt    rank=sbaij->rank,lsize,size=sbaij->size;
2313f74950SBarry Smith   PetscInt       *owners=sbaij->rowners,*sowners,*ec_owner,k;
2440781036SHong Zhang   PetscMap       vecmap;
2540781036SHong Zhang   PetscScalar    *ptr;
2640781036SHong Zhang 
2740781036SHong Zhang   PetscFunctionBegin;
2840781036SHong Zhang   if (sbaij->lvec) {
2940781036SHong Zhang     ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr);
3040781036SHong Zhang     sbaij->lvec = 0;
3140781036SHong Zhang   }
3240781036SHong Zhang   if (sbaij->Mvctx) {
3340781036SHong Zhang     ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr);
3440781036SHong Zhang     sbaij->Mvctx = 0;
3540781036SHong Zhang   }
3640781036SHong Zhang 
3740781036SHong Zhang   /* For the first stab we make an array as long as the number of columns */
3840781036SHong Zhang   /* mark those columns that are in sbaij->B */
3913f74950SBarry Smith   ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
4013f74950SBarry Smith   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
4140781036SHong Zhang   for (i=0; i<mbs; i++) {
4240781036SHong Zhang     for (j=0; j<B->ilen[i]; j++) {
4340781036SHong Zhang       if (!indices[aj[B->i[i] + j]]) ec++;
4440781036SHong Zhang       indices[aj[B->i[i] + j] ] = 1;
4540781036SHong Zhang     }
4640781036SHong Zhang   }
4740781036SHong Zhang 
4840781036SHong Zhang   /* form arrays of columns we need */
4913f74950SBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
5013f74950SBarry Smith   ierr = PetscMalloc((3*ec+1)*sizeof(PetscInt),&sgarray);CHKERRQ(ierr);
5140781036SHong Zhang   ec_owner = sgarray + 2*ec;
5240781036SHong Zhang 
5340781036SHong Zhang   ec = 0;
5440781036SHong Zhang   for (j=0; j<size; j++){
5540781036SHong Zhang     for (i=owners[j]; i<owners[j+1]; i++){
5640781036SHong Zhang       if (indices[i]) {
5740781036SHong Zhang         garray[ec]   = i;
5840781036SHong Zhang         ec_owner[ec] = j;
5940781036SHong Zhang         ec++;
6040781036SHong Zhang       }
6140781036SHong Zhang     }
6240781036SHong Zhang   }
6340781036SHong Zhang 
6440781036SHong Zhang   /* make indices now point into garray */
65b424e231SHong Zhang   for (i=0; i<ec; i++) indices[garray[i]] = i;
6640781036SHong Zhang 
6740781036SHong Zhang   /* compact out the extra columns in B */
6840781036SHong Zhang   for (i=0; i<mbs; i++) {
6940781036SHong Zhang     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
7040781036SHong Zhang   }
7140781036SHong Zhang   B->nbs      = ec;
72521d7252SBarry Smith   sbaij->B->n = ec*mat->bs;
7340781036SHong Zhang   ierr = PetscFree(indices);CHKERRQ(ierr);
7440781036SHong Zhang 
7540781036SHong Zhang   /* create local vector that is used to scatter into */
7640781036SHong Zhang   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
7740781036SHong Zhang 
7840781036SHong Zhang   /* create two temporary index sets for building scatter-gather */
7913f74950SBarry Smith   ierr = PetscMalloc((2*ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
8040781036SHong Zhang   for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
8140781036SHong Zhang   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);
8240781036SHong Zhang 
8340781036SHong Zhang   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
8440781036SHong Zhang   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
8540781036SHong Zhang 
8693d1592dSHong Zhang   /* generate the scatter context
8793d1592dSHong Zhang      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */
8840781036SHong Zhang   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
8940781036SHong Zhang   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
9040781036SHong Zhang   ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr);
9140781036SHong Zhang 
9240781036SHong Zhang   sbaij->garray = garray;
9352e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr);
9452e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr);
9552e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
9652e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
9740781036SHong Zhang 
9840781036SHong Zhang   ierr = ISDestroy(from);CHKERRQ(ierr);
9940781036SHong Zhang   ierr = ISDestroy(to);CHKERRQ(ierr);
10040781036SHong Zhang 
10140781036SHong Zhang   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
10240781036SHong Zhang   lsize = (mbs + ec)*bs;
10340781036SHong Zhang   ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
10440781036SHong Zhang   ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
10540781036SHong Zhang   ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
10640781036SHong Zhang 
10740781036SHong Zhang   ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr);
10840781036SHong Zhang   ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr);
10940781036SHong Zhang 
11040781036SHong Zhang   /* x index in the IS sfrom */
11140781036SHong Zhang   for (i=0; i<ec; i++) {
11240781036SHong Zhang     j = ec_owner[i];
11340781036SHong Zhang     sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
11440781036SHong Zhang   }
11540781036SHong Zhang   /* b index in the IS sfrom */
11640781036SHong Zhang   k = sowners[rank]/bs + mbs;
11740781036SHong Zhang   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
11840781036SHong Zhang 
11940781036SHong Zhang   for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
12040781036SHong Zhang   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);
12140781036SHong Zhang 
12240781036SHong Zhang   /* x index in the IS sto */
12340781036SHong Zhang   k = sowners[rank]/bs + mbs;
12440781036SHong Zhang   for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
12540781036SHong Zhang   /* b index in the IS sto */
12640781036SHong Zhang   for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];
12740781036SHong Zhang 
12840781036SHong Zhang   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);
12940781036SHong Zhang 
13040781036SHong Zhang   /* gnerate the SBAIJ scatter context */
13140781036SHong Zhang   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
13240781036SHong Zhang 
13340781036SHong Zhang    /*
13440781036SHong Zhang       Post the receives for the first matrix vector product. We sync-chronize after
13540781036SHong Zhang     this on the chance that the user immediately calls MatMult() after assemblying
13640781036SHong Zhang     the matrix.
13740781036SHong Zhang   */
13840781036SHong Zhang   ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr);
13940781036SHong Zhang 
14040781036SHong Zhang   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
1411ebc52fbSHong Zhang   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
14240781036SHong Zhang   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
14340781036SHong Zhang   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
1441ebc52fbSHong Zhang   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
14540781036SHong Zhang 
1461ebc52fbSHong Zhang   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
14740781036SHong Zhang   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
1481ebc52fbSHong Zhang   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
14940781036SHong Zhang 
15040781036SHong Zhang   ierr = PetscFree(stmp);CHKERRQ(ierr);
15140781036SHong Zhang   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
15240781036SHong Zhang 
15352e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr);
15452e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr);
15552e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr);
15652e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr);
15752e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr);
15852e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr);
15952e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
16052e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
16140781036SHong Zhang 
16252e6d16bSBarry Smith   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
16340781036SHong Zhang   ierr = ISDestroy(from);CHKERRQ(ierr);
16440781036SHong Zhang   ierr = ISDestroy(to);CHKERRQ(ierr);
16540781036SHong Zhang   ierr = VecDestroy(gvec);CHKERRQ(ierr);
16640781036SHong Zhang   ierr = PetscFree(sgarray);CHKERRQ(ierr);
16740781036SHong Zhang 
16840781036SHong Zhang   PetscFunctionReturn(0);
16940781036SHong Zhang }
17040781036SHong Zhang 
17140781036SHong Zhang #undef __FUNCT__
17240781036SHong Zhang #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
173dfbe8321SBarry Smith PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
17440781036SHong Zhang {
1752896befeSSatish Balay   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
176a30f8f8cSSatish Balay   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
1776849ba73SBarry Smith   PetscErrorCode     ierr;
17813f74950SBarry Smith   PetscInt           i,j,*aj = B->j,ec = 0,*garray;
17913f74950SBarry Smith   PetscInt           bs = mat->bs,*stmp;
180a30f8f8cSSatish Balay   IS                 from,to;
181a30f8f8cSSatish Balay   Vec                gvec;
182a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
183a30f8f8cSSatish Balay   PetscTable         gid1_lid1;
184a30f8f8cSSatish Balay   PetscTablePosition tpos;
18513f74950SBarry Smith   PetscInt           gid,lid;
1866f531f54SSatish Balay #else
18713f74950SBarry Smith   PetscInt           Nbs = baij->Nbs,*indices;
188a30f8f8cSSatish Balay #endif
189a30f8f8cSSatish Balay 
190a30f8f8cSSatish Balay   PetscFunctionBegin;
191a30f8f8cSSatish Balay 
192a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
193a30f8f8cSSatish Balay   /* use a table - Mark Adams */
194a30f8f8cSSatish Balay   PetscTableCreate(B->mbs,&gid1_lid1);
195a30f8f8cSSatish Balay   for (i=0; i<B->mbs; i++) {
196a30f8f8cSSatish Balay     for (j=0; j<B->ilen[i]; j++) {
19713f74950SBarry Smith       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
198a30f8f8cSSatish Balay       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
199a30f8f8cSSatish Balay       if (!data) {
200a30f8f8cSSatish Balay         /* one based table */
201a30f8f8cSSatish Balay         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
202a30f8f8cSSatish Balay       }
203a30f8f8cSSatish Balay     }
204a30f8f8cSSatish Balay   }
205a30f8f8cSSatish Balay   /* form array of columns we need */
20613f74950SBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
207a30f8f8cSSatish Balay   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
208a30f8f8cSSatish Balay   while (tpos) {
209a30f8f8cSSatish Balay     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
210a30f8f8cSSatish Balay     gid--; lid--;
211a30f8f8cSSatish Balay     garray[lid] = gid;
212a30f8f8cSSatish Balay   }
213a30f8f8cSSatish Balay   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
214a30f8f8cSSatish Balay   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
215a30f8f8cSSatish Balay   for (i=0; i<ec; i++) {
216a30f8f8cSSatish Balay     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
217a30f8f8cSSatish Balay   }
218a30f8f8cSSatish Balay   /* compact out the extra columns in B */
219a30f8f8cSSatish Balay   for (i=0; i<B->mbs; i++) {
220a30f8f8cSSatish Balay     for (j=0; j<B->ilen[i]; j++) {
22113f74950SBarry Smith       PetscInt gid1 = aj[B->i[i] + j] + 1;
222a30f8f8cSSatish Balay       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
223a30f8f8cSSatish Balay       lid --;
224a30f8f8cSSatish Balay       aj[B->i[i]+j] = lid;
225a30f8f8cSSatish Balay     }
226a30f8f8cSSatish Balay   }
227a30f8f8cSSatish Balay   B->nbs     = ec;
2283d30b1ffSBarry Smith   baij->B->n = ec*mat->bs;
229a30f8f8cSSatish Balay   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
230a30f8f8cSSatish Balay   /* Mark Adams */
231a30f8f8cSSatish Balay #else
232a30f8f8cSSatish Balay   /* For the first stab we make an array as long as the number of columns */
233a30f8f8cSSatish Balay   /* mark those columns that are in baij->B */
23413f74950SBarry Smith   ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
23513f74950SBarry Smith   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
236a30f8f8cSSatish Balay   for (i=0; i<B->mbs; i++) {
237a30f8f8cSSatish Balay     for (j=0; j<B->ilen[i]; j++) {
238a30f8f8cSSatish Balay       if (!indices[aj[B->i[i] + j]]) ec++;
239a30f8f8cSSatish Balay       indices[aj[B->i[i] + j] ] = 1;
240a30f8f8cSSatish Balay     }
241a30f8f8cSSatish Balay   }
242a30f8f8cSSatish Balay 
243a30f8f8cSSatish Balay   /* form array of columns we need */
24413f74950SBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
245f3ef73ceSBarry Smith   ec = 0;
246f3ef73ceSBarry Smith   for (i=0; i<Nbs; i++) {
247f3ef73ceSBarry Smith     if (indices[i]) {
248f3ef73ceSBarry Smith       garray[ec++] = i;
249f3ef73ceSBarry Smith     }
250f3ef73ceSBarry Smith   }
251a30f8f8cSSatish Balay 
252a30f8f8cSSatish Balay   /* make indices now point into garray */
253a30f8f8cSSatish Balay   for (i=0; i<ec; i++) {
254a30f8f8cSSatish Balay     indices[garray[i]] = i;
255a30f8f8cSSatish Balay   }
256a30f8f8cSSatish Balay 
257a30f8f8cSSatish Balay   /* compact out the extra columns in B */
258a30f8f8cSSatish Balay   for (i=0; i<B->mbs; i++) {
259a30f8f8cSSatish Balay     for (j=0; j<B->ilen[i]; j++) {
260a30f8f8cSSatish Balay       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
261a30f8f8cSSatish Balay     }
262a30f8f8cSSatish Balay   }
263a30f8f8cSSatish Balay   B->nbs       = ec;
264521d7252SBarry Smith   baij->B->n   = ec*mat->bs;
265a30f8f8cSSatish Balay   ierr = PetscFree(indices);CHKERRQ(ierr);
266a30f8f8cSSatish Balay #endif
267633e10c7SHong Zhang 
268a30f8f8cSSatish Balay   /* create local vector that is used to scatter into */
269a30f8f8cSSatish Balay   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
270a30f8f8cSSatish Balay 
271a30f8f8cSSatish Balay   /* create two temporary index sets for building scatter-gather */
272eb7adc28SSatish Balay   for (i=0; i<ec; i++) {
273a30f8f8cSSatish Balay     garray[i] = bs*garray[i];
274a30f8f8cSSatish Balay   }
275a30f8f8cSSatish Balay   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
27686e15631SSatish Balay   for (i=0; i<ec; i++) {
277a30f8f8cSSatish Balay     garray[i] = garray[i]/bs;
278a30f8f8cSSatish Balay   }
279a30f8f8cSSatish Balay 
28013f74950SBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
281a30f8f8cSSatish Balay   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
282a30f8f8cSSatish Balay   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
283a30f8f8cSSatish Balay   ierr = PetscFree(stmp);CHKERRQ(ierr);
284a30f8f8cSSatish Balay 
285a30f8f8cSSatish Balay   /* create temporary global vector to generate scatter context */
286a30f8f8cSSatish Balay   /* this is inefficient, but otherwise we must do either
287a30f8f8cSSatish Balay      1) save garray until the first actual scatter when the vector is known or
288a30f8f8cSSatish Balay      2) have another way of generating a scatter context without a vector.*/
289273d9f13SBarry Smith   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
290a30f8f8cSSatish Balay 
291a30f8f8cSSatish Balay   /* gnerate the scatter context */
292a30f8f8cSSatish Balay   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
293a30f8f8cSSatish Balay 
294a30f8f8cSSatish Balay   /*
295a30f8f8cSSatish Balay       Post the receives for the first matrix vector product. We sync-chronize after
296a30f8f8cSSatish Balay     this on the chance that the user immediately calls MatMult() after assemblying
297a30f8f8cSSatish Balay     the matrix.
298a30f8f8cSSatish Balay   */
299a30f8f8cSSatish Balay   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
300a30f8f8cSSatish Balay   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
301a30f8f8cSSatish Balay 
30252e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
30352e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
30452e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
30552e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
306a30f8f8cSSatish Balay   baij->garray = garray;
30752e6d16bSBarry Smith   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
308a30f8f8cSSatish Balay   ierr = ISDestroy(from);CHKERRQ(ierr);
309a30f8f8cSSatish Balay   ierr = ISDestroy(to);CHKERRQ(ierr);
310a30f8f8cSSatish Balay   ierr = VecDestroy(gvec);CHKERRQ(ierr);
311a30f8f8cSSatish Balay   PetscFunctionReturn(0);
312a30f8f8cSSatish Balay }
313a30f8f8cSSatish Balay 
314a30f8f8cSSatish Balay 
315a30f8f8cSSatish Balay /*
316a30f8f8cSSatish Balay      Takes the local part of an already assembled MPIBAIJ matrix
317a30f8f8cSSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
318a30f8f8cSSatish Balay    that require more communication in the matrix vector multiply.
319a30f8f8cSSatish Balay    Thus certain data-structures must be rebuilt.
320a30f8f8cSSatish Balay 
321a30f8f8cSSatish Balay    Kind of slow! But that's what application programmers get when
322a30f8f8cSSatish Balay    they are sloppy.
323a30f8f8cSSatish Balay */
3244a2ae208SSatish Balay #undef __FUNCT__
3254a2ae208SSatish Balay #define __FUNCT__ "DisAssemble_MPISBAIJ"
326dfbe8321SBarry Smith PetscErrorCode DisAssemble_MPISBAIJ(Mat A)
327a30f8f8cSSatish Balay {
3282896befeSSatish Balay   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
329a30f8f8cSSatish Balay   Mat           B = baij->B,Bnew;
330a30f8f8cSSatish Balay   Mat_SeqBAIJ   *Bbaij = (Mat_SeqBAIJ*)B->data;
331dfbe8321SBarry Smith   PetscErrorCode ierr;
33213f74950SBarry Smith   PetscInt i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
33313f74950SBarry Smith   PetscInt           k,bs=A->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m;
334a30f8f8cSSatish Balay   MatScalar     *a = Bbaij->a;
33587828ca2SBarry Smith   PetscScalar   *atmp;
33682502324SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
33713f74950SBarry Smith   PetscInt           l;
338a30f8f8cSSatish Balay #endif
339a30f8f8cSSatish Balay 
340a30f8f8cSSatish Balay   PetscFunctionBegin;
34182502324SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
3428b32f6a4SKris Buschelman   ierr = PetscMalloc(A->bs*sizeof(PetscScalar),&atmp);
34382502324SSatish Balay #endif
344a30f8f8cSSatish Balay   /* free stuff related to matrix-vec multiply */
345b0a32e0cSBarry Smith   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
346a30f8f8cSSatish Balay   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
347a30f8f8cSSatish Balay   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
348a30f8f8cSSatish Balay   if (baij->colmap) {
349a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
350a30f8f8cSSatish Balay     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
351a30f8f8cSSatish Balay #else
352a30f8f8cSSatish Balay     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
353a30f8f8cSSatish Balay     baij->colmap = 0;
35452e6d16bSBarry Smith     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
355a30f8f8cSSatish Balay #endif
356a30f8f8cSSatish Balay   }
357a30f8f8cSSatish Balay 
358a30f8f8cSSatish Balay   /* make sure that B is assembled so we can access its values */
359a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
360a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
361a30f8f8cSSatish Balay 
362a30f8f8cSSatish Balay   /* invent new B and copy stuff over */
36313f74950SBarry Smith   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
364a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
365a30f8f8cSSatish Balay     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
366a30f8f8cSSatish Balay   }
367be5d1d56SKris Buschelman   ierr = MatCreate(PETSC_COMM_SELF,m,n,m,n,&Bnew);CHKERRQ(ierr);
368be5d1d56SKris Buschelman   ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
369521d7252SBarry Smith   ierr = MatSeqBAIJSetPreallocation(Bnew,B->bs,0,nz);CHKERRQ(ierr);
370a30f8f8cSSatish Balay   ierr = PetscFree(nz);CHKERRQ(ierr);
371a30f8f8cSSatish Balay 
37213f74950SBarry Smith   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
373a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
374a30f8f8cSSatish Balay     rvals[0] = bs*i;
375a30f8f8cSSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
376a30f8f8cSSatish Balay     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
377a30f8f8cSSatish Balay       col = garray[Bbaij->j[j]]*bs;
378a30f8f8cSSatish Balay       for (k=0; k<bs; k++) {
379a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
380a30f8f8cSSatish Balay         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
381a30f8f8cSSatish Balay #else
38271730473SSatish Balay         atmp = a+j*bs2 + k*bs;
383a30f8f8cSSatish Balay #endif
384c8407628SSatish Balay         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
385a30f8f8cSSatish Balay         col++;
386a30f8f8cSSatish Balay       }
387a30f8f8cSSatish Balay     }
388a30f8f8cSSatish Balay   }
389a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
390a30f8f8cSSatish Balay   ierr = PetscFree(atmp);CHKERRQ(ierr);
391a30f8f8cSSatish Balay #endif
392a30f8f8cSSatish Balay   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
393a30f8f8cSSatish Balay   baij->garray = 0;
394a30f8f8cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
39552e6d16bSBarry Smith   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
396a30f8f8cSSatish Balay   ierr = MatDestroy(B);CHKERRQ(ierr);
39752e6d16bSBarry Smith   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
398a30f8f8cSSatish Balay   baij->B = Bnew;
399a30f8f8cSSatish Balay   A->was_assembled = PETSC_FALSE;
400a30f8f8cSSatish Balay   PetscFunctionReturn(0);
401a30f8f8cSSatish Balay }
402a30f8f8cSSatish Balay 
403a30f8f8cSSatish Balay 
404a30f8f8cSSatish Balay 
405