xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 6857c123a4e6590fbaa83d0dbcd1ca4d08b61509)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
2a30f8f8cSSatish Balay 
3a30f8f8cSSatish Balay /*
4a30f8f8cSSatish Balay    Support for the parallel SBAIJ matrix vector multiply
5a30f8f8cSSatish Balay */
62896befeSSatish Balay #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
77cff1425SSatish Balay 
813f74950SBarry Smith EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
97cff1425SSatish Balay 
10a30f8f8cSSatish Balay 
114a2ae208SSatish Balay #undef __FUNCT__
124a2ae208SSatish Balay #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
13dfbe8321SBarry Smith PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
14a30f8f8cSSatish Balay {
1540781036SHong Zhang   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
1640781036SHong Zhang   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(sbaij->B->data);
176849ba73SBarry Smith   PetscErrorCode ierr;
1813f74950SBarry Smith   PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
19899cda47SBarry Smith   PetscInt       bs = mat->rmap.bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
2040781036SHong Zhang   IS             from,to;
2140781036SHong Zhang   Vec            gvec;
2213f74950SBarry Smith   PetscMPIInt    rank=sbaij->rank,lsize,size=sbaij->size;
23899cda47SBarry Smith   PetscInt       *owners=sbaij->rangebs,*sowners,*ec_owner,k;
2440781036SHong Zhang   PetscScalar    *ptr;
2540781036SHong Zhang 
2640781036SHong Zhang   PetscFunctionBegin;
2701b2bd88SHong Zhang   if (sbaij->sMvctx) {
2801b2bd88SHong Zhang     /* This two lines should be in DisAssemble_MPISBAIJ(). Don't know why it causes crash there? */
2901b2bd88SHong Zhang     ierr = VecScatterDestroy(sbaij->sMvctx);CHKERRQ(ierr);
3001b2bd88SHong Zhang     sbaij->sMvctx = 0;
3140781036SHong Zhang   }
3240781036SHong Zhang 
3340781036SHong Zhang   /* For the first stab we make an array as long as the number of columns */
3440781036SHong Zhang   /* mark those columns that are in sbaij->B */
3513f74950SBarry Smith   ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
3613f74950SBarry Smith   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
3740781036SHong Zhang   for (i=0; i<mbs; i++) {
3840781036SHong Zhang     for (j=0; j<B->ilen[i]; j++) {
3940781036SHong Zhang       if (!indices[aj[B->i[i] + j]]) ec++;
4040781036SHong Zhang       indices[aj[B->i[i] + j] ] = 1;
4140781036SHong Zhang     }
4240781036SHong Zhang   }
4340781036SHong Zhang 
4440781036SHong Zhang   /* form arrays of columns we need */
4513f74950SBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
4613f74950SBarry Smith   ierr = PetscMalloc((3*ec+1)*sizeof(PetscInt),&sgarray);CHKERRQ(ierr);
4740781036SHong Zhang   ec_owner = sgarray + 2*ec;
4840781036SHong Zhang 
4940781036SHong Zhang   ec = 0;
5040781036SHong Zhang   for (j=0; j<size; j++){
5140781036SHong Zhang     for (i=owners[j]; i<owners[j+1]; i++){
5240781036SHong Zhang       if (indices[i]) {
5340781036SHong Zhang         garray[ec]   = i;
5440781036SHong Zhang         ec_owner[ec] = j;
5540781036SHong Zhang         ec++;
5640781036SHong Zhang       }
5740781036SHong Zhang     }
5840781036SHong Zhang   }
5940781036SHong Zhang 
6040781036SHong Zhang   /* make indices now point into garray */
61b424e231SHong Zhang   for (i=0; i<ec; i++) indices[garray[i]] = i;
6240781036SHong Zhang 
6340781036SHong Zhang   /* compact out the extra columns in B */
6440781036SHong Zhang   for (i=0; i<mbs; i++) {
6540781036SHong Zhang     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
6640781036SHong Zhang   }
6740781036SHong Zhang   B->nbs      = ec;
68*6857c123SSatish Balay   sbaij->B->cmap.n = sbaij->B->cmap.N = ec*mat->rmap.bs;
69*6857c123SSatish Balay   ierr = PetscMapInitialize(sbaij->B->comm,&(sbaij->B->cmap));CHKERRQ(ierr);
7040781036SHong Zhang   ierr = PetscFree(indices);CHKERRQ(ierr);
7140781036SHong Zhang 
7240781036SHong Zhang   /* create local vector that is used to scatter into */
7340781036SHong Zhang   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
7440781036SHong Zhang 
7540781036SHong Zhang   /* create two temporary index sets for building scatter-gather */
7613f74950SBarry Smith   ierr = PetscMalloc((2*ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
7740781036SHong Zhang   for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
7840781036SHong Zhang   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);
7940781036SHong Zhang 
8040781036SHong Zhang   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
8140781036SHong Zhang   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
8240781036SHong Zhang 
8393d1592dSHong Zhang   /* generate the scatter context
8493d1592dSHong Zhang      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */
85899cda47SBarry Smith   ierr = VecCreateMPI(mat->comm,mat->cmap.n,mat->cmap.N,&gvec);CHKERRQ(ierr);
8640781036SHong Zhang   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
8740781036SHong Zhang   ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr);
8840781036SHong Zhang 
8940781036SHong Zhang   sbaij->garray = garray;
9052e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr);
9152e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr);
9252e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
9352e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
9440781036SHong Zhang 
9540781036SHong Zhang   ierr = ISDestroy(from);CHKERRQ(ierr);
9640781036SHong Zhang   ierr = ISDestroy(to);CHKERRQ(ierr);
9740781036SHong Zhang 
9840781036SHong Zhang   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
9940781036SHong Zhang   lsize = (mbs + ec)*bs;
10040781036SHong Zhang   ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
10140781036SHong Zhang   ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
10240781036SHong Zhang   ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
10340781036SHong Zhang 
104bba805e6SBarry Smith   sowners = sbaij->slvec0->map.range;
10540781036SHong Zhang 
10640781036SHong Zhang   /* x index in the IS sfrom */
10740781036SHong Zhang   for (i=0; i<ec; i++) {
10840781036SHong Zhang     j = ec_owner[i];
10940781036SHong Zhang     sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
11040781036SHong Zhang   }
11140781036SHong Zhang   /* b index in the IS sfrom */
11240781036SHong Zhang   k = sowners[rank]/bs + mbs;
11340781036SHong Zhang   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
11440781036SHong Zhang 
11540781036SHong Zhang   for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
11640781036SHong Zhang   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);
11740781036SHong Zhang 
11840781036SHong Zhang   /* x index in the IS sto */
11940781036SHong Zhang   k = sowners[rank]/bs + mbs;
12040781036SHong Zhang   for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
12140781036SHong Zhang   /* b index in the IS sto */
12240781036SHong Zhang   for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];
12340781036SHong Zhang 
12440781036SHong Zhang   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);
12540781036SHong Zhang 
12640781036SHong Zhang   /* gnerate the SBAIJ scatter context */
12740781036SHong Zhang   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
12840781036SHong Zhang 
12940781036SHong Zhang    /*
13040781036SHong Zhang       Post the receives for the first matrix vector product. We sync-chronize after
13140781036SHong Zhang     this on the chance that the user immediately calls MatMult() after assemblying
13240781036SHong Zhang     the matrix.
13340781036SHong Zhang   */
13440781036SHong Zhang   ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr);
13540781036SHong Zhang 
13640781036SHong Zhang   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
1371ebc52fbSHong Zhang   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
13840781036SHong Zhang   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
13940781036SHong Zhang   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
1401ebc52fbSHong Zhang   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
14140781036SHong Zhang 
1421ebc52fbSHong Zhang   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
14340781036SHong Zhang   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
1441ebc52fbSHong Zhang   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
14540781036SHong Zhang 
14640781036SHong Zhang   ierr = PetscFree(stmp);CHKERRQ(ierr);
14740781036SHong Zhang   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
14840781036SHong Zhang 
14952e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr);
15052e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr);
15152e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr);
15252e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr);
15352e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr);
15452e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr);
15552e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
15652e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
15740781036SHong Zhang 
15852e6d16bSBarry Smith   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
15940781036SHong Zhang   ierr = ISDestroy(from);CHKERRQ(ierr);
16040781036SHong Zhang   ierr = ISDestroy(to);CHKERRQ(ierr);
16140781036SHong Zhang   ierr = VecDestroy(gvec);CHKERRQ(ierr);
16240781036SHong Zhang   ierr = PetscFree(sgarray);CHKERRQ(ierr);
16340781036SHong Zhang   PetscFunctionReturn(0);
16440781036SHong Zhang }
16540781036SHong Zhang 
16640781036SHong Zhang #undef __FUNCT__
16740781036SHong Zhang #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
168dfbe8321SBarry Smith PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
16940781036SHong Zhang {
1702896befeSSatish Balay   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
171a30f8f8cSSatish Balay   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
1726849ba73SBarry Smith   PetscErrorCode     ierr;
17313f74950SBarry Smith   PetscInt           i,j,*aj = B->j,ec = 0,*garray;
174899cda47SBarry Smith   PetscInt           bs = mat->rmap.bs,*stmp;
175a30f8f8cSSatish Balay   IS                 from,to;
176a30f8f8cSSatish Balay   Vec                gvec;
177a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
178a30f8f8cSSatish Balay   PetscTable         gid1_lid1;
179a30f8f8cSSatish Balay   PetscTablePosition tpos;
18013f74950SBarry Smith   PetscInt           gid,lid;
1816f531f54SSatish Balay #else
18213f74950SBarry Smith   PetscInt           Nbs = baij->Nbs,*indices;
183a30f8f8cSSatish Balay #endif
184a30f8f8cSSatish Balay 
185a30f8f8cSSatish Balay   PetscFunctionBegin;
186a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
187a30f8f8cSSatish Balay   /* use a table - Mark Adams */
188a30f8f8cSSatish Balay   PetscTableCreate(B->mbs,&gid1_lid1);
189a30f8f8cSSatish Balay   for (i=0; i<B->mbs; i++) {
190a30f8f8cSSatish Balay     for (j=0; j<B->ilen[i]; j++) {
19113f74950SBarry Smith       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
192a30f8f8cSSatish Balay       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
193a30f8f8cSSatish Balay       if (!data) {
194a30f8f8cSSatish Balay         /* one based table */
195a30f8f8cSSatish Balay         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
196a30f8f8cSSatish Balay       }
197a30f8f8cSSatish Balay     }
198a30f8f8cSSatish Balay   }
199a30f8f8cSSatish Balay   /* form array of columns we need */
20013f74950SBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
201a30f8f8cSSatish Balay   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
202a30f8f8cSSatish Balay   while (tpos) {
203a30f8f8cSSatish Balay     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
204a30f8f8cSSatish Balay     gid--; lid--;
205a30f8f8cSSatish Balay     garray[lid] = gid;
206a30f8f8cSSatish Balay   }
207a30f8f8cSSatish Balay   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
208a30f8f8cSSatish Balay   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
209a30f8f8cSSatish Balay   for (i=0; i<ec; i++) {
210a30f8f8cSSatish Balay     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
211a30f8f8cSSatish Balay   }
212a30f8f8cSSatish Balay   /* compact out the extra columns in B */
213a30f8f8cSSatish Balay   for (i=0; i<B->mbs; i++) {
214a30f8f8cSSatish Balay     for (j=0; j<B->ilen[i]; j++) {
21513f74950SBarry Smith       PetscInt gid1 = aj[B->i[i] + j] + 1;
216a30f8f8cSSatish Balay       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
217a30f8f8cSSatish Balay       lid --;
218a30f8f8cSSatish Balay       aj[B->i[i]+j] = lid;
219a30f8f8cSSatish Balay     }
220a30f8f8cSSatish Balay   }
221a30f8f8cSSatish Balay   B->nbs     = ec;
222*6857c123SSatish Balay   baij->B->cmap.n = baij->B->cmap.N = ec*mat->rmap.bs;
223*6857c123SSatish Balay   ierr = PetscMapInitialize(baij->B->comm,&(baij->B->cmap));CHKERRQ(ierr);
224a30f8f8cSSatish Balay   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
225a30f8f8cSSatish Balay   /* Mark Adams */
226a30f8f8cSSatish Balay #else
227a30f8f8cSSatish Balay   /* For the first stab we make an array as long as the number of columns */
228a30f8f8cSSatish Balay   /* mark those columns that are in baij->B */
22913f74950SBarry Smith   ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
23013f74950SBarry Smith   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
231a30f8f8cSSatish Balay   for (i=0; i<B->mbs; i++) {
232a30f8f8cSSatish Balay     for (j=0; j<B->ilen[i]; j++) {
233a30f8f8cSSatish Balay       if (!indices[aj[B->i[i] + j]]) ec++;
234a30f8f8cSSatish Balay       indices[aj[B->i[i] + j] ] = 1;
235a30f8f8cSSatish Balay     }
236a30f8f8cSSatish Balay   }
237a30f8f8cSSatish Balay 
238a30f8f8cSSatish Balay   /* form array of columns we need */
23913f74950SBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
240f3ef73ceSBarry Smith   ec = 0;
241f3ef73ceSBarry Smith   for (i=0; i<Nbs; i++) {
242f3ef73ceSBarry Smith     if (indices[i]) {
243f3ef73ceSBarry Smith       garray[ec++] = i;
244f3ef73ceSBarry Smith     }
245f3ef73ceSBarry Smith   }
246a30f8f8cSSatish Balay 
247a30f8f8cSSatish Balay   /* make indices now point into garray */
248a30f8f8cSSatish Balay   for (i=0; i<ec; i++) {
249a30f8f8cSSatish Balay     indices[garray[i]] = i;
250a30f8f8cSSatish Balay   }
251a30f8f8cSSatish Balay 
252a30f8f8cSSatish Balay   /* compact out the extra columns in B */
253a30f8f8cSSatish Balay   for (i=0; i<B->mbs; i++) {
254a30f8f8cSSatish Balay     for (j=0; j<B->ilen[i]; j++) {
255a30f8f8cSSatish Balay       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
256a30f8f8cSSatish Balay     }
257a30f8f8cSSatish Balay   }
258a30f8f8cSSatish Balay   B->nbs       = ec;
259899cda47SBarry Smith   baij->B->cmap.n   = ec*mat->rmap.bs;
260a30f8f8cSSatish Balay   ierr = PetscFree(indices);CHKERRQ(ierr);
261a30f8f8cSSatish Balay #endif
262633e10c7SHong Zhang 
263a30f8f8cSSatish Balay   /* create local vector that is used to scatter into */
264a30f8f8cSSatish Balay   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
265a30f8f8cSSatish Balay 
266a30f8f8cSSatish Balay   /* create two temporary index sets for building scatter-gather */
267eb7adc28SSatish Balay   for (i=0; i<ec; i++) {
268a30f8f8cSSatish Balay     garray[i] = bs*garray[i];
269a30f8f8cSSatish Balay   }
270a30f8f8cSSatish Balay   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
27186e15631SSatish Balay   for (i=0; i<ec; i++) {
272a30f8f8cSSatish Balay     garray[i] = garray[i]/bs;
273a30f8f8cSSatish Balay   }
274a30f8f8cSSatish Balay 
27513f74950SBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
276a30f8f8cSSatish Balay   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
277a30f8f8cSSatish Balay   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
278a30f8f8cSSatish Balay   ierr = PetscFree(stmp);CHKERRQ(ierr);
279a30f8f8cSSatish Balay 
280a30f8f8cSSatish Balay   /* create temporary global vector to generate scatter context */
281a30f8f8cSSatish Balay   /* this is inefficient, but otherwise we must do either
282a30f8f8cSSatish Balay      1) save garray until the first actual scatter when the vector is known or
283a30f8f8cSSatish Balay      2) have another way of generating a scatter context without a vector.*/
284899cda47SBarry Smith   ierr = VecCreateMPI(mat->comm,mat->cmap.n,mat->cmap.N,&gvec);CHKERRQ(ierr);
285a30f8f8cSSatish Balay 
286a30f8f8cSSatish Balay   /* gnerate the scatter context */
287a30f8f8cSSatish Balay   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
288a30f8f8cSSatish Balay 
289a30f8f8cSSatish Balay   /*
290a30f8f8cSSatish Balay       Post the receives for the first matrix vector product. We sync-chronize after
291a30f8f8cSSatish Balay     this on the chance that the user immediately calls MatMult() after assemblying
292a30f8f8cSSatish Balay     the matrix.
293a30f8f8cSSatish Balay   */
294a30f8f8cSSatish Balay   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
295a30f8f8cSSatish Balay   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
296a30f8f8cSSatish Balay 
29752e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
29852e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
29952e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
30052e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
301a30f8f8cSSatish Balay   baij->garray = garray;
30252e6d16bSBarry Smith   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
303a30f8f8cSSatish Balay   ierr = ISDestroy(from);CHKERRQ(ierr);
304a30f8f8cSSatish Balay   ierr = ISDestroy(to);CHKERRQ(ierr);
305a30f8f8cSSatish Balay   ierr = VecDestroy(gvec);CHKERRQ(ierr);
306a30f8f8cSSatish Balay   PetscFunctionReturn(0);
307a30f8f8cSSatish Balay }
308a30f8f8cSSatish Balay 
309a30f8f8cSSatish Balay /*
31001b2bd88SHong Zhang      Takes the local part of an already assembled MPISBAIJ matrix
311a30f8f8cSSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
312a30f8f8cSSatish Balay    that require more communication in the matrix vector multiply.
313a30f8f8cSSatish Balay    Thus certain data-structures must be rebuilt.
314a30f8f8cSSatish Balay 
315a30f8f8cSSatish Balay    Kind of slow! But that's what application programmers get when
316a30f8f8cSSatish Balay    they are sloppy.
317a30f8f8cSSatish Balay */
3184a2ae208SSatish Balay #undef __FUNCT__
3194a2ae208SSatish Balay #define __FUNCT__ "DisAssemble_MPISBAIJ"
320dfbe8321SBarry Smith PetscErrorCode DisAssemble_MPISBAIJ(Mat A)
321a30f8f8cSSatish Balay {
3222896befeSSatish Balay   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
323a30f8f8cSSatish Balay   Mat            B = baij->B,Bnew;
324a30f8f8cSSatish Balay   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
325dfbe8321SBarry Smith   PetscErrorCode ierr;
326899cda47SBarry Smith   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap.n,col,*garray=baij->garray;
327899cda47SBarry Smith   PetscInt       k,bs=A->rmap.bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap.n;
328a30f8f8cSSatish Balay   MatScalar      *a = Bbaij->a;
32987828ca2SBarry Smith   PetscScalar    *atmp;
33082502324SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
33113f74950SBarry Smith   PetscInt       l;
332a30f8f8cSSatish Balay #endif
333a30f8f8cSSatish Balay 
334a30f8f8cSSatish Balay   PetscFunctionBegin;
33582502324SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
336899cda47SBarry Smith   ierr = PetscMalloc(A->rmap.bs*sizeof(PetscScalar),&atmp);
33782502324SSatish Balay #endif
338a30f8f8cSSatish Balay   /* free stuff related to matrix-vec multiply */
339b0a32e0cSBarry Smith   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
34001b2bd88SHong Zhang   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);
34101b2bd88SHong Zhang   baij->lvec = 0;
34201b2bd88SHong Zhang   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);
34301b2bd88SHong Zhang   baij->Mvctx = 0;
34401b2bd88SHong Zhang 
34501b2bd88SHong Zhang   ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
34601b2bd88SHong Zhang   ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
34701b2bd88SHong Zhang   baij->slvec0 = 0;
34801b2bd88SHong Zhang   ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
34901b2bd88SHong Zhang   ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
35001b2bd88SHong Zhang   ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
35101b2bd88SHong Zhang   baij->slvec1 = 0;
35201b2bd88SHong Zhang 
353a30f8f8cSSatish Balay   if (baij->colmap) {
354a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
355a30f8f8cSSatish Balay     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
356a30f8f8cSSatish Balay #else
357a30f8f8cSSatish Balay     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
358a30f8f8cSSatish Balay     baij->colmap = 0;
35952e6d16bSBarry Smith     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
360a30f8f8cSSatish Balay #endif
361a30f8f8cSSatish Balay   }
362a30f8f8cSSatish Balay 
363a30f8f8cSSatish Balay   /* make sure that B is assembled so we can access its values */
364a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
365a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
366a30f8f8cSSatish Balay 
367a30f8f8cSSatish Balay   /* invent new B and copy stuff over */
36813f74950SBarry Smith   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
369a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
370a30f8f8cSSatish Balay     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
371a30f8f8cSSatish Balay   }
372f69a0ea3SMatthew Knepley   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
373f69a0ea3SMatthew Knepley   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
374be5d1d56SKris Buschelman   ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
375899cda47SBarry Smith   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap.bs,0,nz);CHKERRQ(ierr);
376a30f8f8cSSatish Balay   ierr = PetscFree(nz);CHKERRQ(ierr);
377a30f8f8cSSatish Balay 
37813f74950SBarry Smith   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
379a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
380a30f8f8cSSatish Balay     rvals[0] = bs*i;
381a30f8f8cSSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
382a30f8f8cSSatish Balay     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
383a30f8f8cSSatish Balay       col = garray[Bbaij->j[j]]*bs;
384a30f8f8cSSatish Balay       for (k=0; k<bs; k++) {
385a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
386a30f8f8cSSatish Balay         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
387a30f8f8cSSatish Balay #else
38871730473SSatish Balay         atmp = a+j*bs2 + k*bs;
389a30f8f8cSSatish Balay #endif
390c8407628SSatish Balay         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
391a30f8f8cSSatish Balay         col++;
392a30f8f8cSSatish Balay       }
393a30f8f8cSSatish Balay     }
394a30f8f8cSSatish Balay   }
395a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
396a30f8f8cSSatish Balay   ierr = PetscFree(atmp);CHKERRQ(ierr);
397a30f8f8cSSatish Balay #endif
398a30f8f8cSSatish Balay   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
399a30f8f8cSSatish Balay   baij->garray = 0;
400a30f8f8cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
40152e6d16bSBarry Smith   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
402a30f8f8cSSatish Balay   ierr = MatDestroy(B);CHKERRQ(ierr);
40352e6d16bSBarry Smith   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
404a30f8f8cSSatish Balay   baij->B = Bnew;
405a30f8f8cSSatish Balay   A->was_assembled = PETSC_FALSE;
406a30f8f8cSSatish Balay   PetscFunctionReturn(0);
407a30f8f8cSSatish Balay }
408a30f8f8cSSatish Balay 
409a30f8f8cSSatish Balay 
410a30f8f8cSSatish Balay 
411