xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 40781036cb61568352ccff04d60a627167f497bd)
173f4d377SMatthew Knepley /*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/
2a30f8f8cSSatish Balay 
3a30f8f8cSSatish Balay /*
4a30f8f8cSSatish Balay    Support for the parallel SBAIJ matrix vector multiply
5a30f8f8cSSatish Balay */
62896befeSSatish Balay #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
7a30f8f8cSSatish Balay #include "src/vec/vecimpl.h"
887828ca2SBarry Smith extern int MatSetValues_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
9a30f8f8cSSatish Balay 
104a2ae208SSatish Balay #undef __FUNCT__
114a2ae208SSatish Balay #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
12a30f8f8cSSatish Balay int MatSetUpMultiply_MPISBAIJ(Mat mat)
13a30f8f8cSSatish Balay {
14*40781036SHong Zhang   Mat_MPISBAIJ       *sbaij = (Mat_MPISBAIJ*)mat->data;
15*40781036SHong Zhang   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(sbaij->B->data);
16*40781036SHong Zhang   int                Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray,*sgarray;
17*40781036SHong Zhang   int                bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
18*40781036SHong Zhang   IS                 from,to;
19*40781036SHong Zhang   Vec                gvec;
20*40781036SHong Zhang   int                rank=sbaij->rank,lsize,size=sbaij->size;
21*40781036SHong Zhang   int                *owners=sbaij->rowners,*sowners,*ec_owner,prank=100,k;
22*40781036SHong Zhang   PetscMap           vecmap;
23*40781036SHong Zhang   PetscScalar        *ptr;
24*40781036SHong Zhang 
25*40781036SHong Zhang   PetscFunctionBegin;
26*40781036SHong Zhang   /*
27*40781036SHong Zhang   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], MatSetUpMultiply_MPISBAIJ is called ...\n",rank);
28*40781036SHong Zhang   PetscSynchronizedFlush(PETSC_COMM_WORLD);
29*40781036SHong Zhang   */
30*40781036SHong Zhang   ierr = PetscOptionsGetInt(PETSC_NULL,"-prank",&prank,PETSC_NULL);CHKERRQ(ierr);
31*40781036SHong Zhang   if (sbaij->lvec) {
32*40781036SHong Zhang     ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr);
33*40781036SHong Zhang     sbaij->lvec = 0;
34*40781036SHong Zhang   }
35*40781036SHong Zhang   if (sbaij->Mvctx) {
36*40781036SHong Zhang     ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr);
37*40781036SHong Zhang     sbaij->Mvctx = 0;
38*40781036SHong Zhang   }
39*40781036SHong Zhang 
40*40781036SHong Zhang   /* For the first stab we make an array as long as the number of columns */
41*40781036SHong Zhang   /* mark those columns that are in sbaij->B */
42*40781036SHong Zhang   ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
43*40781036SHong Zhang   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
44*40781036SHong Zhang   for (i=0; i<mbs; i++) {
45*40781036SHong Zhang     for (j=0; j<B->ilen[i]; j++) {
46*40781036SHong Zhang       if (!indices[aj[B->i[i] + j]]) ec++;
47*40781036SHong Zhang       indices[aj[B->i[i] + j] ] = 1;
48*40781036SHong Zhang     }
49*40781036SHong Zhang   }
50*40781036SHong Zhang 
51*40781036SHong Zhang   /* form arrays of columns we need */
52*40781036SHong Zhang   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
53*40781036SHong Zhang   ierr = PetscMalloc((3*ec+1)*sizeof(int),&sgarray);CHKERRQ(ierr);
54*40781036SHong Zhang   ec_owner = sgarray + 2*ec;
55*40781036SHong Zhang 
56*40781036SHong Zhang   ec = 0;
57*40781036SHong Zhang   for (j=0; j<size; j++){
58*40781036SHong Zhang     for (i=owners[j]; i<owners[j+1]; i++){
59*40781036SHong Zhang       if (indices[i]) {
60*40781036SHong Zhang         garray[ec]   = i;
61*40781036SHong Zhang         ec_owner[ec] = j;
62*40781036SHong Zhang         ec++;
63*40781036SHong Zhang       }
64*40781036SHong Zhang     }
65*40781036SHong Zhang   }
66*40781036SHong Zhang 
67*40781036SHong Zhang   if (rank == prank){
68*40781036SHong Zhang     printf("proc[%d]: \n",rank);
69*40781036SHong Zhang     for (i=0; i<ec;i++)  printf("[%d]: garray = %d, ec_owner = %d\n", i,garray[i], ec_owner[i]);
70*40781036SHong Zhang   }
71*40781036SHong Zhang 
72*40781036SHong Zhang   /* make indices now point into garray */
73*40781036SHong Zhang   for (i=0; i<ec; i++) {
74*40781036SHong Zhang     indices[garray[i]] = i;
75*40781036SHong Zhang   }
76*40781036SHong Zhang 
77*40781036SHong Zhang   /* compact out the extra columns in B */
78*40781036SHong Zhang   for (i=0; i<mbs; i++) {
79*40781036SHong Zhang     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
80*40781036SHong Zhang   }
81*40781036SHong Zhang   B->nbs       = ec;
82*40781036SHong Zhang   sbaij->B->n   = ec*B->bs;
83*40781036SHong Zhang   ierr = PetscFree(indices);CHKERRQ(ierr);
84*40781036SHong Zhang 
85*40781036SHong Zhang   /* create local vector that is used to scatter into */
86*40781036SHong Zhang   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
87*40781036SHong Zhang 
88*40781036SHong Zhang   /* create two temporary index sets for building scatter-gather */
89*40781036SHong Zhang   ierr = PetscMalloc((2*ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
90*40781036SHong Zhang   for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
91*40781036SHong Zhang   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);
92*40781036SHong Zhang 
93*40781036SHong Zhang   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
94*40781036SHong Zhang   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
95*40781036SHong Zhang 
96*40781036SHong Zhang   if (rank == prank){
97*40781036SHong Zhang     ierr = PetscPrintf(PETSC_COMM_SELF," from: ");CHKERRQ(ierr);
98*40781036SHong Zhang     ierr = ISView(from,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
99*40781036SHong Zhang     ierr = PetscPrintf(PETSC_COMM_SELF," to:");CHKERRQ(ierr);
100*40781036SHong Zhang     ierr = ISView(to,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
101*40781036SHong Zhang   }
102*40781036SHong Zhang 
103*40781036SHong Zhang   /* generate the scatter context */
104*40781036SHong Zhang   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
105*40781036SHong Zhang   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
106*40781036SHong Zhang   ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr);
107*40781036SHong Zhang 
108*40781036SHong Zhang   sbaij->garray = garray;
109*40781036SHong Zhang   PetscLogObjectParent(mat,sbaij->Mvctx);
110*40781036SHong Zhang   PetscLogObjectParent(mat,sbaij->lvec);
111*40781036SHong Zhang   PetscLogObjectParent(mat,from);
112*40781036SHong Zhang   PetscLogObjectParent(mat,to);
113*40781036SHong Zhang 
114*40781036SHong Zhang   ierr = ISDestroy(from);CHKERRQ(ierr);
115*40781036SHong Zhang   ierr = ISDestroy(to);CHKERRQ(ierr);
116*40781036SHong Zhang 
117*40781036SHong Zhang   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
118*40781036SHong Zhang   lsize = (mbs + ec)*bs;
119*40781036SHong Zhang   ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
120*40781036SHong Zhang   ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
121*40781036SHong Zhang   ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
122*40781036SHong Zhang 
123*40781036SHong Zhang   ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr);
124*40781036SHong Zhang   ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr);
125*40781036SHong Zhang   /* for (i=0; i<=size;i++) sowners[i] /= bs; */
126*40781036SHong Zhang 
127*40781036SHong Zhang   if (rank==prank){
128*40781036SHong Zhang     printf("length of slvec0: \n", vec_size);
129*40781036SHong Zhang     for (i=0; i<=size;i++)  printf("[%d]: owners = %d, sowners/bs = %d\n", i,owners[i], sowners[i]/bs);
130*40781036SHong Zhang   }
131*40781036SHong Zhang 
132*40781036SHong Zhang   /* x index in the IS sfrom */
133*40781036SHong Zhang   for (i=0; i<ec; i++) {
134*40781036SHong Zhang     j = ec_owner[i];
135*40781036SHong Zhang     sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
136*40781036SHong Zhang   }
137*40781036SHong Zhang   /* b index in the IS sfrom */
138*40781036SHong Zhang   k = sowners[rank]/bs + mbs;
139*40781036SHong Zhang   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
140*40781036SHong Zhang 
141*40781036SHong Zhang   for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
142*40781036SHong Zhang   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);
143*40781036SHong Zhang 
144*40781036SHong Zhang   /* x index in the IS sto */
145*40781036SHong Zhang   k = sowners[rank]/bs + mbs;
146*40781036SHong Zhang   for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
147*40781036SHong Zhang   /* b index in the IS sto */
148*40781036SHong Zhang   for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];
149*40781036SHong Zhang 
150*40781036SHong Zhang   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);
151*40781036SHong Zhang 
152*40781036SHong Zhang   if (rank == prank){
153*40781036SHong Zhang     ierr = PetscPrintf(PETSC_COMM_SELF," sfrom: ");CHKERRQ(ierr);
154*40781036SHong Zhang     ierr = ISView(from,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
155*40781036SHong Zhang     ierr = PetscPrintf(PETSC_COMM_SELF," sto:");CHKERRQ(ierr);
156*40781036SHong Zhang     ierr = ISView(to,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
157*40781036SHong Zhang   }
158*40781036SHong Zhang 
159*40781036SHong Zhang   /* gnerate the SBAIJ scatter context */
160*40781036SHong Zhang   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
161*40781036SHong Zhang 
162*40781036SHong Zhang    /*
163*40781036SHong Zhang       Post the receives for the first matrix vector product. We sync-chronize after
164*40781036SHong Zhang     this on the chance that the user immediately calls MatMult() after assemblying
165*40781036SHong Zhang     the matrix.
166*40781036SHong Zhang   */
167*40781036SHong Zhang   ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr);
168*40781036SHong Zhang 
169*40781036SHong Zhang   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
170*40781036SHong Zhang   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
171*40781036SHong Zhang   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
172*40781036SHong Zhang   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
173*40781036SHong Zhang   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
174*40781036SHong Zhang 
175*40781036SHong Zhang   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
176*40781036SHong Zhang   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
177*40781036SHong Zhang   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
178*40781036SHong Zhang 
179*40781036SHong Zhang   ierr = PetscFree(stmp);CHKERRQ(ierr);
180*40781036SHong Zhang   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
181*40781036SHong Zhang 
182*40781036SHong Zhang   PetscLogObjectParent(mat,sbaij->sMvctx);
183*40781036SHong Zhang   PetscLogObjectParent(mat,sbaij->slvec0);
184*40781036SHong Zhang   PetscLogObjectParent(mat,sbaij->slvec1);
185*40781036SHong Zhang   PetscLogObjectParent(mat,sbaij->slvec0b);
186*40781036SHong Zhang   PetscLogObjectParent(mat,sbaij->slvec1a);
187*40781036SHong Zhang   PetscLogObjectParent(mat,sbaij->slvec1b);
188*40781036SHong Zhang   PetscLogObjectParent(mat,from);
189*40781036SHong Zhang   PetscLogObjectParent(mat,to);
190*40781036SHong Zhang 
191*40781036SHong Zhang   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
192*40781036SHong Zhang   ierr = ISDestroy(from);CHKERRQ(ierr);
193*40781036SHong Zhang   ierr = ISDestroy(to);CHKERRQ(ierr);
194*40781036SHong Zhang   ierr = VecDestroy(gvec);CHKERRQ(ierr);
195*40781036SHong Zhang   ierr = PetscFree(sgarray);CHKERRQ(ierr);
196*40781036SHong Zhang 
197*40781036SHong Zhang   PetscFunctionReturn(0);
198*40781036SHong Zhang }
199*40781036SHong Zhang 
200*40781036SHong Zhang #undef __FUNCT__
201*40781036SHong Zhang #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
202*40781036SHong Zhang int MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
203*40781036SHong Zhang {
2042896befeSSatish Balay   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
205a30f8f8cSSatish Balay   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
206a30f8f8cSSatish Balay   int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
20786e15631SSatish Balay   int                bs = baij->bs,*stmp;
208a30f8f8cSSatish Balay   IS                 from,to;
209a30f8f8cSSatish Balay   Vec                gvec;
210a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
211a30f8f8cSSatish Balay   PetscTable         gid1_lid1;
212a30f8f8cSSatish Balay   PetscTablePosition tpos;
213a30f8f8cSSatish Balay   int                gid,lid;
214a30f8f8cSSatish Balay #endif
215a30f8f8cSSatish Balay 
216a30f8f8cSSatish Balay   PetscFunctionBegin;
217a30f8f8cSSatish Balay 
218a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
219a30f8f8cSSatish Balay   /* use a table - Mark Adams */
220a30f8f8cSSatish Balay   PetscTableCreate(B->mbs,&gid1_lid1);
221a30f8f8cSSatish Balay   for (i=0; i<B->mbs; i++) {
222a30f8f8cSSatish Balay     for (j=0; j<B->ilen[i]; j++) {
223a30f8f8cSSatish Balay       int data,gid1 = aj[B->i[i]+j] + 1;
224a30f8f8cSSatish Balay       ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
225a30f8f8cSSatish Balay       if (!data) {
226a30f8f8cSSatish Balay         /* one based table */
227a30f8f8cSSatish Balay         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
228a30f8f8cSSatish Balay       }
229a30f8f8cSSatish Balay     }
230a30f8f8cSSatish Balay   }
231a30f8f8cSSatish Balay   /* form array of columns we need */
232b0a32e0cSBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
233a30f8f8cSSatish Balay   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
234a30f8f8cSSatish Balay   while (tpos) {
235a30f8f8cSSatish Balay     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
236a30f8f8cSSatish Balay     gid--; lid--;
237a30f8f8cSSatish Balay     garray[lid] = gid;
238a30f8f8cSSatish Balay   }
239a30f8f8cSSatish Balay   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
240a30f8f8cSSatish Balay   /* qsort(garray, ec, sizeof(int), intcomparcarc); */
241a30f8f8cSSatish Balay   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
242a30f8f8cSSatish Balay   for (i=0; i<ec; i++) {
243a30f8f8cSSatish Balay     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
244a30f8f8cSSatish Balay   }
245a30f8f8cSSatish Balay   /* compact out the extra columns in B */
246a30f8f8cSSatish Balay   for (i=0; i<B->mbs; i++) {
247a30f8f8cSSatish Balay     for (j=0; j<B->ilen[i]; j++) {
248a30f8f8cSSatish Balay       int gid1 = aj[B->i[i] + j] + 1;
249a30f8f8cSSatish Balay       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
250a30f8f8cSSatish Balay       lid --;
251a30f8f8cSSatish Balay       aj[B->i[i]+j] = lid;
252a30f8f8cSSatish Balay     }
253a30f8f8cSSatish Balay   }
254a30f8f8cSSatish Balay   B->nbs     = ec;
255273d9f13SBarry Smith   baij->B->n = ec*B->bs;
256a30f8f8cSSatish Balay   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
257a30f8f8cSSatish Balay   /* Mark Adams */
258a30f8f8cSSatish Balay #else
259a30f8f8cSSatish Balay   /* For the first stab we make an array as long as the number of columns */
260a30f8f8cSSatish Balay   /* mark those columns that are in baij->B */
261b0a32e0cSBarry Smith   ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
262a30f8f8cSSatish Balay   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
263a30f8f8cSSatish Balay   for (i=0; i<B->mbs; i++) {
264a30f8f8cSSatish Balay     for (j=0; j<B->ilen[i]; j++) {
265a30f8f8cSSatish Balay       if (!indices[aj[B->i[i] + j]]) ec++;
266a30f8f8cSSatish Balay       indices[aj[B->i[i] + j] ] = 1;
267a30f8f8cSSatish Balay     }
268a30f8f8cSSatish Balay   }
269a30f8f8cSSatish Balay 
270a30f8f8cSSatish Balay   /* form array of columns we need */
271b0a32e0cSBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
272f3ef73ceSBarry Smith   ec = 0;
273f3ef73ceSBarry Smith   for (i=0; i<Nbs; i++) {
274f3ef73ceSBarry Smith     if (indices[i]) {
275f3ef73ceSBarry Smith       garray[ec++] = i;
276f3ef73ceSBarry Smith     }
277f3ef73ceSBarry Smith   }
278a30f8f8cSSatish Balay 
279a30f8f8cSSatish Balay   /* make indices now point into garray */
280a30f8f8cSSatish Balay   for (i=0; i<ec; i++) {
281a30f8f8cSSatish Balay     indices[garray[i]] = i;
282a30f8f8cSSatish Balay   }
283a30f8f8cSSatish Balay 
284a30f8f8cSSatish Balay   /* compact out the extra columns in B */
285a30f8f8cSSatish Balay   for (i=0; i<B->mbs; i++) {
286a30f8f8cSSatish Balay     for (j=0; j<B->ilen[i]; j++) {
287a30f8f8cSSatish Balay       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
288a30f8f8cSSatish Balay     }
289a30f8f8cSSatish Balay   }
290a30f8f8cSSatish Balay   B->nbs       = ec;
291273d9f13SBarry Smith   baij->B->n   = ec*B->bs;
292a30f8f8cSSatish Balay   ierr = PetscFree(indices);CHKERRQ(ierr);
293a30f8f8cSSatish Balay #endif
294633e10c7SHong Zhang 
295a30f8f8cSSatish Balay   /* create local vector that is used to scatter into */
296a30f8f8cSSatish Balay   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
297a30f8f8cSSatish Balay 
298a30f8f8cSSatish Balay   /* create two temporary index sets for building scatter-gather */
299eb7adc28SSatish Balay   for (i=0; i<ec; i++) {
300a30f8f8cSSatish Balay     garray[i] = bs*garray[i];
301a30f8f8cSSatish Balay   }
302a30f8f8cSSatish Balay   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
30386e15631SSatish Balay   for (i=0; i<ec; i++) {
304a30f8f8cSSatish Balay     garray[i] = garray[i]/bs;
305a30f8f8cSSatish Balay   }
306a30f8f8cSSatish Balay 
307b0a32e0cSBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
308a30f8f8cSSatish Balay   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
309a30f8f8cSSatish Balay   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
310a30f8f8cSSatish Balay   ierr = PetscFree(stmp);CHKERRQ(ierr);
311a30f8f8cSSatish Balay 
312a30f8f8cSSatish Balay   /* create temporary global vector to generate scatter context */
313a30f8f8cSSatish Balay   /* this is inefficient, but otherwise we must do either
314a30f8f8cSSatish Balay      1) save garray until the first actual scatter when the vector is known or
315a30f8f8cSSatish Balay      2) have another way of generating a scatter context without a vector.*/
316273d9f13SBarry Smith   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
317a30f8f8cSSatish Balay 
318a30f8f8cSSatish Balay   /* gnerate the scatter context */
319a30f8f8cSSatish Balay   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
320a30f8f8cSSatish Balay 
321a30f8f8cSSatish Balay   /*
322a30f8f8cSSatish Balay       Post the receives for the first matrix vector product. We sync-chronize after
323a30f8f8cSSatish Balay     this on the chance that the user immediately calls MatMult() after assemblying
324a30f8f8cSSatish Balay     the matrix.
325a30f8f8cSSatish Balay   */
326a30f8f8cSSatish Balay   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
327a30f8f8cSSatish Balay   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
328a30f8f8cSSatish Balay 
329b0a32e0cSBarry Smith   PetscLogObjectParent(mat,baij->Mvctx);
330b0a32e0cSBarry Smith   PetscLogObjectParent(mat,baij->lvec);
331b0a32e0cSBarry Smith   PetscLogObjectParent(mat,from);
332b0a32e0cSBarry Smith   PetscLogObjectParent(mat,to);
333a30f8f8cSSatish Balay   baij->garray = garray;
334b0a32e0cSBarry Smith   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
335a30f8f8cSSatish Balay   ierr = ISDestroy(from);CHKERRQ(ierr);
336a30f8f8cSSatish Balay   ierr = ISDestroy(to);CHKERRQ(ierr);
337a30f8f8cSSatish Balay   ierr = VecDestroy(gvec);CHKERRQ(ierr);
338a30f8f8cSSatish Balay   PetscFunctionReturn(0);
339a30f8f8cSSatish Balay }
340a30f8f8cSSatish Balay 
341a30f8f8cSSatish Balay 
342a30f8f8cSSatish Balay /*
343a30f8f8cSSatish Balay      Takes the local part of an already assembled MPIBAIJ matrix
344a30f8f8cSSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
345a30f8f8cSSatish Balay    that require more communication in the matrix vector multiply.
346a30f8f8cSSatish Balay    Thus certain data-structures must be rebuilt.
347a30f8f8cSSatish Balay 
348a30f8f8cSSatish Balay    Kind of slow! But that's what application programmers get when
349a30f8f8cSSatish Balay    they are sloppy.
350a30f8f8cSSatish Balay */
3514a2ae208SSatish Balay #undef __FUNCT__
3524a2ae208SSatish Balay #define __FUNCT__ "DisAssemble_MPISBAIJ"
353a30f8f8cSSatish Balay int DisAssemble_MPISBAIJ(Mat A)
354a30f8f8cSSatish Balay {
3552896befeSSatish Balay   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
356a30f8f8cSSatish Balay   Mat           B = baij->B,Bnew;
357a30f8f8cSSatish Balay   Mat_SeqBAIJ   *Bbaij = (Mat_SeqBAIJ*)B->data;
358273d9f13SBarry Smith   int           ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
359273d9f13SBarry Smith   int           k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m;
360a30f8f8cSSatish Balay   MatScalar     *a = Bbaij->a;
36187828ca2SBarry Smith   PetscScalar   *atmp;
36282502324SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
36382502324SSatish Balay   int           l;
364a30f8f8cSSatish Balay #endif
365a30f8f8cSSatish Balay 
366a30f8f8cSSatish Balay   PetscFunctionBegin;
36782502324SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
36887828ca2SBarry Smith   ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp);
36982502324SSatish Balay #endif
370a30f8f8cSSatish Balay   /* free stuff related to matrix-vec multiply */
371b0a32e0cSBarry Smith   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
372a30f8f8cSSatish Balay   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
373a30f8f8cSSatish Balay   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
374a30f8f8cSSatish Balay   if (baij->colmap) {
375a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
376a30f8f8cSSatish Balay     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
377a30f8f8cSSatish Balay #else
378a30f8f8cSSatish Balay     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
379a30f8f8cSSatish Balay     baij->colmap = 0;
380b0a32e0cSBarry Smith     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
381a30f8f8cSSatish Balay #endif
382a30f8f8cSSatish Balay   }
383a30f8f8cSSatish Balay 
384a30f8f8cSSatish Balay   /* make sure that B is assembled so we can access its values */
385a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
386a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
387a30f8f8cSSatish Balay 
388a30f8f8cSSatish Balay   /* invent new B and copy stuff over */
38982502324SSatish Balay   ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
390a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
391a30f8f8cSSatish Balay     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
392a30f8f8cSSatish Balay   }
393a30f8f8cSSatish Balay   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
394a30f8f8cSSatish Balay   ierr = PetscFree(nz);CHKERRQ(ierr);
395a30f8f8cSSatish Balay 
39682502324SSatish Balay   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
397a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
398a30f8f8cSSatish Balay     rvals[0] = bs*i;
399a30f8f8cSSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
400a30f8f8cSSatish Balay     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
401a30f8f8cSSatish Balay       col = garray[Bbaij->j[j]]*bs;
402a30f8f8cSSatish Balay       for (k=0; k<bs; k++) {
403a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
404a30f8f8cSSatish Balay         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
405a30f8f8cSSatish Balay #else
40671730473SSatish Balay         atmp = a+j*bs2 + k*bs;
407a30f8f8cSSatish Balay #endif
408c8407628SSatish Balay         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
409a30f8f8cSSatish Balay         col++;
410a30f8f8cSSatish Balay       }
411a30f8f8cSSatish Balay     }
412a30f8f8cSSatish Balay   }
413a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
414a30f8f8cSSatish Balay   ierr = PetscFree(atmp);CHKERRQ(ierr);
415a30f8f8cSSatish Balay #endif
416a30f8f8cSSatish Balay   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
417a30f8f8cSSatish Balay   baij->garray = 0;
418a30f8f8cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
419b0a32e0cSBarry Smith   PetscLogObjectMemory(A,-ec*sizeof(int));
420a30f8f8cSSatish Balay   ierr = MatDestroy(B);CHKERRQ(ierr);
421b0a32e0cSBarry Smith   PetscLogObjectParent(A,Bnew);
422a30f8f8cSSatish Balay   baij->B = Bnew;
423a30f8f8cSSatish Balay   A->was_assembled = PETSC_FALSE;
424a30f8f8cSSatish Balay   PetscFunctionReturn(0);
425a30f8f8cSSatish Balay }
426a30f8f8cSSatish Balay 
427a30f8f8cSSatish Balay 
428a30f8f8cSSatish Balay 
429