xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 9af31e4ad595286b4e2df8194fee047feeccfe42)
1 
2 /*
3    Support for the parallel SBAIJ matrix vector multiply
4 */
5 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
6 
7 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
8 
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
12 int MatSetUpMultiply_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ       *sbaij = (Mat_MPISBAIJ*)mat->data;
15   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(sbaij->B->data);
16   int                Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray,*sgarray;
17   int                bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
18   IS                 from,to;
19   Vec                gvec;
20   int                rank=sbaij->rank,lsize,size=sbaij->size;
21   int                *owners=sbaij->rowners,*sowners,*ec_owner,k;
22   PetscMap           vecmap;
23   PetscScalar        *ptr;
24 
25   PetscFunctionBegin;
26   if (sbaij->lvec) {
27     ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr);
28     sbaij->lvec = 0;
29   }
30   if (sbaij->Mvctx) {
31     ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr);
32     sbaij->Mvctx = 0;
33   }
34 
35   /* For the first stab we make an array as long as the number of columns */
36   /* mark those columns that are in sbaij->B */
37   ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
38   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
39   for (i=0; i<mbs; i++) {
40     for (j=0; j<B->ilen[i]; j++) {
41       if (!indices[aj[B->i[i] + j]]) ec++;
42       indices[aj[B->i[i] + j] ] = 1;
43     }
44   }
45 
46   /* form arrays of columns we need */
47   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
48   ierr = PetscMalloc((3*ec+1)*sizeof(int),&sgarray);CHKERRQ(ierr);
49   ec_owner = sgarray + 2*ec;
50 
51   ec = 0;
52   for (j=0; j<size; j++){
53     for (i=owners[j]; i<owners[j+1]; i++){
54       if (indices[i]) {
55         garray[ec]   = i;
56         ec_owner[ec] = j;
57         ec++;
58       }
59     }
60   }
61 
62   /* make indices now point into garray */
63   for (i=0; i<ec; i++) indices[garray[i]] = i;
64 
65   /* compact out the extra columns in B */
66   for (i=0; i<mbs; i++) {
67     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
68   }
69   B->nbs      = ec;
70   sbaij->B->n = ec*B->bs;
71   ierr = PetscFree(indices);CHKERRQ(ierr);
72 
73   /* create local vector that is used to scatter into */
74   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
75 
76   /* create two temporary index sets for building scatter-gather */
77   ierr = PetscMalloc((2*ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
78   for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
79   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);
80 
81   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
82   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
83 
84   /* generate the scatter context
85      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */
86   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
87   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
88   ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr);
89 
90   sbaij->garray = garray;
91   PetscLogObjectParent(mat,sbaij->Mvctx);
92   PetscLogObjectParent(mat,sbaij->lvec);
93   PetscLogObjectParent(mat,from);
94   PetscLogObjectParent(mat,to);
95 
96   ierr = ISDestroy(from);CHKERRQ(ierr);
97   ierr = ISDestroy(to);CHKERRQ(ierr);
98 
99   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
100   lsize = (mbs + ec)*bs;
101   ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
102   ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
103   ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
104 
105   ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr);
106   ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr);
107 
108   /* x index in the IS sfrom */
109   for (i=0; i<ec; i++) {
110     j = ec_owner[i];
111     sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
112   }
113   /* b index in the IS sfrom */
114   k = sowners[rank]/bs + mbs;
115   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
116 
117   for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
118   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);
119 
120   /* x index in the IS sto */
121   k = sowners[rank]/bs + mbs;
122   for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
123   /* b index in the IS sto */
124   for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];
125 
126   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);
127 
128   /* gnerate the SBAIJ scatter context */
129   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
130 
131    /*
132       Post the receives for the first matrix vector product. We sync-chronize after
133     this on the chance that the user immediately calls MatMult() after assemblying
134     the matrix.
135   */
136   ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr);
137 
138   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
139   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
140   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
141   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
142   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
143 
144   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
145   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
146   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
147 
148   ierr = PetscFree(stmp);CHKERRQ(ierr);
149   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
150 
151   PetscLogObjectParent(mat,sbaij->sMvctx);
152   PetscLogObjectParent(mat,sbaij->slvec0);
153   PetscLogObjectParent(mat,sbaij->slvec1);
154   PetscLogObjectParent(mat,sbaij->slvec0b);
155   PetscLogObjectParent(mat,sbaij->slvec1a);
156   PetscLogObjectParent(mat,sbaij->slvec1b);
157   PetscLogObjectParent(mat,from);
158   PetscLogObjectParent(mat,to);
159 
160   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
161   ierr = ISDestroy(from);CHKERRQ(ierr);
162   ierr = ISDestroy(to);CHKERRQ(ierr);
163   ierr = VecDestroy(gvec);CHKERRQ(ierr);
164   ierr = PetscFree(sgarray);CHKERRQ(ierr);
165 
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
171 int MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
172 {
173   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
174   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
175   int                i,j,*aj = B->j,ierr,ec = 0,*garray;
176   int                bs = baij->bs,*stmp;
177   IS                 from,to;
178   Vec                gvec;
179 #if defined (PETSC_USE_CTABLE)
180   PetscTable         gid1_lid1;
181   PetscTablePosition tpos;
182   int                gid,lid;
183 #else
184   int                Nbs = baij->Nbs,*indices;
185 #endif
186 
187   PetscFunctionBegin;
188 
189 #if defined (PETSC_USE_CTABLE)
190   /* use a table - Mark Adams */
191   PetscTableCreate(B->mbs,&gid1_lid1);
192   for (i=0; i<B->mbs; i++) {
193     for (j=0; j<B->ilen[i]; j++) {
194       int data,gid1 = aj[B->i[i]+j] + 1;
195       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
196       if (!data) {
197         /* one based table */
198         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
199       }
200     }
201   }
202   /* form array of columns we need */
203   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
204   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
205   while (tpos) {
206     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
207     gid--; lid--;
208     garray[lid] = gid;
209   }
210   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
211   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
212   for (i=0; i<ec; i++) {
213     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
214   }
215   /* compact out the extra columns in B */
216   for (i=0; i<B->mbs; i++) {
217     for (j=0; j<B->ilen[i]; j++) {
218       int gid1 = aj[B->i[i] + j] + 1;
219       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
220       lid --;
221       aj[B->i[i]+j] = lid;
222     }
223   }
224   B->nbs     = ec;
225   baij->B->n = ec*B->bs;
226   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
227   /* Mark Adams */
228 #else
229   /* For the first stab we make an array as long as the number of columns */
230   /* mark those columns that are in baij->B */
231   ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
232   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
233   for (i=0; i<B->mbs; i++) {
234     for (j=0; j<B->ilen[i]; j++) {
235       if (!indices[aj[B->i[i] + j]]) ec++;
236       indices[aj[B->i[i] + j] ] = 1;
237     }
238   }
239 
240   /* form array of columns we need */
241   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
242   ec = 0;
243   for (i=0; i<Nbs; i++) {
244     if (indices[i]) {
245       garray[ec++] = i;
246     }
247   }
248 
249   /* make indices now point into garray */
250   for (i=0; i<ec; i++) {
251     indices[garray[i]] = i;
252   }
253 
254   /* compact out the extra columns in B */
255   for (i=0; i<B->mbs; i++) {
256     for (j=0; j<B->ilen[i]; j++) {
257       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
258     }
259   }
260   B->nbs       = ec;
261   baij->B->n   = ec*B->bs;
262   ierr = PetscFree(indices);CHKERRQ(ierr);
263 #endif
264 
265   /* create local vector that is used to scatter into */
266   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
267 
268   /* create two temporary index sets for building scatter-gather */
269   for (i=0; i<ec; i++) {
270     garray[i] = bs*garray[i];
271   }
272   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
273   for (i=0; i<ec; i++) {
274     garray[i] = garray[i]/bs;
275   }
276 
277   ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
278   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
279   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
280   ierr = PetscFree(stmp);CHKERRQ(ierr);
281 
282   /* create temporary global vector to generate scatter context */
283   /* this is inefficient, but otherwise we must do either
284      1) save garray until the first actual scatter when the vector is known or
285      2) have another way of generating a scatter context without a vector.*/
286   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
287 
288   /* gnerate the scatter context */
289   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
290 
291   /*
292       Post the receives for the first matrix vector product. We sync-chronize after
293     this on the chance that the user immediately calls MatMult() after assemblying
294     the matrix.
295   */
296   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
297   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
298 
299   PetscLogObjectParent(mat,baij->Mvctx);
300   PetscLogObjectParent(mat,baij->lvec);
301   PetscLogObjectParent(mat,from);
302   PetscLogObjectParent(mat,to);
303   baij->garray = garray;
304   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
305   ierr = ISDestroy(from);CHKERRQ(ierr);
306   ierr = ISDestroy(to);CHKERRQ(ierr);
307   ierr = VecDestroy(gvec);CHKERRQ(ierr);
308   PetscFunctionReturn(0);
309 }
310 
311 
312 /*
313      Takes the local part of an already assembled MPIBAIJ matrix
314    and disassembles it. This is to allow new nonzeros into the matrix
315    that require more communication in the matrix vector multiply.
316    Thus certain data-structures must be rebuilt.
317 
318    Kind of slow! But that's what application programmers get when
319    they are sloppy.
320 */
321 #undef __FUNCT__
322 #define __FUNCT__ "DisAssemble_MPISBAIJ"
323 int DisAssemble_MPISBAIJ(Mat A)
324 {
325   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
326   Mat           B = baij->B,Bnew;
327   Mat_SeqBAIJ   *Bbaij = (Mat_SeqBAIJ*)B->data;
328   int           ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
329   int           k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m;
330   MatScalar     *a = Bbaij->a;
331   PetscScalar   *atmp;
332 #if defined(PETSC_USE_MAT_SINGLE)
333   int           l;
334 #endif
335 
336   PetscFunctionBegin;
337 #if defined(PETSC_USE_MAT_SINGLE)
338   ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp);
339 #endif
340   /* free stuff related to matrix-vec multiply */
341   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
342   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
343   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
344   if (baij->colmap) {
345 #if defined (PETSC_USE_CTABLE)
346     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
347 #else
348     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
349     baij->colmap = 0;
350     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
351 #endif
352   }
353 
354   /* make sure that B is assembled so we can access its values */
355   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
356   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
357 
358   /* invent new B and copy stuff over */
359   ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
360   for (i=0; i<mbs; i++) {
361     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
362   }
363   ierr = MatCreate(PETSC_COMM_SELF,m,n,m,n,&Bnew);CHKERRQ(ierr);
364   ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
365   ierr = MatSeqBAIJSetPreallocation(Bnew,baij->bs,0,nz);CHKERRQ(ierr);
366   ierr = PetscFree(nz);CHKERRQ(ierr);
367 
368   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
369   for (i=0; i<mbs; i++) {
370     rvals[0] = bs*i;
371     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
372     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
373       col = garray[Bbaij->j[j]]*bs;
374       for (k=0; k<bs; k++) {
375 #if defined(PETSC_USE_MAT_SINGLE)
376         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
377 #else
378         atmp = a+j*bs2 + k*bs;
379 #endif
380         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
381         col++;
382       }
383     }
384   }
385 #if defined(PETSC_USE_MAT_SINGLE)
386   ierr = PetscFree(atmp);CHKERRQ(ierr);
387 #endif
388   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
389   baij->garray = 0;
390   ierr = PetscFree(rvals);CHKERRQ(ierr);
391   PetscLogObjectMemory(A,-ec*sizeof(int));
392   ierr = MatDestroy(B);CHKERRQ(ierr);
393   PetscLogObjectParent(A,Bnew);
394   baij->B = Bnew;
395   A->was_assembled = PETSC_FALSE;
396   PetscFunctionReturn(0);
397 }
398 
399 
400 
401