xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 7783f70d43cdcceffdb2c71b5fced1e99925dfc2)
1 #define PETSCMAT_DLL
2 
3 /*
4    Support for the parallel SBAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
7 
8 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 
10 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
/*
   MatSetUpMultiply_MPISBAIJ - Builds the scatter contexts and work vectors
   used by the parallel SBAIJ matrix-vector product.

   Input Parameter:
.  mat - the MPISBAIJ matrix; its off-diagonal part sbaij->B must already
         hold assembled (global) block column indices in aj/ilen

   Notes:
   Determines the set of off-process block columns referenced by sbaij->B,
   compacts B's column indices to a local numbering (garray maps local block
   column -> global block column), and creates:
     - lvec/Mvctx:   conventional gather of the needed off-process x entries
     - slvec0/slvec1/sMvctx: the combined scatter used by MatMult_MPISBAIJ(),
       which communicates both x entries and partial products in one context
     - slvec0b, slvec1a, slvec1b: sequential views that share storage with
       slvec0/slvec1 (local part and ghost tail)
*/
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(sbaij->B->data);
  PetscErrorCode ierr;
  PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
  PetscInt       bs = mat->rmap.bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
  IS             from,to;
  Vec            gvec;
  PetscMPIInt    rank=sbaij->rank,lsize,size=sbaij->size;
  PetscInt       *owners=sbaij->rangebs,*sowners,*ec_owner,k;
  PetscScalar    *ptr;

  PetscFunctionBegin;
  if (sbaij->sMvctx) {
    /* These two lines should be in DisAssemble_MPISBAIJ(); destroying the
       scatter there caused a crash for reasons unknown, so it is done here */
    ierr = VecScatterDestroy(sbaij->sMvctx);CHKERRQ(ierr);
    sbaij->sMvctx = 0;
  }

  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++; /* ec counts the distinct block columns of B */
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form arrays of columns we need; sgarray stores 2*ec scatter indices and,
     in its tail (aliased by ec_owner), the owning rank of each needed column */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscMalloc((3*ec+1)*sizeof(PetscInt),&sgarray);CHKERRQ(ierr);
  ec_owner = sgarray + 2*ec;

  ec = 0;
  /* scan the ownership ranges in rank order so garray comes out sorted and
     each needed column's owner rank is recorded alongside it */
  for (j=0; j<size; j++){
    for (i=owners[j]; i<owners[j+1]; i++){
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs      = ec;
  sbaij->B->cmap.n = sbaij->B->cmap.N = ec*mat->rmap.bs;
  ierr = PetscMapInitialize(sbaij->B->comm,&(sbaij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  ierr = PetscMalloc((2*ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);

  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but useful for some applications */
  ierr = VecCreateMPI(mat->comm,mat->cmap.n,mat->cmap.N,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr);

  sbaij->garray = garray;
  ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into;
     local layout is the mbs owned block rows followed by ec ghost blocks */
  lsize = (mbs + ec)*bs;
  ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
  ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); /* NOTE(review): vec_size is not used below */

  sowners = sbaij->slvec0->map.range;

  /* x index in the IS sfrom: translate each needed global block column into
     the owner's offset within slvec0's (mbs+ec)*bs-per-process layout */
  for (i=0; i<ec; i++) {
    j = ec_owner[i];
    sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* b index in the IS sfrom: this process's ghost section starts after its
     mbs local blocks */
  k = sowners[rank]/bs + mbs;
  for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;

  for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);

  /* x index in the IS sto */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
  /* b index in the IS sto */
  for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);

  /* generate the SBAIJ scatter context */
  ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);

   /*
      Post the receives for the first matrix vector product. We synchronize after
    this on the chance that the user immediately calls MatMult() after assembling
    the matrix.
  */
  ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr);

  /* slvec1a/slvec1b are sequential views of slvec1's storage (leading bs*mbs
     local part and trailing ghost part); slvec0b views slvec0's ghost part */
  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);

  ierr = PetscFree(stmp);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  ierr = PetscFree(sgarray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
165 
166 #undef __FUNCT__
167 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
168 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
169 {
170   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
171   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
172   PetscErrorCode     ierr;
173   PetscInt           i,j,*aj = B->j,ec = 0,*garray;
174   PetscInt           bs = mat->rmap.bs,*stmp;
175   IS                 from,to;
176   Vec                gvec;
177 #if defined (PETSC_USE_CTABLE)
178   PetscTable         gid1_lid1;
179   PetscTablePosition tpos;
180   PetscInt           gid,lid;
181 #else
182   PetscInt           Nbs = baij->Nbs,*indices;
183 #endif
184 
185   PetscFunctionBegin;
186 #if defined (PETSC_USE_CTABLE)
187   /* use a table - Mark Adams */
188   PetscTableCreate(B->mbs,&gid1_lid1);
189   for (i=0; i<B->mbs; i++) {
190     for (j=0; j<B->ilen[i]; j++) {
191       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
192       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
193       if (!data) {
194         /* one based table */
195         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
196       }
197     }
198   }
199   /* form array of columns we need */
200   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
201   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
202   while (tpos) {
203     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
204     gid--; lid--;
205     garray[lid] = gid;
206   }
207   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
208   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
209   for (i=0; i<ec; i++) {
210     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
211   }
212   /* compact out the extra columns in B */
213   for (i=0; i<B->mbs; i++) {
214     for (j=0; j<B->ilen[i]; j++) {
215       PetscInt gid1 = aj[B->i[i] + j] + 1;
216       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
217       lid --;
218       aj[B->i[i]+j] = lid;
219     }
220   }
221   B->nbs     = ec;
222   baij->B->cmap.n = baij->B->cmap.N = ec*mat->rmap.bs;
223   ierr = PetscMapInitialize(baij->B->comm,&(baij->B->cmap));CHKERRQ(ierr);
224   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
225   /* Mark Adams */
226 #else
227   /* For the first stab we make an array as long as the number of columns */
228   /* mark those columns that are in baij->B */
229   ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
230   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
231   for (i=0; i<B->mbs; i++) {
232     for (j=0; j<B->ilen[i]; j++) {
233       if (!indices[aj[B->i[i] + j]]) ec++;
234       indices[aj[B->i[i] + j] ] = 1;
235     }
236   }
237 
238   /* form array of columns we need */
239   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
240   ec = 0;
241   for (i=0; i<Nbs; i++) {
242     if (indices[i]) {
243       garray[ec++] = i;
244     }
245   }
246 
247   /* make indices now point into garray */
248   for (i=0; i<ec; i++) {
249     indices[garray[i]] = i;
250   }
251 
252   /* compact out the extra columns in B */
253   for (i=0; i<B->mbs; i++) {
254     for (j=0; j<B->ilen[i]; j++) {
255       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
256     }
257   }
258   B->nbs       = ec;
259   baij->B->cmap.n   = ec*mat->rmap.bs;
260   ierr = PetscFree(indices);CHKERRQ(ierr);
261 #endif
262 
263   /* create local vector that is used to scatter into */
264   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
265 
266   /* create two temporary index sets for building scatter-gather */
267   for (i=0; i<ec; i++) {
268     garray[i] = bs*garray[i];
269   }
270   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
271   for (i=0; i<ec; i++) {
272     garray[i] = garray[i]/bs;
273   }
274 
275   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
276   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
277   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
278   ierr = PetscFree(stmp);CHKERRQ(ierr);
279 
280   /* create temporary global vector to generate scatter context */
281   /* this is inefficient, but otherwise we must do either
282      1) save garray until the first actual scatter when the vector is known or
283      2) have another way of generating a scatter context without a vector.*/
284   ierr = VecCreateMPI(mat->comm,mat->cmap.n,mat->cmap.N,&gvec);CHKERRQ(ierr);
285 
286   /* gnerate the scatter context */
287   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
288 
289   /*
290       Post the receives for the first matrix vector product. We sync-chronize after
291     this on the chance that the user immediately calls MatMult() after assemblying
292     the matrix.
293   */
294   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
295   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
296 
297   ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
298   ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
299   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
300   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
301   baij->garray = garray;
302   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
303   ierr = ISDestroy(from);CHKERRQ(ierr);
304   ierr = ISDestroy(to);CHKERRQ(ierr);
305   ierr = VecDestroy(gvec);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
309 /*
310      Takes the local part of an already assembled MPISBAIJ matrix
311    and disassembles it. This is to allow new nonzeros into the matrix
312    that require more communication in the matrix vector multiply.
313    Thus certain data-structures must be rebuilt.
314 
315    Kind of slow! But that's what application programmers get when
316    they are sloppy.
317 */
318 #undef __FUNCT__
319 #define __FUNCT__ "DisAssemble_MPISBAIJ"
320 PetscErrorCode DisAssemble_MPISBAIJ(Mat A)
321 {
322   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
323   Mat            B = baij->B,Bnew;
324   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
325   PetscErrorCode ierr;
326   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap.n,col,*garray=baij->garray;
327   PetscInt       k,bs=A->rmap.bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap.n;
328   MatScalar      *a = Bbaij->a;
329   PetscScalar    *atmp;
330 #if defined(PETSC_USE_MAT_SINGLE)
331   PetscInt       l;
332 #endif
333 
334   PetscFunctionBegin;
335 #if defined(PETSC_USE_MAT_SINGLE)
336   ierr = PetscMalloc(A->rmap.bs*sizeof(PetscScalar),&atmp);
337 #endif
338   /* free stuff related to matrix-vec multiply */
339   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
340   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);
341   baij->lvec = 0;
342   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);
343   baij->Mvctx = 0;
344 
345   ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
346   ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
347   baij->slvec0 = 0;
348   ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
349   ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
350   ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
351   baij->slvec1 = 0;
352 
353   if (baij->colmap) {
354 #if defined (PETSC_USE_CTABLE)
355     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
356 #else
357     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
358     baij->colmap = 0;
359     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
360 #endif
361   }
362 
363   /* make sure that B is assembled so we can access its values */
364   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
365   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
366 
367   /* invent new B and copy stuff over */
368   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
369   for (i=0; i<mbs; i++) {
370     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
371   }
372   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
373   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
374   ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
375   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap.bs,0,nz);CHKERRQ(ierr);
376   ierr = PetscFree(nz);CHKERRQ(ierr);
377 
378   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
379   for (i=0; i<mbs; i++) {
380     rvals[0] = bs*i;
381     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
382     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
383       col = garray[Bbaij->j[j]]*bs;
384       for (k=0; k<bs; k++) {
385 #if defined(PETSC_USE_MAT_SINGLE)
386         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
387 #else
388         atmp = a+j*bs2 + k*bs;
389 #endif
390         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
391         col++;
392       }
393     }
394   }
395 #if defined(PETSC_USE_MAT_SINGLE)
396   ierr = PetscFree(atmp);CHKERRQ(ierr);
397 #endif
398   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
399   baij->garray = 0;
400   ierr = PetscFree(rvals);CHKERRQ(ierr);
401   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
402   ierr = MatDestroy(B);CHKERRQ(ierr);
403   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
404   baij->B = Bnew;
405   A->was_assembled = PETSC_FALSE;
406   PetscFunctionReturn(0);
407 }
408 
409 
410 
411