xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision bba805e6fc5b07c4e8f8dee119088af221a1c6a8)
1 #define PETSCMAT_DLL
2 
3 /*
4    Support for the parallel SBAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
7 
8 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
/*
   MatSetUpMultiply_MPISBAIJ - Builds the communication structures needed by the
   parallel (symmetric, half-stored) SBAIJ matrix-vector product.

   Marks which block columns of the off-diagonal part B are actually used,
   compacts B's column indices into a local numbering (recorded in garray),
   and creates two scatter contexts:
     - Mvctx/lvec:  a conventional global->local gather (not used by
       MatMult_MPISBAIJ() itself, but useful for some applications);
     - sMvctx with slvec0/slvec1: the symmetric scatter that moves both the
       needed x entries in and the b contributions back, as required because
       only the upper triangular part of the matrix is stored.
*/
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(sbaij->B->data);
  PetscErrorCode ierr;
  PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
  PetscInt       bs = mat->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
  IS             from,to;
  Vec            gvec;
  PetscMPIInt    rank=sbaij->rank,lsize,size=sbaij->size;
  PetscInt       *owners=sbaij->rowners,*sowners,*ec_owner,k;
  PetscScalar    *ptr;

  PetscFunctionBegin;
  if (sbaij->sMvctx) {
    /* These two lines should be in DisAssemble_MPISBAIJ(); it is not known why they cause a crash there */
    ierr = VecScatterDestroy(sbaij->sMvctx);CHKERRQ(ierr);
    sbaij->sMvctx = 0;
  }

  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;   /* ec counts the distinct block columns used */
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form arrays of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  /* sgarray holds 2*ec scatter indices; its tail (last ec slots, aliased as
     ec_owner) records the owning rank of each needed block column */
  ierr = PetscMalloc((3*ec+1)*sizeof(PetscInt),&sgarray);CHKERRQ(ierr);
  ec_owner = sgarray + 2*ec;

  ec = 0;
  /* walk the columns in owner-rank order so garray comes out sorted and
     ec_owner[] lines up with it */
  for (j=0; j<size; j++){
    for (i=owners[j]; i<owners[j+1]; i++){
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs      = ec;
  sbaij->B->n = ec*mat->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  ierr = PetscMalloc((2*ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);

  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but useful for some applications */
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr);

  sbaij->garray = garray;
  ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
  /* local part holds the mbs owned block rows followed by the ec ghost block columns */
  lsize = (mbs + ec)*bs;
  ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
  ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); /* NOTE(review): vec_size appears unused below */

  sowners = sbaij->slvec0->map.range;

  /* x index in the IS sfrom */
  for (i=0; i<ec; i++) {
    j = ec_owner[i];
    /* shift the global block column into the (mbs+ec)-partitioned slvec0
       block numbering of its owning rank */
    sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* b index in the IS sfrom */
  k = sowners[rank]/bs + mbs;   /* first ghost block slot of this rank in slvec0 */
  for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;

  for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);

  /* x index in the IS sto */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
  /* b index in the IS sto */
  for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);

  /* generate the SBAIJ scatter context */
  ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);

   /*
      Post the receives for the first matrix vector product. We synchronize after
    this on the chance that the user immediately calls MatMult() after assembling
    the matrix.
  */
  ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr);

  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  /* slvec1a/slvec1b (and slvec0b) are sequential views sharing storage with
     the parallel vectors: the leading bs*mbs owned entries and the trailing
     ghost entries, respectively */
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);

  ierr = PetscFree(stmp);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  ierr = PetscFree(sgarray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
164 
165 #undef __FUNCT__
166 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
/*
   MatSetUpMultiply_MPISBAIJ_2comm - Variant of MatSetUpMultiply_MPISBAIJ()
   that builds only the conventional global->local gather (Mvctx/lvec), i.e.
   the BAIJ-style multiply setup.

   Compacts the column indices of the off-diagonal part B to a local numbering
   (recorded in garray) and creates the scatter context used to gather the
   needed off-process x entries. When PETSC_USE_CTABLE is defined a hash table
   is used instead of a dense Nbs-long marker array, saving memory when the
   global block-column count is large.
*/
PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
{
  Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode     ierr;
  PetscInt           i,j,*aj = B->j,ec = 0,*garray;
  PetscInt           bs = mat->bs,*stmp;
  IS                 from,to;
  Vec                gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* table maps 1-based global block column -> 1-based local index
     (1-based so that a 0 return from PetscTableFind means "absent") */
  PetscTableCreate(B->mbs,&gid1_lid1);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;
    garray[lid] = gid;
  }
  /* sort garray, then rebuild the table so local indices follow sorted order */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->n = ec*mat->bs;
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;   /* ec counts the distinct block columns used */
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->n   = ec*mat->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* garray is temporarily scaled to point indices for ISCreateBlock, then restored */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;
  }

  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we must do either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector.*/
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  /*
      Post the receives for the first matrix vector product. We synchronize after
    this on the chance that the user immediately calls MatMult() after assembling
    the matrix.
  */
  ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  baij->garray = garray;
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
306 
307 /*
308      Takes the local part of an already assembled MPISBAIJ matrix
309    and disassembles it. This is to allow new nonzeros into the matrix
310    that require more communication in the matrix vector multiply.
311    Thus certain data-structures must be rebuilt.
312 
313    Kind of slow! But that's what application programmers get when
314    they are sloppy.
315 */
316 #undef __FUNCT__
317 #define __FUNCT__ "DisAssemble_MPISBAIJ"
318 PetscErrorCode DisAssemble_MPISBAIJ(Mat A)
319 {
320   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
321   Mat            B = baij->B,Bnew;
322   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
323   PetscErrorCode ierr;
324   PetscInt       i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
325   PetscInt       k,bs=A->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m;
326   MatScalar      *a = Bbaij->a;
327   PetscScalar    *atmp;
328 #if defined(PETSC_USE_MAT_SINGLE)
329   PetscInt       l;
330 #endif
331 
332   PetscFunctionBegin;
333 #if defined(PETSC_USE_MAT_SINGLE)
334   ierr = PetscMalloc(A->bs*sizeof(PetscScalar),&atmp);
335 #endif
336   /* free stuff related to matrix-vec multiply */
337   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
338   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);
339   baij->lvec = 0;
340   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);
341   baij->Mvctx = 0;
342 
343   ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
344   ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
345   baij->slvec0 = 0;
346   ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
347   ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
348   ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
349   baij->slvec1 = 0;
350 
351   if (baij->colmap) {
352 #if defined (PETSC_USE_CTABLE)
353     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
354 #else
355     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
356     baij->colmap = 0;
357     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
358 #endif
359   }
360 
361   /* make sure that B is assembled so we can access its values */
362   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
363   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
364 
365   /* invent new B and copy stuff over */
366   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
367   for (i=0; i<mbs; i++) {
368     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
369   }
370   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
371   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
372   ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
373   ierr = MatSeqBAIJSetPreallocation(Bnew,B->bs,0,nz);CHKERRQ(ierr);
374   ierr = PetscFree(nz);CHKERRQ(ierr);
375 
376   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
377   for (i=0; i<mbs; i++) {
378     rvals[0] = bs*i;
379     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
380     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
381       col = garray[Bbaij->j[j]]*bs;
382       for (k=0; k<bs; k++) {
383 #if defined(PETSC_USE_MAT_SINGLE)
384         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
385 #else
386         atmp = a+j*bs2 + k*bs;
387 #endif
388         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
389         col++;
390       }
391     }
392   }
393 #if defined(PETSC_USE_MAT_SINGLE)
394   ierr = PetscFree(atmp);CHKERRQ(ierr);
395 #endif
396   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
397   baij->garray = 0;
398   ierr = PetscFree(rvals);CHKERRQ(ierr);
399   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
400   ierr = MatDestroy(B);CHKERRQ(ierr);
401   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
402   baij->B = Bnew;
403   A->was_assembled = PETSC_FALSE;
404   PetscFunctionReturn(0);
405 }
406 
407 
408 
409