xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 3c48a1e8da19189ff2402a4e41a2fc082d52c349)
1 
2 /*
3    Support for the parallel SBAIJ matrix vector multiply
4 */
5 #include "../src/mat/impls/sbaij/mpi/mpisbaij.h"
6 
7 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
8 
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
/*
   Builds the communication structures needed for MatMult() on an MPISBAIJ matrix:
   - compacts the off-diagonal block B so its column indices are local,
   - creates sbaij->Mvctx/lvec (conventional BAIJ-style scatter, kept for applications),
   - creates sbaij->sMvctx with the slvec0/slvec1 work vectors, the scatter actually
     used by the symmetric MatMult (each process both sends x-entries and receives
     partial products b, hence the doubled index sets below).
*/
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(sbaij->B->data);
  PetscErrorCode ierr;
  PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
  PetscInt       bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
  IS             from,to;
  Vec            gvec;
  PetscMPIInt    rank=sbaij->rank,lsize,size=sbaij->size;
  PetscInt       *owners=sbaij->rangebs,*sowners,*ec_owner,k;
  PetscScalar    *ptr;

  PetscFunctionBegin;
  if (sbaij->sMvctx) {
    /* These two lines should be in DisAssemble_MPISBAIJ(). Don't know why it causes crash there? */
    ierr = VecScatterDestroy(sbaij->sMvctx);CHKERRQ(ierr);
    sbaij->sMvctx = 0;
  }

  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      /* ec counts the distinct global block columns referenced by B */
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form arrays of columns we need; garray[] = needed global block columns (sorted
     by owner, then by column), ec_owner[] = owning process of each; sgarray gets
     2*ec entries because the symmetric scatter sends x-values AND receives b-values */
  ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscMalloc2(2*ec,PetscInt,&sgarray,ec,PetscInt,&ec_owner);CHKERRQ(ierr);

  ec = 0;
  for (j=0; j<size; j++){
    for (i=owners[j]; i<owners[j+1]; i++){
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B: translate global block columns to local */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs      = ec;
  sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather;
     stmp is sized 2*ec because it is reused below for the symmetric scatter */
  ierr = PetscMalloc(2*ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but useful for some applications */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);

  sbaij->garray = garray;
  ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into;
     slvec0/slvec1 hold (local x part | ghost part) back to back, hence mbs+ec blocks */
  lsize = (mbs + ec)*bs;
  ierr = VecCreateMPI(((PetscObject)mat)->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
  ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);

  /* ownership ranges (in scalar entries) of the slvec layout */
  sowners = sbaij->slvec0->map->range;

  /* x index in the IS sfrom: map each needed global block column into the
     slvec numbering of its owner (sowners[j]/bs - owners[j] is the per-owner shift) */
  for (i=0; i<ec; i++) {
    j = ec_owner[i];
    sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* b index in the IS sfrom: our own ghost segment starts after our mbs local blocks */
  k = sowners[rank]/bs + mbs;
  for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  /* x index in the IS sto */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = (k + i);
  /* b index in the IS sto */
  for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);

  /* the scatter actually used by the symmetric matrix-vector product */
  ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are views into the slvec storage:
     the leading bs*mbs entries are the local part, the rest the ghost part */
  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);

  ierr = PetscFree(stmp);CHKERRQ(ierr);
  ierr = MPI_Barrier(((PetscObject)mat)->comm);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
150 
151 #undef __FUNCT__
152 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
153 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
154 {
155   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
156   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
157   PetscErrorCode     ierr;
158   PetscInt           i,j,*aj = B->j,ec = 0,*garray;
159   PetscInt           bs = mat->rmap->bs,*stmp;
160   IS                 from,to;
161   Vec                gvec;
162 #if defined (PETSC_USE_CTABLE)
163   PetscTable         gid1_lid1;
164   PetscTablePosition tpos;
165   PetscInt           gid,lid;
166 #else
167   PetscInt           Nbs = baij->Nbs,*indices;
168 #endif
169 
170   PetscFunctionBegin;
171 #if defined (PETSC_USE_CTABLE)
172   /* use a table - Mark Adams */
173   PetscTableCreate(B->mbs,&gid1_lid1);
174   for (i=0; i<B->mbs; i++) {
175     for (j=0; j<B->ilen[i]; j++) {
176       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
177       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
178       if (!data) {
179         /* one based table */
180         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
181       }
182     }
183   }
184   /* form array of columns we need */
185   ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
186   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
187   while (tpos) {
188     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
189     gid--; lid--;
190     garray[lid] = gid;
191   }
192   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
193   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
194   for (i=0; i<ec; i++) {
195     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197   /* compact out the extra columns in B */
198   for (i=0; i<B->mbs; i++) {
199     for (j=0; j<B->ilen[i]; j++) {
200       PetscInt gid1 = aj[B->i[i] + j] + 1;
201       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
202       lid --;
203       aj[B->i[i]+j] = lid;
204     }
205   }
206   B->nbs     = ec;
207   baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
208   ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
209   ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr);
210   /* Mark Adams */
211 #else
212   /* For the first stab we make an array as long as the number of columns */
213   /* mark those columns that are in baij->B */
214   ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
215   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
216   for (i=0; i<B->mbs; i++) {
217     for (j=0; j<B->ilen[i]; j++) {
218       if (!indices[aj[B->i[i] + j]]) ec++;
219       indices[aj[B->i[i] + j] ] = 1;
220     }
221   }
222 
223   /* form array of columns we need */
224   ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
225   ec = 0;
226   for (i=0; i<Nbs; i++) {
227     if (indices[i]) {
228       garray[ec++] = i;
229     }
230   }
231 
232   /* make indices now point into garray */
233   for (i=0; i<ec; i++) {
234     indices[garray[i]] = i;
235   }
236 
237   /* compact out the extra columns in B */
238   for (i=0; i<B->mbs; i++) {
239     for (j=0; j<B->ilen[i]; j++) {
240       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
241     }
242   }
243   B->nbs       = ec;
244   baij->B->cmap->n   = ec*mat->rmap->bs;
245   ierr = PetscFree(indices);CHKERRQ(ierr);
246 #endif
247 
248   /* create local vector that is used to scatter into */
249   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
250 
251   /* create two temporary index sets for building scatter-gather */
252   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
253 
254   ierr = PetscMalloc(ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
255   for (i=0; i<ec; i++) { stmp[i] = i; }
256   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);
257 
258   /* create temporary global vector to generate scatter context */
259   ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
260   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
261   ierr = VecDestroy(gvec);CHKERRQ(ierr);
262 
263   ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
264   ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
265   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
266   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
267   baij->garray = garray;
268   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
269   ierr = ISDestroy(from);CHKERRQ(ierr);
270   ierr = ISDestroy(to);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 /*
275      Takes the local part of an already assembled MPISBAIJ matrix
276    and disassembles it. This is to allow new nonzeros into the matrix
277    that require more communication in the matrix vector multiply.
278    Thus certain data-structures must be rebuilt.
279 
280    Kind of slow! But that's what application programmers get when
281    they are sloppy.
282 */
283 #undef __FUNCT__
284 #define __FUNCT__ "DisAssemble_MPISBAIJ"
285 PetscErrorCode DisAssemble_MPISBAIJ(Mat A)
286 {
287   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
288   Mat            B = baij->B,Bnew;
289   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
290   PetscErrorCode ierr;
291   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
292   PetscInt       k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
293   MatScalar      *a = Bbaij->a;
294   PetscScalar    *atmp;
295 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
296   PetscInt       l;
297 #endif
298 
299   PetscFunctionBegin;
300 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
301   ierr = PetscMalloc(A->rmap->bs*sizeof(PetscScalar),&atmp);
302 #endif
303   /* free stuff related to matrix-vec multiply */
304   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
305   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);
306   baij->lvec = 0;
307   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);
308   baij->Mvctx = 0;
309 
310   ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
311   ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
312   baij->slvec0 = 0;
313   ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
314   ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
315   ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
316   baij->slvec1 = 0;
317 
318   if (baij->colmap) {
319 #if defined (PETSC_USE_CTABLE)
320     ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
321 #else
322     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
323     baij->colmap = 0;
324     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
325 #endif
326   }
327 
328   /* make sure that B is assembled so we can access its values */
329   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
330   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
331 
332   /* invent new B and copy stuff over */
333   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
334   for (i=0; i<mbs; i++) {
335     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
336   }
337   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
338   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
339   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
340   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
341   ierr = PetscFree(nz);CHKERRQ(ierr);
342 
343   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
344   for (i=0; i<mbs; i++) {
345     rvals[0] = bs*i;
346     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
347     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
348       col = garray[Bbaij->j[j]]*bs;
349       for (k=0; k<bs; k++) {
350 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
351         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
352 #else
353         atmp = a+j*bs2 + k*bs;
354 #endif
355         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
356         col++;
357       }
358     }
359   }
360 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
361   ierr = PetscFree(atmp);CHKERRQ(ierr);
362 #endif
363   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
364   baij->garray = 0;
365   ierr = PetscFree(rvals);CHKERRQ(ierr);
366   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
367   ierr = MatDestroy(B);CHKERRQ(ierr);
368   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
369   baij->B = Bnew;
370   A->was_assembled = PETSC_FALSE;
371   PetscFunctionReturn(0);
372 }
373 
374 
375 
376