xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 609bdbee21ea3be08735c64dbe00a9ab27759925)
1 
2 /*
3    Support for the parallel SBAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
6 
7 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
8 
9 
10 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
11 {
12   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
13   Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ*)(sbaij->B->data);
14   PetscErrorCode ierr;
15   PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
16   PetscInt       bs  = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
17   IS             from,to;
18   Vec            gvec;
19   PetscMPIInt    rank   =sbaij->rank,lsize,size=sbaij->size;
20   PetscInt       *owners=sbaij->rangebs,*ec_owner,k;
21   const PetscInt *sowners;
22   PetscScalar    *ptr;
23 
24   PetscFunctionBegin;
25   ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr);
26 
27   /* For the first stab we make an array as long as the number of columns */
28   /* mark those columns that are in sbaij->B */
29   ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr);
30   for (i=0; i<mbs; i++) {
31     for (j=0; j<B->ilen[i]; j++) {
32       if (!indices[aj[B->i[i] + j]]) ec++;
33       indices[aj[B->i[i] + j]] = 1;
34     }
35   }
36 
37   /* form arrays of columns we need */
38   ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
39   ierr = PetscMalloc2(2*ec,&sgarray,ec,&ec_owner);CHKERRQ(ierr);
40 
41   ec = 0;
42   for (j=0; j<size; j++) {
43     for (i=owners[j]; i<owners[j+1]; i++) {
44       if (indices[i]) {
45         garray[ec]   = i;
46         ec_owner[ec] = j;
47         ec++;
48       }
49     }
50   }
51 
52   /* make indices now point into garray */
53   for (i=0; i<ec; i++) indices[garray[i]] = i;
54 
55   /* compact out the extra columns in B */
56   for (i=0; i<mbs; i++) {
57     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
58   }
59   B->nbs = ec;
60 
61   sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs;
62 
63   ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr);
64   ierr = PetscFree(indices);CHKERRQ(ierr);
65 
66   /* create local vector that is used to scatter into */
67   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
68 
69   /* create two temporary index sets for building scatter-gather */
70   ierr = PetscMalloc1(2*ec,&stmp);CHKERRQ(ierr);
71   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
72   for (i=0; i<ec; i++) stmp[i] = i;
73   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
74 
75   /* generate the scatter context
76      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */
77   ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);
78   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
79   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
80 
81   sbaij->garray = garray;
82 
83   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx);CHKERRQ(ierr);
84   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec);CHKERRQ(ierr);
85   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
86   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
87 
88   ierr = ISDestroy(&from);CHKERRQ(ierr);
89   ierr = ISDestroy(&to);CHKERRQ(ierr);
90 
91   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
92   lsize = (mbs + ec)*bs;
93   ierr  = VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
94   ierr  = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
95   ierr  = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
96 
97   ierr = VecGetOwnershipRanges(sbaij->slvec0,&sowners);CHKERRQ(ierr);
98 
99   /* x index in the IS sfrom */
100   for (i=0; i<ec; i++) {
101     j          = ec_owner[i];
102     sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
103   }
104   /* b index in the IS sfrom */
105   k = sowners[rank]/bs + mbs;
106   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
107   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
108 
109   /* x index in the IS sto */
110   k = sowners[rank]/bs + mbs;
111   for (i=0; i<ec; i++) stmp[i] = (k + i);
112   /* b index in the IS sto */
113   for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];
114 
115   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
116 
117   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
118 
119   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
120   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
121   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
122   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
123   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
124 
125   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
126   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
127   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
128 
129   ierr = PetscFree(stmp);CHKERRQ(ierr);
130   ierr = MPI_Barrier(PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
131 
132   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx);CHKERRQ(ierr);
133   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0);CHKERRQ(ierr);
134   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1);CHKERRQ(ierr);
135   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b);CHKERRQ(ierr);
136   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a);CHKERRQ(ierr);
137   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b);CHKERRQ(ierr);
138   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
139   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
140 
141   ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
142   ierr = ISDestroy(&from);CHKERRQ(ierr);
143   ierr = ISDestroy(&to);CHKERRQ(ierr);
144   ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr);
145   PetscFunctionReturn(0);
146 }
147 
148 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
149 {
150   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
151   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)(baij->B->data);
152   PetscErrorCode ierr;
153   PetscInt       i,j,*aj = B->j,ec = 0,*garray;
154   PetscInt       bs = mat->rmap->bs,*stmp;
155   IS             from,to;
156   Vec            gvec;
157 #if defined(PETSC_USE_CTABLE)
158   PetscTable         gid1_lid1;
159   PetscTablePosition tpos;
160   PetscInt           gid,lid;
161 #else
162   PetscInt Nbs = baij->Nbs,*indices;
163 #endif
164 
165   PetscFunctionBegin;
166 #if defined(PETSC_USE_CTABLE)
167   /* use a table - Mark Adams */
168   PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);
169   for (i=0; i<B->mbs; i++) {
170     for (j=0; j<B->ilen[i]; j++) {
171       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
172       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
173       if (!data) {
174         /* one based table */
175         ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
176       }
177     }
178   }
179   /* form array of columns we need */
180   ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
181   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
182   while (tpos) {
183     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
184     gid--; lid--;
185     garray[lid] = gid;
186   }
187   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
188   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
189   for (i=0; i<ec; i++) {
190     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
191   }
192   /* compact out the extra columns in B */
193   for (i=0; i<B->mbs; i++) {
194     for (j=0; j<B->ilen[i]; j++) {
195       PetscInt gid1 = aj[B->i[i] + j] + 1;
196       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
197       lid--;
198       aj[B->i[i]+j] = lid;
199     }
200   }
201   B->nbs = ec;
202 
203   baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
204 
205   ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
206   ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
207 #else
208   /* For the first stab we make an array as long as the number of columns */
209   /* mark those columns that are in baij->B */
210   ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr);
211   for (i=0; i<B->mbs; i++) {
212     for (j=0; j<B->ilen[i]; j++) {
213       if (!indices[aj[B->i[i] + j]]) ec++;
214       indices[aj[B->i[i] + j]] = 1;
215     }
216   }
217 
218   /* form array of columns we need */
219   ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
220   ec = 0;
221   for (i=0; i<Nbs; i++) {
222     if (indices[i]) {
223       garray[ec++] = i;
224     }
225   }
226 
227   /* make indices now point into garray */
228   for (i=0; i<ec; i++) indices[garray[i]] = i;
229 
230   /* compact out the extra columns in B */
231   for (i=0; i<B->mbs; i++) {
232     for (j=0; j<B->ilen[i]; j++) {
233       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
234     }
235   }
236   B->nbs = ec;
237 
238   baij->B->cmap->n = ec*mat->rmap->bs;
239 
240   ierr = PetscFree(indices);CHKERRQ(ierr);
241 #endif
242 
243   /* create local vector that is used to scatter into */
244   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
245 
246   /* create two temporary index sets for building scatter-gather */
247   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
248 
249   ierr = PetscMalloc1(ec,&stmp);CHKERRQ(ierr);
250   for (i=0; i<ec; i++) stmp[i] = i;
251   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);
252 
253   /* create temporary global vector to generate scatter context */
254   ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);
255   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
256   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
257 
258   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr);
259   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr);
260   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
261   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
262 
263   baij->garray = garray;
264 
265   ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
266   ierr = ISDestroy(&from);CHKERRQ(ierr);
267   ierr = ISDestroy(&to);CHKERRQ(ierr);
268   PetscFunctionReturn(0);
269 }
270 
271 /*
272      Takes the local part of an already assembled MPISBAIJ matrix
273    and disassembles it. This is to allow new nonzeros into the matrix
274    that require more communication in the matrix vector multiply.
275    Thus certain data-structures must be rebuilt.
276 
277    Kind of slow! But that's what application programmers get when
278    they are sloppy.
279 */
280 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
281 {
282   Mat_MPISBAIJ   *baij  = (Mat_MPISBAIJ*)A->data;
283   Mat            B      = baij->B,Bnew;
284   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
285   PetscErrorCode ierr;
286   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
287   PetscInt       k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
288   MatScalar      *a = Bbaij->a;
289   PetscScalar    *atmp;
290 #if defined(PETSC_USE_REAL_MAT_SINGLE)
291   PetscInt l;
292 #endif
293 
294   PetscFunctionBegin;
295 #if defined(PETSC_USE_REAL_MAT_SINGLE)
296   ierr = PetscMalloc1(A->rmap->bs,&atmp);
297 #endif
298   /* free stuff related to matrix-vec multiply */
299   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
300   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
301   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
302 
303   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
304   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
305   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
306   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
307   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
308 
309   if (baij->colmap) {
310 #if defined(PETSC_USE_CTABLE)
311     ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
312 #else
313     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
314     ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
315 #endif
316   }
317 
318   /* make sure that B is assembled so we can access its values */
319   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321 
322   /* invent new B and copy stuff over */
323   ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr);
324   for (i=0; i<mbs; i++) {
325     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
326   }
327   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
328   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
329   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
330   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
331   ierr = PetscFree(nz);CHKERRQ(ierr);
332 
333   ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */
334   /*
335    Ensure that B's nonzerostate is monotonically increasing.
336    Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
337    */
338   Bnew->nonzerostate = B->nonzerostate;
339   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
340   for (i=0; i<mbs; i++) {
341     rvals[0] = bs*i;
342     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
343     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
344       col = garray[Bbaij->j[j]]*bs;
345       for (k=0; k<bs; k++) {
346 #if defined(PETSC_USE_REAL_MAT_SINGLE)
347         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
348 #else
349         atmp = a+j*bs2 + k*bs;
350 #endif
351         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
352         col++;
353       }
354     }
355   }
356 #if defined(PETSC_USE_REAL_MAT_SINGLE)
357   ierr = PetscFree(atmp);CHKERRQ(ierr);
358 #endif
359   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
360 
361   baij->garray = 0;
362 
363   ierr = PetscFree(rvals);CHKERRQ(ierr);
364   ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
365   ierr = MatDestroy(&B);CHKERRQ(ierr);
366   ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr);
367 
368   baij->B = Bnew;
369 
370   A->was_assembled = PETSC_FALSE;
371   PetscFunctionReturn(0);
372 }
373