xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 4e4bbfaa3814cc83b5851d85be69070845f5653e)
1 
2 /*
3    Support for the parallel SBAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
6 
7 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
8 {
9   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
10   Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ*)(sbaij->B->data);
11   PetscErrorCode ierr;
12   PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
13   PetscInt       bs  = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
14   IS             from,to;
15   Vec            gvec;
16   PetscMPIInt    rank   =sbaij->rank,lsize,size=sbaij->size;
17   PetscInt       *owners=sbaij->rangebs,*ec_owner,k;
18   const PetscInt *sowners;
19   PetscScalar    *ptr;
20 
21   PetscFunctionBegin;
22   ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr);
23 
24   /* For the first stab we make an array as long as the number of columns */
25   /* mark those columns that are in sbaij->B */
26   ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr);
27   for (i=0; i<mbs; i++) {
28     for (j=0; j<B->ilen[i]; j++) {
29       if (!indices[aj[B->i[i] + j]]) ec++;
30       indices[aj[B->i[i] + j]] = 1;
31     }
32   }
33 
34   /* form arrays of columns we need */
35   ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
36   ierr = PetscMalloc2(2*ec,&sgarray,ec,&ec_owner);CHKERRQ(ierr);
37 
38   ec = 0;
39   for (j=0; j<size; j++) {
40     for (i=owners[j]; i<owners[j+1]; i++) {
41       if (indices[i]) {
42         garray[ec]   = i;
43         ec_owner[ec] = j;
44         ec++;
45       }
46     }
47   }
48 
49   /* make indices now point into garray */
50   for (i=0; i<ec; i++) indices[garray[i]] = i;
51 
52   /* compact out the extra columns in B */
53   for (i=0; i<mbs; i++) {
54     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
55   }
56   B->nbs = ec;
57   ierr = PetscLayoutDestroy(&sbaij->B->cmap);CHKERRQ(ierr);
58   ierr = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&sbaij->B->cmap);CHKERRQ(ierr);
59   ierr = PetscFree(indices);CHKERRQ(ierr);
60 
61   /* create local vector that is used to scatter into */
62   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
63 
64   /* create two temporary index sets for building scatter-gather */
65   ierr = PetscMalloc1(2*ec,&stmp);CHKERRQ(ierr);
66   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
67   for (i=0; i<ec; i++) stmp[i] = i;
68   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
69 
70   /* generate the scatter context
71      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefull for some applications */
72   ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);
73   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
74   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
75 
76   sbaij->garray = garray;
77 
78   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx);CHKERRQ(ierr);
79   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec);CHKERRQ(ierr);
80   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
81   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
82 
83   ierr = ISDestroy(&from);CHKERRQ(ierr);
84   ierr = ISDestroy(&to);CHKERRQ(ierr);
85 
86   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
87   lsize = (mbs + ec)*bs;
88   ierr  = VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
89   ierr  = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
90   ierr  = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
91 
92   ierr = VecGetOwnershipRanges(sbaij->slvec0,&sowners);CHKERRQ(ierr);
93 
94   /* x index in the IS sfrom */
95   for (i=0; i<ec; i++) {
96     j          = ec_owner[i];
97     sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
98   }
99   /* b index in the IS sfrom */
100   k = sowners[rank]/bs + mbs;
101   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
102   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
103 
104   /* x index in the IS sto */
105   k = sowners[rank]/bs + mbs;
106   for (i=0; i<ec; i++) stmp[i] = (k + i);
107   /* b index in the IS sto */
108   for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];
109 
110   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
111 
112   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
113 
114   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
115   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
116   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
117   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
118   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
119 
120   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
121   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
122   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
123 
124   ierr = PetscFree(stmp);CHKERRQ(ierr);
125   ierr = MPI_Barrier(PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
126 
127   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx);CHKERRQ(ierr);
128   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0);CHKERRQ(ierr);
129   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1);CHKERRQ(ierr);
130   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b);CHKERRQ(ierr);
131   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a);CHKERRQ(ierr);
132   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b);CHKERRQ(ierr);
133   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
134   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
135 
136   ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
137   ierr = ISDestroy(&from);CHKERRQ(ierr);
138   ierr = ISDestroy(&to);CHKERRQ(ierr);
139   ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr);
140   PetscFunctionReturn(0);
141 }
142 
143 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
144 {
145   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
146   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)(baij->B->data);
147   PetscErrorCode ierr;
148   PetscInt       i,j,*aj = B->j,ec = 0,*garray;
149   PetscInt       bs = mat->rmap->bs,*stmp;
150   IS             from,to;
151   Vec            gvec;
152 #if defined(PETSC_USE_CTABLE)
153   PetscTable         gid1_lid1;
154   PetscTablePosition tpos;
155   PetscInt           gid,lid;
156 #else
157   PetscInt Nbs = baij->Nbs,*indices;
158 #endif
159 
160   PetscFunctionBegin;
161 #if defined(PETSC_USE_CTABLE)
162   /* use a table - Mark Adams */
163   PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);
164   for (i=0; i<B->mbs; i++) {
165     for (j=0; j<B->ilen[i]; j++) {
166       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
167       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
168       if (!data) {
169         /* one based table */
170         ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
171       }
172     }
173   }
174   /* form array of columns we need */
175   ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
176   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
177   while (tpos) {
178     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
179     gid--; lid--;
180     garray[lid] = gid;
181   }
182   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
183   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
184   for (i=0; i<ec; i++) {
185     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
186   }
187   /* compact out the extra columns in B */
188   for (i=0; i<B->mbs; i++) {
189     for (j=0; j<B->ilen[i]; j++) {
190       PetscInt gid1 = aj[B->i[i] + j] + 1;
191       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
192       lid--;
193       aj[B->i[i]+j] = lid;
194     }
195   }
196   B->nbs = ec;
197   ierr = PetscLayoutDestroy(&baij->B->cmap);CHKERRQ(ierr);
198   ierr = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&baij->B->cmap);CHKERRQ(ierr);
199   ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
200 #else
201   /* For the first stab we make an array as long as the number of columns */
202   /* mark those columns that are in baij->B */
203   ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr);
204   for (i=0; i<B->mbs; i++) {
205     for (j=0; j<B->ilen[i]; j++) {
206       if (!indices[aj[B->i[i] + j]]) ec++;
207       indices[aj[B->i[i] + j]] = 1;
208     }
209   }
210 
211   /* form array of columns we need */
212   ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
213   ec = 0;
214   for (i=0; i<Nbs; i++) {
215     if (indices[i]) {
216       garray[ec++] = i;
217     }
218   }
219 
220   /* make indices now point into garray */
221   for (i=0; i<ec; i++) indices[garray[i]] = i;
222 
223   /* compact out the extra columns in B */
224   for (i=0; i<B->mbs; i++) {
225     for (j=0; j<B->ilen[i]; j++) {
226       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
227     }
228   }
229   B->nbs = ec;
230 
231   baij->B->cmap->n = ec*mat->rmap->bs;
232 
233   ierr = PetscFree(indices);CHKERRQ(ierr);
234 #endif
235 
236   /* create local vector that is used to scatter into */
237   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
238 
239   /* create two temporary index sets for building scatter-gather */
240   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
241 
242   ierr = PetscMalloc1(ec,&stmp);CHKERRQ(ierr);
243   for (i=0; i<ec; i++) stmp[i] = i;
244   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);
245 
246   /* create temporary global vector to generate scatter context */
247   ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);
248   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
249   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
250 
251   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr);
252   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr);
253   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
254   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
255 
256   baij->garray = garray;
257 
258   ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
259   ierr = ISDestroy(&from);CHKERRQ(ierr);
260   ierr = ISDestroy(&to);CHKERRQ(ierr);
261   PetscFunctionReturn(0);
262 }
263 
264 /*
265      Takes the local part of an already assembled MPISBAIJ matrix
266    and disassembles it. This is to allow new nonzeros into the matrix
267    that require more communication in the matrix vector multiply.
268    Thus certain data-structures must be rebuilt.
269 
270    Kind of slow! But that's what application programmers get when
271    they are sloppy.
272 */
273 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
274 {
275   Mat_MPISBAIJ   *baij  = (Mat_MPISBAIJ*)A->data;
276   Mat            B      = baij->B,Bnew;
277   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
278   PetscErrorCode ierr;
279   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
280   PetscInt       k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
281   MatScalar      *a = Bbaij->a;
282   PetscScalar    *atmp;
283 #if defined(PETSC_USE_REAL_MAT_SINGLE)
284   PetscInt l;
285 #endif
286 
287   PetscFunctionBegin;
288 #if defined(PETSC_USE_REAL_MAT_SINGLE)
289   ierr = PetscMalloc1(A->rmap->bs,&atmp);
290 #endif
291   /* free stuff related to matrix-vec multiply */
292   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
293   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
294   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
295 
296   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
297   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
298   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
299   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
300   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
301 
302   if (baij->colmap) {
303 #if defined(PETSC_USE_CTABLE)
304     ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
305 #else
306     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
307     ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
308 #endif
309   }
310 
311   /* make sure that B is assembled so we can access its values */
312   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
313   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
314 
315   /* invent new B and copy stuff over */
316   ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr);
317   for (i=0; i<mbs; i++) {
318     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
319   }
320   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
321   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
322   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
323   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
324   ierr = PetscFree(nz);CHKERRQ(ierr);
325 
326   if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
327     ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew;
328   }
329 
330   /*
331    Ensure that B's nonzerostate is monotonically increasing.
332    Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
333    */
334   Bnew->nonzerostate = B->nonzerostate;
335   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
336   for (i=0; i<mbs; i++) {
337     rvals[0] = bs*i;
338     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
339     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
340       col = garray[Bbaij->j[j]]*bs;
341       for (k=0; k<bs; k++) {
342 #if defined(PETSC_USE_REAL_MAT_SINGLE)
343         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
344 #else
345         atmp = a+j*bs2 + k*bs;
346 #endif
347         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
348         col++;
349       }
350     }
351   }
352 #if defined(PETSC_USE_REAL_MAT_SINGLE)
353   ierr = PetscFree(atmp);CHKERRQ(ierr);
354 #endif
355   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
356 
357   baij->garray = 0;
358 
359   ierr = PetscFree(rvals);CHKERRQ(ierr);
360   ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
361   ierr = MatDestroy(&B);CHKERRQ(ierr);
362   ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr);
363 
364   baij->B = Bnew;
365 
366   A->was_assembled = PETSC_FALSE;
367   PetscFunctionReturn(0);
368 }
369