xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision fdb35ca6b8bfd78facfcbaad2994e8eeeb8f8a19)
1 
2 /*
3    Support for the parallel SBAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
6 
7 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
8 
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
12 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
15   Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ*)(sbaij->B->data);
16   PetscErrorCode ierr;
17   PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
18   PetscInt       bs  = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
19   IS             from,to;
20   Vec            gvec;
21   PetscMPIInt    rank   =sbaij->rank,lsize,size=sbaij->size;
22   PetscInt       *owners=sbaij->rangebs,*sowners,*ec_owner,k;
23   PetscScalar    *ptr;
24 
25   PetscFunctionBegin;
26   ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr);
27 
28   /* For the first stab we make an array as long as the number of columns */
29   /* mark those columns that are in sbaij->B */
30   ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
31   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
32   for (i=0; i<mbs; i++) {
33     for (j=0; j<B->ilen[i]; j++) {
34       if (!indices[aj[B->i[i] + j]]) ec++;
35       indices[aj[B->i[i] + j]] = 1;
36     }
37   }
38 
39   /* form arrays of columns we need */
40   ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
41   ierr = PetscMalloc2(2*ec,PetscInt,&sgarray,ec,PetscInt,&ec_owner);CHKERRQ(ierr);
42 
43   ec = 0;
44   for (j=0; j<size; j++) {
45     for (i=owners[j]; i<owners[j+1]; i++) {
46       if (indices[i]) {
47         garray[ec]   = i;
48         ec_owner[ec] = j;
49         ec++;
50       }
51     }
52   }
53 
54   /* make indices now point into garray */
55   for (i=0; i<ec; i++) indices[garray[i]] = i;
56 
57   /* compact out the extra columns in B */
58   for (i=0; i<mbs; i++) {
59     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
60   }
61   B->nbs = ec;
62 
63   sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs;
64 
65   ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr);
66   ierr = PetscFree(indices);CHKERRQ(ierr);
67 
68   /* create local vector that is used to scatter into */
69   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
70 
71   /* create two temporary index sets for building scatter-gather */
72   ierr = PetscMalloc(2*ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
73   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
74   for (i=0; i<ec; i++) stmp[i] = i;
75   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
76 
77   /* generate the scatter context
78      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */
79   ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
80   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
81   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
82 
83   sbaij->garray = garray;
84 
85   ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr);
86   ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr);
87   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
88   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
89 
90   ierr = ISDestroy(&from);CHKERRQ(ierr);
91   ierr = ISDestroy(&to);CHKERRQ(ierr);
92 
93   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
94   lsize = (mbs + ec)*bs;
95   ierr  = VecCreateMPI(((PetscObject)mat)->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
96   ierr  = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
97   ierr  = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
98 
99   sowners = sbaij->slvec0->map->range;
100 
101   /* x index in the IS sfrom */
102   for (i=0; i<ec; i++) {
103     j          = ec_owner[i];
104     sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
105   }
106   /* b index in the IS sfrom */
107   k = sowners[rank]/bs + mbs;
108   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
109   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
110 
111   /* x index in the IS sto */
112   k = sowners[rank]/bs + mbs;
113   for (i=0; i<ec; i++) stmp[i] = (k + i);
114   /* b index in the IS sto */
115   for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];
116 
117   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
118 
119   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
120 
121   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
122   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
123   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
124   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
125   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
126 
127   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
128   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
129   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
130 
131   ierr = PetscFree(stmp);CHKERRQ(ierr);
132   ierr = MPI_Barrier(((PetscObject)mat)->comm);CHKERRQ(ierr);
133 
134   ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr);
135   ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr);
136   ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr);
137   ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr);
138   ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr);
139   ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr);
140   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
141   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
142 
143   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
144   ierr = ISDestroy(&from);CHKERRQ(ierr);
145   ierr = ISDestroy(&to);CHKERRQ(ierr);
146   ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr);
147   PetscFunctionReturn(0);
148 }
149 
150 #undef __FUNCT__
151 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
152 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
153 {
154   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
155   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)(baij->B->data);
156   PetscErrorCode ierr;
157   PetscInt       i,j,*aj = B->j,ec = 0,*garray;
158   PetscInt       bs = mat->rmap->bs,*stmp;
159   IS             from,to;
160   Vec            gvec;
161 #if defined(PETSC_USE_CTABLE)
162   PetscTable         gid1_lid1;
163   PetscTablePosition tpos;
164   PetscInt           gid,lid;
165 #else
166   PetscInt Nbs = baij->Nbs,*indices;
167 #endif
168 
169   PetscFunctionBegin;
170 #if defined(PETSC_USE_CTABLE)
171   /* use a table - Mark Adams */
172   PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);
173   for (i=0; i<B->mbs; i++) {
174     for (j=0; j<B->ilen[i]; j++) {
175       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
176       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
177       if (!data) {
178         /* one based table */
179         ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
180       }
181     }
182   }
183   /* form array of columns we need */
184   ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
185   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
186   while (tpos) {
187     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
188     gid--; lid--;
189     garray[lid] = gid;
190   }
191   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
192   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
193   for (i=0; i<ec; i++) {
194     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
195   }
196   /* compact out the extra columns in B */
197   for (i=0; i<B->mbs; i++) {
198     for (j=0; j<B->ilen[i]; j++) {
199       PetscInt gid1 = aj[B->i[i] + j] + 1;
200       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
201       lid--;
202       aj[B->i[i]+j] = lid;
203     }
204   }
205   B->nbs = ec;
206 
207   baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
208 
209   ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
210   ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
211 #else
212   /* For the first stab we make an array as long as the number of columns */
213   /* mark those columns that are in baij->B */
214   ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
215   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
216   for (i=0; i<B->mbs; i++) {
217     for (j=0; j<B->ilen[i]; j++) {
218       if (!indices[aj[B->i[i] + j]]) ec++;
219       indices[aj[B->i[i] + j]] = 1;
220     }
221   }
222 
223   /* form array of columns we need */
224   ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
225   ec = 0;
226   for (i=0; i<Nbs; i++) {
227     if (indices[i]) {
228       garray[ec++] = i;
229     }
230   }
231 
232   /* make indices now point into garray */
233   for (i=0; i<ec; i++) indices[garray[i]] = i;
234 
235   /* compact out the extra columns in B */
236   for (i=0; i<B->mbs; i++) {
237     for (j=0; j<B->ilen[i]; j++) {
238       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
239     }
240   }
241   B->nbs = ec;
242 
243   baij->B->cmap->n = ec*mat->rmap->bs;
244 
245   ierr = PetscFree(indices);CHKERRQ(ierr);
246 #endif
247 
248   /* create local vector that is used to scatter into */
249   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
250 
251   /* create two temporary index sets for building scatter-gather */
252   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
253 
254   ierr = PetscMalloc(ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
255   for (i=0; i<ec; i++) stmp[i] = i;
256   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);
257 
258   /* create temporary global vector to generate scatter context */
259   ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
260   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
261   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
262 
263   ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
264   ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
265   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
266   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
267 
268   baij->garray = garray;
269 
270   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
271   ierr = ISDestroy(&from);CHKERRQ(ierr);
272   ierr = ISDestroy(&to);CHKERRQ(ierr);
273   PetscFunctionReturn(0);
274 }
275 
276 /*
277      Takes the local part of an already assembled MPISBAIJ matrix
278    and disassembles it. This is to allow new nonzeros into the matrix
279    that require more communication in the matrix vector multiply.
280    Thus certain data-structures must be rebuilt.
281 
282    Kind of slow! But that's what application programmers get when
283    they are sloppy.
284 */
285 #undef __FUNCT__
286 #define __FUNCT__ "MatDisAssemble_MPISBAIJ"
287 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
288 {
289   Mat_MPISBAIJ   *baij  = (Mat_MPISBAIJ*)A->data;
290   Mat            B      = baij->B,Bnew;
291   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
292   PetscErrorCode ierr;
293   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
294   PetscInt       k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
295   MatScalar      *a = Bbaij->a;
296   PetscScalar    *atmp;
297 #if defined(PETSC_USE_REAL_MAT_SINGLE)
298   PetscInt l;
299 #endif
300 
301   PetscFunctionBegin;
302 #if defined(PETSC_USE_REAL_MAT_SINGLE)
303   ierr = PetscMalloc(A->rmap->bs*sizeof(PetscScalar),&atmp);
304 #endif
305   /* free stuff related to matrix-vec multiply */
306   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
307   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
308   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
309 
310   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
311   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
312   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
313   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
314   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
315 
316   if (baij->colmap) {
317 #if defined(PETSC_USE_CTABLE)
318     ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
319 #else
320     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
321     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
322 #endif
323   }
324 
325   /* make sure that B is assembled so we can access its values */
326   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
327   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
328 
329   /* invent new B and copy stuff over */
330   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
331   for (i=0; i<mbs; i++) {
332     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
333   }
334   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
335   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
336   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
337   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
338 
339   ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */
340 
341   ierr = PetscFree(nz);CHKERRQ(ierr);
342 
343   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
344   for (i=0; i<mbs; i++) {
345     rvals[0] = bs*i;
346     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
347     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
348       col = garray[Bbaij->j[j]]*bs;
349       for (k=0; k<bs; k++) {
350 #if defined(PETSC_USE_REAL_MAT_SINGLE)
351         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
352 #else
353         atmp = a+j*bs2 + k*bs;
354 #endif
355         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
356         col++;
357       }
358     }
359   }
360 #if defined(PETSC_USE_REAL_MAT_SINGLE)
361   ierr = PetscFree(atmp);CHKERRQ(ierr);
362 #endif
363   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
364 
365   baij->garray = 0;
366 
367   ierr = PetscFree(rvals);CHKERRQ(ierr);
368   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
369   ierr = MatDestroy(&B);CHKERRQ(ierr);
370   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
371 
372   baij->B = Bnew;
373 
374   A->was_assembled = PETSC_FALSE;
375   PetscFunctionReturn(0);
376 }
377 
378 
379 
380