xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 19bfd8f8fa516b4fd4ffe950a75fc2bc24267a71)
1 
2 /*
3    Support for the parallel SBAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
6 
7 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
8 
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
12 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
15   Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ*)(sbaij->B->data);
16   PetscErrorCode ierr;
17   PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
18   PetscInt       bs  = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
19   IS             from,to;
20   Vec            gvec;
21   PetscMPIInt    rank   =sbaij->rank,lsize,size=sbaij->size;
22   PetscInt       *owners=sbaij->rangebs,*ec_owner,k;
23   const PetscInt *sowners;
24   PetscScalar    *ptr;
25 
26   PetscFunctionBegin;
27   ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr);
28 
29   /* For the first stab we make an array as long as the number of columns */
30   /* mark those columns that are in sbaij->B */
31   ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
32   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
33   for (i=0; i<mbs; i++) {
34     for (j=0; j<B->ilen[i]; j++) {
35       if (!indices[aj[B->i[i] + j]]) ec++;
36       indices[aj[B->i[i] + j]] = 1;
37     }
38   }
39 
40   /* form arrays of columns we need */
41   ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
42   ierr = PetscMalloc2(2*ec,PetscInt,&sgarray,ec,PetscInt,&ec_owner);CHKERRQ(ierr);
43 
44   ec = 0;
45   for (j=0; j<size; j++) {
46     for (i=owners[j]; i<owners[j+1]; i++) {
47       if (indices[i]) {
48         garray[ec]   = i;
49         ec_owner[ec] = j;
50         ec++;
51       }
52     }
53   }
54 
55   /* make indices now point into garray */
56   for (i=0; i<ec; i++) indices[garray[i]] = i;
57 
58   /* compact out the extra columns in B */
59   for (i=0; i<mbs; i++) {
60     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
61   }
62   B->nbs = ec;
63 
64   sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs;
65 
66   ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr);
67   ierr = PetscFree(indices);CHKERRQ(ierr);
68 
69   /* create local vector that is used to scatter into */
70   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
71 
72   /* create two temporary index sets for building scatter-gather */
73   ierr = PetscMalloc(2*ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
74   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
75   for (i=0; i<ec; i++) stmp[i] = i;
76   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
77 
78   /* generate the scatter context
79      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */
80   ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);
81   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
82   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
83 
84   sbaij->garray = garray;
85 
86   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx);CHKERRQ(ierr);
87   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec);CHKERRQ(ierr);
88   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
89   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
90 
91   ierr = ISDestroy(&from);CHKERRQ(ierr);
92   ierr = ISDestroy(&to);CHKERRQ(ierr);
93 
94   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
95   lsize = (mbs + ec)*bs;
96   ierr  = VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
97   ierr  = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
98   ierr  = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
99 
100   ierr = VecGetOwnershipRanges(sbaij->slvec0,&sowners);CHKERRQ(ierr);
101 
102   /* x index in the IS sfrom */
103   for (i=0; i<ec; i++) {
104     j          = ec_owner[i];
105     sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
106   }
107   /* b index in the IS sfrom */
108   k = sowners[rank]/bs + mbs;
109   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
110   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
111 
112   /* x index in the IS sto */
113   k = sowners[rank]/bs + mbs;
114   for (i=0; i<ec; i++) stmp[i] = (k + i);
115   /* b index in the IS sto */
116   for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];
117 
118   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
119 
120   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
121 
122   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
123   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
124   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
125   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
126   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
127 
128   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
129   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
130   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
131 
132   ierr = PetscFree(stmp);CHKERRQ(ierr);
133   ierr = MPI_Barrier(PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
134 
135   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx);CHKERRQ(ierr);
136   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0);CHKERRQ(ierr);
137   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1);CHKERRQ(ierr);
138   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b);CHKERRQ(ierr);
139   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a);CHKERRQ(ierr);
140   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b);CHKERRQ(ierr);
141   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
142   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
143 
144   ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
145   ierr = ISDestroy(&from);CHKERRQ(ierr);
146   ierr = ISDestroy(&to);CHKERRQ(ierr);
147   ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr);
148   PetscFunctionReturn(0);
149 }
150 
151 #undef __FUNCT__
152 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
153 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
154 {
155   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
156   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)(baij->B->data);
157   PetscErrorCode ierr;
158   PetscInt       i,j,*aj = B->j,ec = 0,*garray;
159   PetscInt       bs = mat->rmap->bs,*stmp;
160   IS             from,to;
161   Vec            gvec;
162 #if defined(PETSC_USE_CTABLE)
163   PetscTable         gid1_lid1;
164   PetscTablePosition tpos;
165   PetscInt           gid,lid;
166 #else
167   PetscInt Nbs = baij->Nbs,*indices;
168 #endif
169 
170   PetscFunctionBegin;
171 #if defined(PETSC_USE_CTABLE)
172   /* use a table - Mark Adams */
173   PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);
174   for (i=0; i<B->mbs; i++) {
175     for (j=0; j<B->ilen[i]; j++) {
176       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
177       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
178       if (!data) {
179         /* one based table */
180         ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
181       }
182     }
183   }
184   /* form array of columns we need */
185   ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
186   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
187   while (tpos) {
188     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
189     gid--; lid--;
190     garray[lid] = gid;
191   }
192   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
193   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
194   for (i=0; i<ec; i++) {
195     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
196   }
197   /* compact out the extra columns in B */
198   for (i=0; i<B->mbs; i++) {
199     for (j=0; j<B->ilen[i]; j++) {
200       PetscInt gid1 = aj[B->i[i] + j] + 1;
201       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
202       lid--;
203       aj[B->i[i]+j] = lid;
204     }
205   }
206   B->nbs = ec;
207 
208   baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
209 
210   ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
211   ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
212 #else
213   /* For the first stab we make an array as long as the number of columns */
214   /* mark those columns that are in baij->B */
215   ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
216   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
217   for (i=0; i<B->mbs; i++) {
218     for (j=0; j<B->ilen[i]; j++) {
219       if (!indices[aj[B->i[i] + j]]) ec++;
220       indices[aj[B->i[i] + j]] = 1;
221     }
222   }
223 
224   /* form array of columns we need */
225   ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
226   ec = 0;
227   for (i=0; i<Nbs; i++) {
228     if (indices[i]) {
229       garray[ec++] = i;
230     }
231   }
232 
233   /* make indices now point into garray */
234   for (i=0; i<ec; i++) indices[garray[i]] = i;
235 
236   /* compact out the extra columns in B */
237   for (i=0; i<B->mbs; i++) {
238     for (j=0; j<B->ilen[i]; j++) {
239       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
240     }
241   }
242   B->nbs = ec;
243 
244   baij->B->cmap->n = ec*mat->rmap->bs;
245 
246   ierr = PetscFree(indices);CHKERRQ(ierr);
247 #endif
248 
249   /* create local vector that is used to scatter into */
250   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
251 
252   /* create two temporary index sets for building scatter-gather */
253   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
254 
255   ierr = PetscMalloc(ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
256   for (i=0; i<ec; i++) stmp[i] = i;
257   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);
258 
259   /* create temporary global vector to generate scatter context */
260   ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);
261   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
262   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
263 
264   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr);
265   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr);
266   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
267   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
268 
269   baij->garray = garray;
270 
271   ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
272   ierr = ISDestroy(&from);CHKERRQ(ierr);
273   ierr = ISDestroy(&to);CHKERRQ(ierr);
274   PetscFunctionReturn(0);
275 }
276 
277 /*
278      Takes the local part of an already assembled MPISBAIJ matrix
279    and disassembles it. This is to allow new nonzeros into the matrix
280    that require more communication in the matrix vector multiply.
281    Thus certain data-structures must be rebuilt.
282 
283    Kind of slow! But that's what application programmers get when
284    they are sloppy.
285 */
286 #undef __FUNCT__
287 #define __FUNCT__ "MatDisAssemble_MPISBAIJ"
288 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
289 {
290   Mat_MPISBAIJ   *baij  = (Mat_MPISBAIJ*)A->data;
291   Mat            B      = baij->B,Bnew;
292   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
293   PetscErrorCode ierr;
294   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
295   PetscInt       k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
296   MatScalar      *a = Bbaij->a;
297   PetscScalar    *atmp;
298 #if defined(PETSC_USE_REAL_MAT_SINGLE)
299   PetscInt l;
300 #endif
301 
302   PetscFunctionBegin;
303 #if defined(PETSC_USE_REAL_MAT_SINGLE)
304   ierr = PetscMalloc(A->rmap->bs*sizeof(PetscScalar),&atmp);
305 #endif
306   /* free stuff related to matrix-vec multiply */
307   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
308   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
309   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
310 
311   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
312   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
313   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
314   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
315   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
316 
317   if (baij->colmap) {
318 #if defined(PETSC_USE_CTABLE)
319     ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
320 #else
321     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
322     ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
323 #endif
324   }
325 
326   /* make sure that B is assembled so we can access its values */
327   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
328   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
329 
330   /* invent new B and copy stuff over */
331   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
332   for (i=0; i<mbs; i++) {
333     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
334   }
335   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
336   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
337   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
338   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
339 
340   ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */
341 
342   ierr = PetscFree(nz);CHKERRQ(ierr);
343 
344   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
345   for (i=0; i<mbs; i++) {
346     rvals[0] = bs*i;
347     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
348     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
349       col = garray[Bbaij->j[j]]*bs;
350       for (k=0; k<bs; k++) {
351 #if defined(PETSC_USE_REAL_MAT_SINGLE)
352         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
353 #else
354         atmp = a+j*bs2 + k*bs;
355 #endif
356         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
357         col++;
358       }
359     }
360   }
361 #if defined(PETSC_USE_REAL_MAT_SINGLE)
362   ierr = PetscFree(atmp);CHKERRQ(ierr);
363 #endif
364   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
365 
366   baij->garray = 0;
367 
368   ierr = PetscFree(rvals);CHKERRQ(ierr);
369   ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
370   ierr = MatDestroy(&B);CHKERRQ(ierr);
371   ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr);
372 
373   baij->B = Bnew;
374 
375   A->was_assembled = PETSC_FALSE;
376   PetscFunctionReturn(0);
377 }
378 
379 
380 
381