xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision d155d4104c12796cc6e93a7e2b3cd74e97fcda36)
1 
2 /*
3    Support for the parallel SBAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
6 
7 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
8 
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
12 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
15   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(sbaij->B->data);
16   PetscErrorCode ierr;
17   PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
18   PetscInt       bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
19   IS             from,to;
20   Vec            gvec;
21   PetscMPIInt    rank=sbaij->rank,lsize,size=sbaij->size;
22   PetscInt       *owners=sbaij->rangebs,*sowners,*ec_owner,k;
23   PetscScalar    *ptr;
24 
25   PetscFunctionBegin;
26   ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr);
27 
28   /* For the first stab we make an array as long as the number of columns */
29   /* mark those columns that are in sbaij->B */
30   ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
31   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
32   for (i=0; i<mbs; i++) {
33     for (j=0; j<B->ilen[i]; j++) {
34       if (!indices[aj[B->i[i] + j]]) ec++;
35       indices[aj[B->i[i] + j] ] = 1;
36     }
37   }
38 
39   /* form arrays of columns we need */
40   ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
41   ierr = PetscMalloc2(2*ec,PetscInt,&sgarray,ec,PetscInt,&ec_owner);CHKERRQ(ierr);
42 
43   ec = 0;
44   for (j=0; j<size; j++){
45     for (i=owners[j]; i<owners[j+1]; i++){
46       if (indices[i]) {
47         garray[ec]   = i;
48         ec_owner[ec] = j;
49         ec++;
50       }
51     }
52   }
53 
54   /* make indices now point into garray */
55   for (i=0; i<ec; i++) indices[garray[i]] = i;
56 
57   /* compact out the extra columns in B */
58   for (i=0; i<mbs; i++) {
59     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
60   }
61   B->nbs      = ec;
62   sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs;
63   ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr);
64   ierr = PetscFree(indices);CHKERRQ(ierr);
65 
66   /* create local vector that is used to scatter into */
67   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
68 
69   /* create two temporary index sets for building scatter-gather */
70   ierr = PetscMalloc(2*ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
71   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
72   for (i=0; i<ec; i++) { stmp[i] = i; }
73   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
74 
75   /* generate the scatter context
76      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */
77   ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
78   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
79   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
80 
81   sbaij->garray = garray;
82   ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr);
83   ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr);
84   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
85   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
86 
87   ierr = ISDestroy(&from);CHKERRQ(ierr);
88   ierr = ISDestroy(&to);CHKERRQ(ierr);
89 
90   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
91   lsize = (mbs + ec)*bs;
92   ierr = VecCreateMPI(((PetscObject)mat)->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
93   ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
94   ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
95 
96   sowners = sbaij->slvec0->map->range;
97 
98   /* x index in the IS sfrom */
99   for (i=0; i<ec; i++) {
100     j = ec_owner[i];
101     sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
102   }
103   /* b index in the IS sfrom */
104   k = sowners[rank]/bs + mbs;
105   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
106   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
107 
108   /* x index in the IS sto */
109   k = sowners[rank]/bs + mbs;
110   for (i=0; i<ec; i++) stmp[i] = (k + i);
111   /* b index in the IS sto */
112   for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];
113 
114   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
115 
116   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
117 
118   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
119   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
120   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
121   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
122   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
123 
124   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
125   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
126   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
127 
128   ierr = PetscFree(stmp);CHKERRQ(ierr);
129   ierr = MPI_Barrier(((PetscObject)mat)->comm);CHKERRQ(ierr);
130 
131   ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr);
132   ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr);
133   ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr);
134   ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr);
135   ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr);
136   ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr);
137   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
138   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
139 
140   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
141   ierr = ISDestroy(&from);CHKERRQ(ierr);
142   ierr = ISDestroy(&to);CHKERRQ(ierr);
143   ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr);
144   PetscFunctionReturn(0);
145 }
146 
147 #undef __FUNCT__
148 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
149 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
150 {
151   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
152   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
153   PetscErrorCode     ierr;
154   PetscInt           i,j,*aj = B->j,ec = 0,*garray;
155   PetscInt           bs = mat->rmap->bs,*stmp;
156   IS                 from,to;
157   Vec                gvec;
158 #if defined (PETSC_USE_CTABLE)
159   PetscTable         gid1_lid1;
160   PetscTablePosition tpos;
161   PetscInt           gid,lid;
162 #else
163   PetscInt           Nbs = baij->Nbs,*indices;
164 #endif
165 
166   PetscFunctionBegin;
167 #if defined (PETSC_USE_CTABLE)
168   /* use a table - Mark Adams */
169   PetscTableCreate(B->mbs,&gid1_lid1);
170   for (i=0; i<B->mbs; i++) {
171     for (j=0; j<B->ilen[i]; j++) {
172       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
173       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
174       if (!data) {
175         /* one based table */
176         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
177       }
178     }
179   }
180   /* form array of columns we need */
181   ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
182   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
183   while (tpos) {
184     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
185     gid--; lid--;
186     garray[lid] = gid;
187   }
188   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
189   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
190   for (i=0; i<ec; i++) {
191     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
192   }
193   /* compact out the extra columns in B */
194   for (i=0; i<B->mbs; i++) {
195     for (j=0; j<B->ilen[i]; j++) {
196       PetscInt gid1 = aj[B->i[i] + j] + 1;
197       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
198       lid --;
199       aj[B->i[i]+j] = lid;
200     }
201   }
202   B->nbs     = ec;
203   baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
204   ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
205   ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr);
206   /* Mark Adams */
207 #else
208   /* For the first stab we make an array as long as the number of columns */
209   /* mark those columns that are in baij->B */
210   ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
211   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
212   for (i=0; i<B->mbs; i++) {
213     for (j=0; j<B->ilen[i]; j++) {
214       if (!indices[aj[B->i[i] + j]]) ec++;
215       indices[aj[B->i[i] + j] ] = 1;
216     }
217   }
218 
219   /* form array of columns we need */
220   ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
221   ec = 0;
222   for (i=0; i<Nbs; i++) {
223     if (indices[i]) {
224       garray[ec++] = i;
225     }
226   }
227 
228   /* make indices now point into garray */
229   for (i=0; i<ec; i++) {
230     indices[garray[i]] = i;
231   }
232 
233   /* compact out the extra columns in B */
234   for (i=0; i<B->mbs; i++) {
235     for (j=0; j<B->ilen[i]; j++) {
236       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
237     }
238   }
239   B->nbs       = ec;
240   baij->B->cmap->n   = ec*mat->rmap->bs;
241   ierr = PetscFree(indices);CHKERRQ(ierr);
242 #endif
243 
244   /* create local vector that is used to scatter into */
245   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
246 
247   /* create two temporary index sets for building scatter-gather */
248   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
249 
250   ierr = PetscMalloc(ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
251   for (i=0; i<ec; i++) { stmp[i] = i; }
252   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);
253 
254   /* create temporary global vector to generate scatter context */
255   ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
256   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
257   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
258 
259   ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
260   ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
261   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
262   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
263   baij->garray = garray;
264   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
265   ierr = ISDestroy(&from);CHKERRQ(ierr);
266   ierr = ISDestroy(&to);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
270 /*
271      Takes the local part of an already assembled MPISBAIJ matrix
272    and disassembles it. This is to allow new nonzeros into the matrix
273    that require more communication in the matrix vector multiply.
274    Thus certain data-structures must be rebuilt.
275 
276    Kind of slow! But that's what application programmers get when
277    they are sloppy.
278 */
279 #undef __FUNCT__
280 #define __FUNCT__ "DisAssemble_MPISBAIJ"
281 PetscErrorCode DisAssemble_MPISBAIJ(Mat A)
282 {
283   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
284   Mat            B = baij->B,Bnew;
285   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
286   PetscErrorCode ierr;
287   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
288   PetscInt       k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
289   MatScalar      *a = Bbaij->a;
290   PetscScalar    *atmp;
291 #if defined(PETSC_USE_REAL_MAT_SINGLE)
292   PetscInt       l;
293 #endif
294 
295   PetscFunctionBegin;
296 #if defined(PETSC_USE_REAL_MAT_SINGLE)
297   ierr = PetscMalloc(A->rmap->bs*sizeof(PetscScalar),&atmp);
298 #endif
299   /* free stuff related to matrix-vec multiply */
300   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
301   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
302   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
303 
304   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
305   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
306   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
307   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
308   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
309 
310   if (baij->colmap) {
311 #if defined (PETSC_USE_CTABLE)
312     ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
313 #else
314     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
315     baij->colmap = 0;
316     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
317 #endif
318   }
319 
320   /* make sure that B is assembled so we can access its values */
321   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
322   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
323 
324   /* invent new B and copy stuff over */
325   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
326   for (i=0; i<mbs; i++) {
327     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
328   }
329   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
330   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
331   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
332   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
333   ierr = PetscFree(nz);CHKERRQ(ierr);
334 
335   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
336   for (i=0; i<mbs; i++) {
337     rvals[0] = bs*i;
338     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
339     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
340       col = garray[Bbaij->j[j]]*bs;
341       for (k=0; k<bs; k++) {
342 #if defined(PETSC_USE_REAL_MAT_SINGLE)
343         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
344 #else
345         atmp = a+j*bs2 + k*bs;
346 #endif
347         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
348         col++;
349       }
350     }
351   }
352 #if defined(PETSC_USE_REAL_MAT_SINGLE)
353   ierr = PetscFree(atmp);CHKERRQ(ierr);
354 #endif
355   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
356   baij->garray = 0;
357   ierr = PetscFree(rvals);CHKERRQ(ierr);
358   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
359   ierr = MatDestroy(&B);CHKERRQ(ierr);
360   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
361   baij->B = Bnew;
362   A->was_assembled = PETSC_FALSE;
363   PetscFunctionReturn(0);
364 }
365 
366 
367 
368