xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 5ed2d596fd9914de76abbaed56c65f4792ae57ae)
1 /*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/
2 
3 /*
4    Support for the parallel SBAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
7 #include "src/vec/vecimpl.h"
8 extern int MatSetValues_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
/*
   MatSetUpMultiply_MPISBAIJ - Builds the communication data structures needed
   by the parallel SBAIJ matrix-vector product: compacts the column space of
   the off-diagonal block B to just the block columns actually referenced,
   records their global indices in garray, and creates the work vectors and
   VecScatter contexts (both the standard lvec/Mvctx pair and the
   symmetric-format slvec0/slvec1/sMvctx structures).

   Collective on mat's communicator (creates MPI vectors and scatters and
   ends with a barrier).
*/
int MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ       *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(sbaij->B->data);  /* off-diagonal block, stored as SeqBAIJ */
  int                Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray,*sgarray;
  int                bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
  IS                 from,to;
  Vec                gvec;
  int                rank=sbaij->rank,lsize,size=sbaij->size;
  int                *owners=sbaij->rowners,*sowners,*ec_owner,k;
  PetscMap           vecmap;
  PetscScalar        *ptr;

  PetscFunctionBegin;
  /* discard scatter structures left over from a previous assembly */
  if (sbaij->lvec) {
    ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr);
    sbaij->lvec = 0;
  }
  if (sbaij->Mvctx) {
    ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr);
    sbaij->Mvctx = 0;
  }

  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;  /* ec counts the distinct block columns used by B */
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form arrays of columns we need */
  /* sgarray holds three length-ec segments; the third (ec_owner) records,
     for each needed column, which process owns it */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ierr = PetscMalloc((3*ec+1)*sizeof(int),&sgarray);CHKERRQ(ierr);
  ec_owner = sgarray + 2*ec;

  /* walk the marked columns in owner order so garray is sorted by owner */
  ec = 0;
  for (j=0; j<size; j++){
    for (i=owners[j]; i<owners[j+1]; i++){
      if (indices[i]) {
        garray[ec]   = i;   /* global block column needed from off-process */
        ec_owner[ec] = j;   /* rank that owns it */
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B: renumber B's column indices into 0..ec-1 */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs       = ec;
  sbaij->B->n   = ec*B->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* stmp is sized 2*ec because it is reused below for the symmetric scatter */
  ierr = PetscMalloc((2*ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = bs*garray[i];  /* global point indices of needed columns */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);

  for (i=0; i<ec; i++) { stmp[i] = bs*i; }      /* contiguous destination in lvec */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);

  /* generate the scatter context (gvec supplies only the parallel layout) */
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr);

  sbaij->garray = garray;
  PetscLogObjectParent(mat,sbaij->Mvctx);
  PetscLogObjectParent(mat,sbaij->lvec);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);

  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
  /* local length = diagonal part (mbs blocks) + gathered off-process part (ec blocks) */
  lsize = (mbs + ec)*bs;
  ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
  ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);

  /* sowners = global ownership ranges of slvec0, used to translate indices below */
  ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr);

  /* x index in the IS sfrom: shift each needed column from the matrix layout
     (owners) into the padded slvec0 layout (sowners) of its owning process */
  for (i=0; i<ec; i++) {
    j = ec_owner[i];
    sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* b index in the IS sfrom: this process's own gathered segment, which starts
     right after its mbs diagonal blocks in slvec0 */
  k = sowners[rank]/bs + mbs;
  for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;

  for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);

  /* x index in the IS sto */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
  /* b index in the IS sto */
  for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);

  /* generate the SBAIJ scatter context */
  ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);

   /*
      Post the receives for the first matrix vector product. We synchronize after
    this on the chance that the user immediately calls MatMult() after assembling
    the matrix.
  */
  ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are sequential views aliasing the diagonal
     and gathered segments of slvec1 (slvec0) -- no data is copied */
  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);

  ierr = PetscFree(stmp);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  PetscLogObjectParent(mat,sbaij->sMvctx);
  PetscLogObjectParent(mat,sbaij->slvec0);
  PetscLogObjectParent(mat,sbaij->slvec1);
  PetscLogObjectParent(mat,sbaij->slvec0b);
  PetscLogObjectParent(mat,sbaij->slvec1a);
  PetscLogObjectParent(mat,sbaij->slvec1b);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);

  PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  ierr = PetscFree(sgarray);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
167 
168 #undef __FUNCT__
169 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
/*
   MatSetUpMultiply_MPISBAIJ_2comm - Alternate setup for the parallel SBAIJ
   matrix-vector multiply that builds only the standard gather structures
   (lvec and Mvctx), without the symmetric-format slvec/sMvctx machinery.

   Compacts the column space of the off-diagonal block B and records the
   needed global block columns in baij->garray. Two strategies are compiled:
   a hash-table based one (PETSC_USE_CTABLE, memory proportional to the
   number of needed columns) and a dense marker array (memory proportional
   to the global number of block columns).

   Collective on mat's communicator.
*/
int MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
{
  Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);  /* off-diagonal block */
  int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
  int                bs = baij->bs,*stmp;
  IS                 from,to;
  Vec                gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;  /* maps global block column + 1 -> local index + 1 (one based) */
  PetscTablePosition tpos;
  int                gid,lid;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  PetscTableCreate(B->mbs,&gid1_lid1);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int data,gid1 = aj[B->i[i]+j] + 1;  /* shifted by one so 0 can mean "absent" */
      ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;       /* undo the one-based shift */
    garray[lid] = gid;
  }
  /* sort so garray is in increasing global order, then rebuild the table
     with the sorted local indices */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B: renumber into 0..ec-1 */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->n = ec*B->bs;
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;  /* ec counts distinct block columns used by B */
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form array of columns we need (already sorted since we scan 0..Nbs-1) */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->n   = ec*B->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* garray is temporarily scaled to point indices, then restored below */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;
  }

  ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }  /* contiguous destination in lvec */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we must do either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector.*/
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  /*
      Post the receives for the first matrix vector product. We synchronize after
    this on the chance that the user immediately calls MatMult() after assembling
    the matrix.
  */
  ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  PetscLogObjectParent(mat,baij->Mvctx);
  PetscLogObjectParent(mat,baij->lvec);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);
  baij->garray = garray;  /* ownership of garray transfers to the matrix */
  PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
308 
309 
/*
     Takes the local (off-diagonal) part of an already assembled MPISBAIJ
   matrix and disassembles it, restoring global column numbering. This allows
   new nonzeros to be inserted into the matrix that require more communication
   in the matrix-vector multiply; the affected data structures are then
   rebuilt at the next assembly.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
319 #undef __FUNCT__
320 #define __FUNCT__ "DisAssemble_MPISBAIJ"
321 int DisAssemble_MPISBAIJ(Mat A)
322 {
323   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
324   Mat           B = baij->B,Bnew;
325   Mat_SeqBAIJ   *Bbaij = (Mat_SeqBAIJ*)B->data;
326   int           ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
327   int           k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m;
328   MatScalar     *a = Bbaij->a;
329   PetscScalar   *atmp;
330 #if defined(PETSC_USE_MAT_SINGLE)
331   int           l;
332 #endif
333 
334   PetscFunctionBegin;
335 #if defined(PETSC_USE_MAT_SINGLE)
336   ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp);
337 #endif
338   /* free stuff related to matrix-vec multiply */
339   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
340   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
341   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
342   if (baij->colmap) {
343 #if defined (PETSC_USE_CTABLE)
344     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
345 #else
346     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
347     baij->colmap = 0;
348     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
349 #endif
350   }
351 
352   /* make sure that B is assembled so we can access its values */
353   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
354   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
355 
356   /* invent new B and copy stuff over */
357   ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
358   for (i=0; i<mbs; i++) {
359     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
360   }
361   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
362   ierr = PetscFree(nz);CHKERRQ(ierr);
363 
364   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
365   for (i=0; i<mbs; i++) {
366     rvals[0] = bs*i;
367     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
368     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
369       col = garray[Bbaij->j[j]]*bs;
370       for (k=0; k<bs; k++) {
371 #if defined(PETSC_USE_MAT_SINGLE)
372         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
373 #else
374         atmp = a+j*bs2 + k*bs;
375 #endif
376         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
377         col++;
378       }
379     }
380   }
381 #if defined(PETSC_USE_MAT_SINGLE)
382   ierr = PetscFree(atmp);CHKERRQ(ierr);
383 #endif
384   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
385   baij->garray = 0;
386   ierr = PetscFree(rvals);CHKERRQ(ierr);
387   PetscLogObjectMemory(A,-ec*sizeof(int));
388   ierr = MatDestroy(B);CHKERRQ(ierr);
389   PetscLogObjectParent(A,Bnew);
390   baij->B = Bnew;
391   A->was_assembled = PETSC_FALSE;
392   PetscFunctionReturn(0);
393 }
394 
395 
396 
397