xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision a2ec6df8280ae283e8b0f45d20ebe04f5d3dfc7b)
1 /*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/
2 
3 /*
4    Support for the parallel SBAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
7 #include "src/vec/vecimpl.h"
8 
9 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
10 
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
/*
    MatSetUpMultiply_MPISBAIJ - Builds the communication structures needed by the
  parallel SBAIJ matrix-vector product:

    1) compacts the column space of the off-diagonal part sbaij->B so that B uses
       local block-column indices 0..ec-1 (garray[i] records the global block
       column that local column i corresponds to),
    2) creates the scatter Mvctx from a global vector into the sequential work
       vector lvec (the ghost values this process needs), and
    3) creates the SBAIJ-specific scatter sMvctx together with the parallel work
       vectors slvec0/slvec1, whose local layout is
           [ local part (mbs*bs entries) | ghost part (ec*bs entries) ];
       slvec0b, slvec1a and slvec1b are sequential vectors that share the
       storage of those two pieces (created with VecCreateSeqWithArray).

    Collective on mat->comm: creates MPI vectors and scatters, posts the
  receives for the first product, and ends with a barrier so a MatMult()
  issued immediately after assembly finds the receives already posted.
*/
int MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ       *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(sbaij->B->data);
  int                Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray,*sgarray;
  int                bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
  IS                 from,to;
  Vec                gvec;
  int                rank=sbaij->rank,lsize,size=sbaij->size;
  int                *owners=sbaij->rowners,*sowners,*ec_owner,k;
  PetscMap           vecmap;
  PetscScalar        *ptr;

  PetscFunctionBegin;
  /* free any scatter structures left over from a previous assembly */
  if (sbaij->lvec) {
    ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr);
    sbaij->lvec = 0;
  }
  if (sbaij->Mvctx) {
    ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr);
    sbaij->Mvctx = 0;
  }

  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;  /* ec counts the distinct block columns used by B */
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form arrays of columns we need: garray[] holds the global block columns in
     increasing order; ec_owner[] (aliased into the tail of sgarray) holds the
     rank that owns each of them.  sgarray's first 2*ec slots are filled later
     with the scatter indices for the combined [x | b] layout. */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ierr = PetscMalloc((3*ec+1)*sizeof(int),&sgarray);CHKERRQ(ierr);
  ec_owner = sgarray + 2*ec;

  ec = 0;
  for (j=0; j<size; j++){
    for (i=owners[j]; i<owners[j+1]; i++){
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B: rewrite B's column indices from global
     block columns to local indices 0..ec-1 */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs       = ec;
  sbaij->B->n   = ec*B->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather:
     'from' = needed global point indices, 'to' = 0..ec*bs-1 in lvec */
  ierr = PetscMalloc((2*ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);

  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);

  /* generate the scatter context; gvec is a throwaway vector with the right
     parallel layout, needed only because VecScatterCreate requires one */
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr);

  sbaij->garray = garray;
  PetscLogObjectParent(mat,sbaij->Mvctx);
  PetscLogObjectParent(mat,sbaij->lvec);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);

  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into;
     each process stores its local part followed by its ghost part */
  lsize = (mbs + ec)*bs;
  ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
  ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); /* vec_size is not used below */

  /* sowners = global ownership ranges of slvec0 (point indices, multiples of bs) */
  ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr);

  /* x index in the IS sfrom: translate each needed global block column into the
     owner's local-part offset inside the [x | b] layout of slvec0 */
  for (i=0; i<ec; i++) {
    j = ec_owner[i];
    sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* b index in the IS sfrom: this process's own ghost slots, which start right
     after its local part (offset mbs) */
  k = sowners[rank]/bs + mbs;
  for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;

  for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);

  /* x index in the IS sto: received x values land in this process's ghost slots */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
  /* b index in the IS sto: contributed b values go back to the owners' x slots */
  for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);

  /* generate the SBAIJ scatter context */
  ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);

   /*
      Post the receives for the first matrix vector product. We sync-chronize after
    this on the chance that the user immediately calls MatMult() after assemblying
    the matrix.
  */
  ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr);

  /* wrap the local-part and ghost-part pieces of slvec1 (and the ghost part of
     slvec0) as sequential vectors sharing the same storage */
  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);

  ierr = PetscFree(stmp);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  PetscLogObjectParent(mat,sbaij->sMvctx);
  PetscLogObjectParent(mat,sbaij->slvec0);
  PetscLogObjectParent(mat,sbaij->slvec1);
  PetscLogObjectParent(mat,sbaij->slvec0b);
  PetscLogObjectParent(mat,sbaij->slvec1a);
  PetscLogObjectParent(mat,sbaij->slvec1b);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);

  PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  ierr = PetscFree(sgarray);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
172 int MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
173 {
174   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
175   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
176   int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
177   int                bs = baij->bs,*stmp;
178   IS                 from,to;
179   Vec                gvec;
180 #if defined (PETSC_USE_CTABLE)
181   PetscTable         gid1_lid1;
182   PetscTablePosition tpos;
183   int                gid,lid;
184 #endif
185 
186   PetscFunctionBegin;
187 
188 #if defined (PETSC_USE_CTABLE)
189   /* use a table - Mark Adams */
190   PetscTableCreate(B->mbs,&gid1_lid1);
191   for (i=0; i<B->mbs; i++) {
192     for (j=0; j<B->ilen[i]; j++) {
193       int data,gid1 = aj[B->i[i]+j] + 1;
194       ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
195       if (!data) {
196         /* one based table */
197         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
198       }
199     }
200   }
201   /* form array of columns we need */
202   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
203   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
204   while (tpos) {
205     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
206     gid--; lid--;
207     garray[lid] = gid;
208   }
209   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
210   /* qsort(garray, ec, sizeof(int), intcomparcarc); */
211   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
212   for (i=0; i<ec; i++) {
213     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
214   }
215   /* compact out the extra columns in B */
216   for (i=0; i<B->mbs; i++) {
217     for (j=0; j<B->ilen[i]; j++) {
218       int gid1 = aj[B->i[i] + j] + 1;
219       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
220       lid --;
221       aj[B->i[i]+j] = lid;
222     }
223   }
224   B->nbs     = ec;
225   baij->B->n = ec*B->bs;
226   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
227   /* Mark Adams */
228 #else
229   /* For the first stab we make an array as long as the number of columns */
230   /* mark those columns that are in baij->B */
231   ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
232   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
233   for (i=0; i<B->mbs; i++) {
234     for (j=0; j<B->ilen[i]; j++) {
235       if (!indices[aj[B->i[i] + j]]) ec++;
236       indices[aj[B->i[i] + j] ] = 1;
237     }
238   }
239 
240   /* form array of columns we need */
241   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
242   ec = 0;
243   for (i=0; i<Nbs; i++) {
244     if (indices[i]) {
245       garray[ec++] = i;
246     }
247   }
248 
249   /* make indices now point into garray */
250   for (i=0; i<ec; i++) {
251     indices[garray[i]] = i;
252   }
253 
254   /* compact out the extra columns in B */
255   for (i=0; i<B->mbs; i++) {
256     for (j=0; j<B->ilen[i]; j++) {
257       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
258     }
259   }
260   B->nbs       = ec;
261   baij->B->n   = ec*B->bs;
262   ierr = PetscFree(indices);CHKERRQ(ierr);
263 #endif
264 
265   /* create local vector that is used to scatter into */
266   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
267 
268   /* create two temporary index sets for building scatter-gather */
269   for (i=0; i<ec; i++) {
270     garray[i] = bs*garray[i];
271   }
272   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
273   for (i=0; i<ec; i++) {
274     garray[i] = garray[i]/bs;
275   }
276 
277   ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
278   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
279   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
280   ierr = PetscFree(stmp);CHKERRQ(ierr);
281 
282   /* create temporary global vector to generate scatter context */
283   /* this is inefficient, but otherwise we must do either
284      1) save garray until the first actual scatter when the vector is known or
285      2) have another way of generating a scatter context without a vector.*/
286   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
287 
288   /* gnerate the scatter context */
289   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
290 
291   /*
292       Post the receives for the first matrix vector product. We sync-chronize after
293     this on the chance that the user immediately calls MatMult() after assemblying
294     the matrix.
295   */
296   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
297   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
298 
299   PetscLogObjectParent(mat,baij->Mvctx);
300   PetscLogObjectParent(mat,baij->lvec);
301   PetscLogObjectParent(mat,from);
302   PetscLogObjectParent(mat,to);
303   baij->garray = garray;
304   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
305   ierr = ISDestroy(from);CHKERRQ(ierr);
306   ierr = ISDestroy(to);CHKERRQ(ierr);
307   ierr = VecDestroy(gvec);CHKERRQ(ierr);
308   PetscFunctionReturn(0);
309 }
310 
311 
312 /*
313      Takes the local part of an already assembled MPIBAIJ matrix
314    and disassembles it. This is to allow new nonzeros into the matrix
315    that require more communication in the matrix vector multiply.
316    Thus certain data-structures must be rebuilt.
317 
318    Kind of slow! But that's what application programmers get when
319    they are sloppy.
320 */
321 #undef __FUNCT__
322 #define __FUNCT__ "DisAssemble_MPISBAIJ"
323 int DisAssemble_MPISBAIJ(Mat A)
324 {
325   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
326   Mat           B = baij->B,Bnew;
327   Mat_SeqBAIJ   *Bbaij = (Mat_SeqBAIJ*)B->data;
328   int           ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
329   int           k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m;
330   MatScalar     *a = Bbaij->a;
331   PetscScalar   *atmp;
332 #if defined(PETSC_USE_MAT_SINGLE)
333   int           l;
334 #endif
335 
336   PetscFunctionBegin;
337 #if defined(PETSC_USE_MAT_SINGLE)
338   ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp);
339 #endif
340   /* free stuff related to matrix-vec multiply */
341   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
342   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
343   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
344   if (baij->colmap) {
345 #if defined (PETSC_USE_CTABLE)
346     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
347 #else
348     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
349     baij->colmap = 0;
350     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
351 #endif
352   }
353 
354   /* make sure that B is assembled so we can access its values */
355   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
356   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
357 
358   /* invent new B and copy stuff over */
359   ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
360   for (i=0; i<mbs; i++) {
361     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
362   }
363   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
364   ierr = PetscFree(nz);CHKERRQ(ierr);
365 
366   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
367   for (i=0; i<mbs; i++) {
368     rvals[0] = bs*i;
369     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
370     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
371       col = garray[Bbaij->j[j]]*bs;
372       for (k=0; k<bs; k++) {
373 #if defined(PETSC_USE_MAT_SINGLE)
374         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
375 #else
376         atmp = a+j*bs2 + k*bs;
377 #endif
378         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
379         col++;
380       }
381     }
382   }
383 #if defined(PETSC_USE_MAT_SINGLE)
384   ierr = PetscFree(atmp);CHKERRQ(ierr);
385 #endif
386   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
387   baij->garray = 0;
388   ierr = PetscFree(rvals);CHKERRQ(ierr);
389   PetscLogObjectMemory(A,-ec*sizeof(int));
390   ierr = MatDestroy(B);CHKERRQ(ierr);
391   PetscLogObjectParent(A,Bnew);
392   baij->B = Bnew;
393   A->was_assembled = PETSC_FALSE;
394   PetscFunctionReturn(0);
395 }
396 
397 
398 
399