xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision e2d9671b86a8697a4c2c5fff9ed2002d1fb085ae)
1 /*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/
2 
3 /*
4    Support for the parallel SBAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
7 
8 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
9 
10 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
/*
   MatSetUpMultiply_MPISBAIJ - Builds the communication structures used by the
   parallel SBAIJ matrix-vector product.

   Two scatter contexts are created:
     - Mvctx/lvec: the conventional off-diagonal gather (not used by
       MatMult_MPISBAIJ() itself, but kept for applications that expect it);
     - sMvctx with work vectors slvec0/slvec1: the SBAIJ-specific scatter whose
       index sets carry both the "x" (input) part and the "b" (accumulation)
       part needed because only the upper triangle of the matrix is stored.

   Input Parameter:
.  mat - the MPISBAIJ matrix (its off-diagonal part sbaij->B is compacted
         in place: column indices are renumbered into the local range 0..ec-1)

   Returns 0 on success; PETSc error code otherwise.
*/
int MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ       *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(sbaij->B->data);
  int                Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray,*sgarray;
  int                bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
  IS                 from,to;
  Vec                gvec;
  int                rank=sbaij->rank,lsize,size=sbaij->size;
  int                *owners=sbaij->rowners,*sowners,*ec_owner,k;
  PetscMap           vecmap;
  PetscScalar        *ptr;

  PetscFunctionBegin;
  /* discard scatter data possibly left over from a previous assembly */
  if (sbaij->lvec) {
    ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr);
    sbaij->lvec = 0;
  }
  if (sbaij->Mvctx) {
    ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr);
    sbaij->Mvctx = 0;
  }

  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      /* ec counts the distinct block columns that appear in B */
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form arrays of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ierr = PetscMalloc((3*ec+1)*sizeof(int),&sgarray);CHKERRQ(ierr);
  /* the last third of sgarray records, for each needed column, the rank that owns it */
  ec_owner = sgarray + 2*ec;

  /* walk columns in owner order so garray is sorted and ec_owner is filled consistently */
  ec = 0;
  for (j=0; j<size; j++){
    for (i=owners[j]; i<owners[j+1]; i++){
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B: global block columns become local 0..ec-1 */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs      = ec;
  sbaij->B->n = ec*B->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* stmp is sized 2*ec because it is reused below for the combined x+b index sets */
  ierr = PetscMalloc((2*ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);

  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but useful for some applications */
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr);

  sbaij->garray = garray;
  PetscLogObjectParent(mat,sbaij->Mvctx);
  PetscLogObjectParent(mat,sbaij->lvec);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);

  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
  /* each process holds its mbs diagonal blocks plus its ec ghost blocks */
  lsize = (mbs + ec)*bs;
  ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
  ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);

  ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr);

  /* x index in the IS sfrom: shift each ghost column into the owner's slvec0 range */
  for (i=0; i<ec; i++) {
    j = ec_owner[i];
    sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* b index in the IS sfrom: this process's ghost segment starts right after its mbs blocks */
  k = sowners[rank]/bs + mbs;
  for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;

  for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);

  /* x index in the IS sto */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
  /* b index in the IS sto */
  for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);

  /* generate the SBAIJ scatter context */
  ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);

   /*
      Post the receives for the first matrix vector product. We synchronize after
    this on the chance that the user immediately calls MatMult() after assembling
    the matrix.
  */
  ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are views into the slvec1/slvec0 storage:
     the leading bs*mbs entries ("a", local part) and the trailing ghost entries ("b") */
  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArrayFast(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArrayFast(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArrayFast(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArrayFast(sbaij->slvec0,&ptr);CHKERRQ(ierr);

  ierr = PetscFree(stmp);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  PetscLogObjectParent(mat,sbaij->sMvctx);
  PetscLogObjectParent(mat,sbaij->slvec0);
  PetscLogObjectParent(mat,sbaij->slvec1);
  PetscLogObjectParent(mat,sbaij->slvec0b);
  PetscLogObjectParent(mat,sbaij->slvec1a);
  PetscLogObjectParent(mat,sbaij->slvec1b);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);

  PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  ierr = PetscFree(sgarray);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
169 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
/*
   MatSetUpMultiply_MPISBAIJ_2comm - Builds the conventional (two-communication)
   scatter context for the parallel SBAIJ matrix-vector product.

   Determines the set of off-processor block columns referenced by the
   off-diagonal part baij->B, compacts B's column indices into the local
   range 0..ec-1, and creates the Mvctx scatter that gathers those ghost
   values into baij->lvec.  With PETSC_USE_CTABLE the global-to-local map
   is built with a hash table instead of an O(Nbs) marker array.

   Input Parameter:
.  mat - the MPISBAIJ matrix

   Returns 0 on success; PETSc error code otherwise.
*/
int MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
{
  Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
  int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
  int                bs = baij->bs,*stmp;
  IS                 from,to;
  Vec                gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  int                gid,lid;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* the table maps (global block column + 1) -> (local index + 1); the +1
     keeps 0 free to mean "not present" */
  PetscTableCreate(B->mbs,&gid1_lid1);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;
    garray[lid] = gid;
  }
  /* sort garray, then rebuild the table so local indices follow the sorted order */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->n = ec*B->bs;
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      /* ec counts the distinct block columns that appear in B */
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->n   = ec*B->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* garray is scaled to point indices in place, then restored after ISCreateBlock copies it */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;
  }

  ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we must do either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector.*/
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  /*
      Post the receives for the first matrix vector product. We synchronize after
    this on the chance that the user immediately calls MatMult() after assembling
    the matrix.
  */
  ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  PetscLogObjectParent(mat,baij->Mvctx);
  PetscLogObjectParent(mat,baij->lvec);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);
  baij->garray = garray;
  PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
309 
310 
311 /*
312      Takes the local part of an already assembled MPIBAIJ matrix
313    and disassembles it. This is to allow new nonzeros into the matrix
314    that require more communication in the matrix vector multiply.
315    Thus certain data-structures must be rebuilt.
316 
317    Kind of slow! But that's what application programmers get when
318    they are sloppy.
319 */
320 #undef __FUNCT__
321 #define __FUNCT__ "DisAssemble_MPISBAIJ"
322 int DisAssemble_MPISBAIJ(Mat A)
323 {
324   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
325   Mat           B = baij->B,Bnew;
326   Mat_SeqBAIJ   *Bbaij = (Mat_SeqBAIJ*)B->data;
327   int           ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
328   int           k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m;
329   MatScalar     *a = Bbaij->a;
330   PetscScalar   *atmp;
331 #if defined(PETSC_USE_MAT_SINGLE)
332   int           l;
333 #endif
334 
335   PetscFunctionBegin;
336 #if defined(PETSC_USE_MAT_SINGLE)
337   ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp);
338 #endif
339   /* free stuff related to matrix-vec multiply */
340   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
341   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
342   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
343   if (baij->colmap) {
344 #if defined (PETSC_USE_CTABLE)
345     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
346 #else
347     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
348     baij->colmap = 0;
349     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
350 #endif
351   }
352 
353   /* make sure that B is assembled so we can access its values */
354   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
355   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
356 
357   /* invent new B and copy stuff over */
358   ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
359   for (i=0; i<mbs; i++) {
360     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
361   }
362   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
363   ierr = PetscFree(nz);CHKERRQ(ierr);
364 
365   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
366   for (i=0; i<mbs; i++) {
367     rvals[0] = bs*i;
368     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
369     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
370       col = garray[Bbaij->j[j]]*bs;
371       for (k=0; k<bs; k++) {
372 #if defined(PETSC_USE_MAT_SINGLE)
373         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
374 #else
375         atmp = a+j*bs2 + k*bs;
376 #endif
377         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
378         col++;
379       }
380     }
381   }
382 #if defined(PETSC_USE_MAT_SINGLE)
383   ierr = PetscFree(atmp);CHKERRQ(ierr);
384 #endif
385   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
386   baij->garray = 0;
387   ierr = PetscFree(rvals);CHKERRQ(ierr);
388   PetscLogObjectMemory(A,-ec*sizeof(int));
389   ierr = MatDestroy(B);CHKERRQ(ierr);
390   PetscLogObjectParent(A,Bnew);
391   baij->B = Bnew;
392   A->was_assembled = PETSC_FALSE;
393   PetscFunctionReturn(0);
394 }
395 
396 
397 
398