/*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/

/*
   Support for the parallel SBAIJ matrix vector multiply
*/
#include "src/mat/impls/sbaij/mpi/mpisbaij.h"

extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);

11 #undef __FUNCT__
12 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
13 int MatSetUpMultiply_MPISBAIJ(Mat mat)
14 {
15   Mat_MPISBAIJ       *sbaij = (Mat_MPISBAIJ*)mat->data;
16   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(sbaij->B->data);
17   int                Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray,*sgarray;
18   int                bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
19   IS                 from,to;
20   Vec                gvec;
21   int                rank=sbaij->rank,lsize,size=sbaij->size;
22   int                *owners=sbaij->rowners,*sowners,*ec_owner,k;
23   PetscMap           vecmap;
24   PetscScalar        *ptr;
25 
26   PetscFunctionBegin;
27   if (sbaij->lvec) {
28     ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr);
29     sbaij->lvec = 0;
30   }
31   if (sbaij->Mvctx) {
32     ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr);
33     sbaij->Mvctx = 0;
34   }
35 
36   /* For the first stab we make an array as long as the number of columns */
37   /* mark those columns that are in sbaij->B */
38   ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
39   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
40   for (i=0; i<mbs; i++) {
41     for (j=0; j<B->ilen[i]; j++) {
42       if (!indices[aj[B->i[i] + j]]) ec++;
43       indices[aj[B->i[i] + j] ] = 1;
44     }
45   }
46 
47   /* form arrays of columns we need */
48   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
49   ierr = PetscMalloc((3*ec+1)*sizeof(int),&sgarray);CHKERRQ(ierr);
50   ec_owner = sgarray + 2*ec;
51 
52   ec = 0;
53   for (j=0; j<size; j++){
54     for (i=owners[j]; i<owners[j+1]; i++){
55       if (indices[i]) {
56         garray[ec]   = i;
57         ec_owner[ec] = j;
58         ec++;
59       }
60     }
61   }
62 
63   /* make indices now point into garray */
64   for (i=0; i<ec; i++) indices[garray[i]] = i;
65 
66   /* compact out the extra columns in B */
67   for (i=0; i<mbs; i++) {
68     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
69   }
70   B->nbs      = ec;
71   sbaij->B->n = ec*B->bs;
72   ierr = PetscFree(indices);CHKERRQ(ierr);
73 
74   /* create local vector that is used to scatter into */
75   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
76 
77   /* create two temporary index sets for building scatter-gather */
78   ierr = PetscMalloc((2*ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
79   for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
80   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);
81 
82   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
83   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
84 
85   /* generate the scatter context
86      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */
87   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
88   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
89   ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr);
90 
91   sbaij->garray = garray;
92   PetscLogObjectParent(mat,sbaij->Mvctx);
93   PetscLogObjectParent(mat,sbaij->lvec);
94   PetscLogObjectParent(mat,from);
95   PetscLogObjectParent(mat,to);
96 
97   ierr = ISDestroy(from);CHKERRQ(ierr);
98   ierr = ISDestroy(to);CHKERRQ(ierr);
99 
100   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
101   lsize = (mbs + ec)*bs;
102   ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
103   ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
104   ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
105 
106   ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr);
107   ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr);
108 
109   /* x index in the IS sfrom */
110   for (i=0; i<ec; i++) {
111     j = ec_owner[i];
112     sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
113   }
114   /* b index in the IS sfrom */
115   k = sowners[rank]/bs + mbs;
116   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
117 
118   for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
119   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);
120 
121   /* x index in the IS sto */
122   k = sowners[rank]/bs + mbs;
123   for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
124   /* b index in the IS sto */
125   for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];
126 
127   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);
128 
129   /* gnerate the SBAIJ scatter context */
130   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
131 
132    /*
133       Post the receives for the first matrix vector product. We sync-chronize after
134     this on the chance that the user immediately calls MatMult() after assemblying
135     the matrix.
136   */
137   ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr);
138 
139   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
140   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
141   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
142   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
143   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
144 
145   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
146   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
147   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
148 
149   ierr = PetscFree(stmp);CHKERRQ(ierr);
150   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
151 
152   PetscLogObjectParent(mat,sbaij->sMvctx);
153   PetscLogObjectParent(mat,sbaij->slvec0);
154   PetscLogObjectParent(mat,sbaij->slvec1);
155   PetscLogObjectParent(mat,sbaij->slvec0b);
156   PetscLogObjectParent(mat,sbaij->slvec1a);
157   PetscLogObjectParent(mat,sbaij->slvec1b);
158   PetscLogObjectParent(mat,from);
159   PetscLogObjectParent(mat,to);
160 
161   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
162   ierr = ISDestroy(from);CHKERRQ(ierr);
163   ierr = ISDestroy(to);CHKERRQ(ierr);
164   ierr = VecDestroy(gvec);CHKERRQ(ierr);
165   ierr = PetscFree(sgarray);CHKERRQ(ierr);
166 
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
172 int MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
173 {
174   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
175   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
176   int                i,j,*aj = B->j,ierr,ec = 0,*garray;
177   int                bs = baij->bs,*stmp;
178   IS                 from,to;
179   Vec                gvec;
180 #if defined (PETSC_USE_CTABLE)
181   PetscTable         gid1_lid1;
182   PetscTablePosition tpos;
183   int                gid,lid;
184 #else
185   int                Nbs = baij->Nbs,*indices;
186 #endif
187 
188   PetscFunctionBegin;
189 
190 #if defined (PETSC_USE_CTABLE)
191   /* use a table - Mark Adams */
192   PetscTableCreate(B->mbs,&gid1_lid1);
193   for (i=0; i<B->mbs; i++) {
194     for (j=0; j<B->ilen[i]; j++) {
195       int data,gid1 = aj[B->i[i]+j] + 1;
196       ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
197       if (!data) {
198         /* one based table */
199         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
200       }
201     }
202   }
203   /* form array of columns we need */
204   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
205   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
206   while (tpos) {
207     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
208     gid--; lid--;
209     garray[lid] = gid;
210   }
211   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
212   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
213   for (i=0; i<ec; i++) {
214     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
215   }
216   /* compact out the extra columns in B */
217   for (i=0; i<B->mbs; i++) {
218     for (j=0; j<B->ilen[i]; j++) {
219       int gid1 = aj[B->i[i] + j] + 1;
220       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
221       lid --;
222       aj[B->i[i]+j] = lid;
223     }
224   }
225   B->nbs     = ec;
226   baij->B->n = ec*B->bs;
227   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
228   /* Mark Adams */
229 #else
230   /* For the first stab we make an array as long as the number of columns */
231   /* mark those columns that are in baij->B */
232   ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
233   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
234   for (i=0; i<B->mbs; i++) {
235     for (j=0; j<B->ilen[i]; j++) {
236       if (!indices[aj[B->i[i] + j]]) ec++;
237       indices[aj[B->i[i] + j] ] = 1;
238     }
239   }
240 
241   /* form array of columns we need */
242   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
243   ec = 0;
244   for (i=0; i<Nbs; i++) {
245     if (indices[i]) {
246       garray[ec++] = i;
247     }
248   }
249 
250   /* make indices now point into garray */
251   for (i=0; i<ec; i++) {
252     indices[garray[i]] = i;
253   }
254 
255   /* compact out the extra columns in B */
256   for (i=0; i<B->mbs; i++) {
257     for (j=0; j<B->ilen[i]; j++) {
258       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
259     }
260   }
261   B->nbs       = ec;
262   baij->B->n   = ec*B->bs;
263   ierr = PetscFree(indices);CHKERRQ(ierr);
264 #endif
265 
266   /* create local vector that is used to scatter into */
267   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
268 
269   /* create two temporary index sets for building scatter-gather */
270   for (i=0; i<ec; i++) {
271     garray[i] = bs*garray[i];
272   }
273   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
274   for (i=0; i<ec; i++) {
275     garray[i] = garray[i]/bs;
276   }
277 
278   ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
279   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
280   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
281   ierr = PetscFree(stmp);CHKERRQ(ierr);
282 
283   /* create temporary global vector to generate scatter context */
284   /* this is inefficient, but otherwise we must do either
285      1) save garray until the first actual scatter when the vector is known or
286      2) have another way of generating a scatter context without a vector.*/
287   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
288 
289   /* gnerate the scatter context */
290   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
291 
292   /*
293       Post the receives for the first matrix vector product. We sync-chronize after
294     this on the chance that the user immediately calls MatMult() after assemblying
295     the matrix.
296   */
297   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
298   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
299 
300   PetscLogObjectParent(mat,baij->Mvctx);
301   PetscLogObjectParent(mat,baij->lvec);
302   PetscLogObjectParent(mat,from);
303   PetscLogObjectParent(mat,to);
304   baij->garray = garray;
305   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
306   ierr = ISDestroy(from);CHKERRQ(ierr);
307   ierr = ISDestroy(to);CHKERRQ(ierr);
308   ierr = VecDestroy(gvec);CHKERRQ(ierr);
309   PetscFunctionReturn(0);
310 }
311 
312 
313 /*
314      Takes the local part of an already assembled MPIBAIJ matrix
315    and disassembles it. This is to allow new nonzeros into the matrix
316    that require more communication in the matrix vector multiply.
317    Thus certain data-structures must be rebuilt.
318 
319    Kind of slow! But that's what application programmers get when
320    they are sloppy.
321 */
322 #undef __FUNCT__
323 #define __FUNCT__ "DisAssemble_MPISBAIJ"
324 int DisAssemble_MPISBAIJ(Mat A)
325 {
326   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
327   Mat           B = baij->B,Bnew;
328   Mat_SeqBAIJ   *Bbaij = (Mat_SeqBAIJ*)B->data;
329   int           ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
330   int           k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m;
331   MatScalar     *a = Bbaij->a;
332   PetscScalar   *atmp;
333 #if defined(PETSC_USE_MAT_SINGLE)
334   int           l;
335 #endif
336 
337   PetscFunctionBegin;
338 #if defined(PETSC_USE_MAT_SINGLE)
339   ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp);
340 #endif
341   /* free stuff related to matrix-vec multiply */
342   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
343   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
344   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
345   if (baij->colmap) {
346 #if defined (PETSC_USE_CTABLE)
347     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
348 #else
349     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
350     baij->colmap = 0;
351     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
352 #endif
353   }
354 
355   /* make sure that B is assembled so we can access its values */
356   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
357   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
358 
359   /* invent new B and copy stuff over */
360   ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
361   for (i=0; i<mbs; i++) {
362     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
363   }
364   ierr = MatCreate(PETSC_COMM_SELF,m,n,m,n,&Bnew);CHKERRQ(ierr);
365   ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
366   ierr = MatSeqBAIJSetPreallocation(Bnew,baij->bs,0,nz);CHKERRQ(ierr);
367   ierr = PetscFree(nz);CHKERRQ(ierr);
368 
369   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
370   for (i=0; i<mbs; i++) {
371     rvals[0] = bs*i;
372     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
373     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
374       col = garray[Bbaij->j[j]]*bs;
375       for (k=0; k<bs; k++) {
376 #if defined(PETSC_USE_MAT_SINGLE)
377         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
378 #else
379         atmp = a+j*bs2 + k*bs;
380 #endif
381         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
382         col++;
383       }
384     }
385   }
386 #if defined(PETSC_USE_MAT_SINGLE)
387   ierr = PetscFree(atmp);CHKERRQ(ierr);
388 #endif
389   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
390   baij->garray = 0;
391   ierr = PetscFree(rvals);CHKERRQ(ierr);
392   PetscLogObjectMemory(A,-ec*sizeof(int));
393   ierr = MatDestroy(B);CHKERRQ(ierr);
394   PetscLogObjectParent(A,Bnew);
395   baij->B = Bnew;
396   A->was_assembled = PETSC_FALSE;
397   PetscFunctionReturn(0);
398 }
399 
400 
401 
402