xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision e11b0e149a41efd678d96ff732835a7007915d7d)
1 /*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/
2 
3 /*
4    Support for the parallel SBAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
7 #include "src/vec/vecimpl.h"
8 extern int MatSetValues_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
12 int MatSetUpMultiply_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ       *sbaij = (Mat_MPISBAIJ*)mat->data;
15   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(sbaij->B->data);
16   int                Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray,*sgarray;
17   int                bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
18   IS                 from,to;
19   Vec                gvec;
20   int                rank=sbaij->rank,lsize,size=sbaij->size;
21   int                *owners=sbaij->rowners,*sowners,*ec_owner,prank=100,k;
22   PetscMap           vecmap;
23   PetscScalar        *ptr;
24 
25   PetscFunctionBegin;
26   /*
27   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], MatSetUpMultiply_MPISBAIJ is called ...\n",rank);
28   PetscSynchronizedFlush(PETSC_COMM_WORLD);
29   */
30   ierr = PetscOptionsGetInt(PETSC_NULL,"-prank",&prank,PETSC_NULL);CHKERRQ(ierr);
31   if (sbaij->lvec) {
32     ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr);
33     sbaij->lvec = 0;
34   }
35   if (sbaij->Mvctx) {
36     ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr);
37     sbaij->Mvctx = 0;
38   }
39 
40   /* For the first stab we make an array as long as the number of columns */
41   /* mark those columns that are in sbaij->B */
42   ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
43   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
44   for (i=0; i<mbs; i++) {
45     for (j=0; j<B->ilen[i]; j++) {
46       if (!indices[aj[B->i[i] + j]]) ec++;
47       indices[aj[B->i[i] + j] ] = 1;
48     }
49   }
50 
51   /* form arrays of columns we need */
52   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
53   ierr = PetscMalloc((3*ec+1)*sizeof(int),&sgarray);CHKERRQ(ierr);
54   ec_owner = sgarray + 2*ec;
55 
56   ec = 0;
57   for (j=0; j<size; j++){
58     for (i=owners[j]; i<owners[j+1]; i++){
59       if (indices[i]) {
60         garray[ec]   = i;
61         ec_owner[ec] = j;
62         ec++;
63       }
64     }
65   }
66 
67   if (rank == prank){
68     printf("proc[%d]: \n",rank);
69     for (i=0; i<ec;i++)  printf("[%d]: garray = %d, ec_owner = %d\n", i,garray[i], ec_owner[i]);
70   }
71 
72   /* make indices now point into garray */
73   for (i=0; i<ec; i++) {
74     indices[garray[i]] = i;
75   }
76 
77   /* compact out the extra columns in B */
78   for (i=0; i<mbs; i++) {
79     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
80   }
81   B->nbs       = ec;
82   sbaij->B->n   = ec*B->bs;
83   ierr = PetscFree(indices);CHKERRQ(ierr);
84 
85   /* create local vector that is used to scatter into */
86   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
87 
88   /* create two temporary index sets for building scatter-gather */
89   ierr = PetscMalloc((2*ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
90   for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
91   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);
92 
93   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
94   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
95 
96   if (rank == prank){
97     ierr = PetscPrintf(PETSC_COMM_SELF," from: ");CHKERRQ(ierr);
98     ierr = ISView(from,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
99     ierr = PetscPrintf(PETSC_COMM_SELF," to:");CHKERRQ(ierr);
100     ierr = ISView(to,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
101   }
102 
103   /* generate the scatter context */
104   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
105   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
106   ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr);
107 
108   sbaij->garray = garray;
109   PetscLogObjectParent(mat,sbaij->Mvctx);
110   PetscLogObjectParent(mat,sbaij->lvec);
111   PetscLogObjectParent(mat,from);
112   PetscLogObjectParent(mat,to);
113 
114   ierr = ISDestroy(from);CHKERRQ(ierr);
115   ierr = ISDestroy(to);CHKERRQ(ierr);
116 
117   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
118   lsize = (mbs + ec)*bs;
119   ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
120   ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
121   ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
122 
123   ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr);
124   ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr);
125 
126   /* x index in the IS sfrom */
127   for (i=0; i<ec; i++) {
128     j = ec_owner[i];
129     sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
130   }
131   /* b index in the IS sfrom */
132   k = sowners[rank]/bs + mbs;
133   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
134 
135   for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
136   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);
137 
138   /* x index in the IS sto */
139   k = sowners[rank]/bs + mbs;
140   for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
141   /* b index in the IS sto */
142   for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];
143 
144   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);
145 
146   if (rank == prank){
147     ierr = PetscPrintf(PETSC_COMM_SELF," sfrom: ");CHKERRQ(ierr);
148     ierr = ISView(from,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
149     ierr = PetscPrintf(PETSC_COMM_SELF," sto:");CHKERRQ(ierr);
150     ierr = ISView(to,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
151   }
152 
153   /* gnerate the SBAIJ scatter context */
154   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
155 
156    /*
157       Post the receives for the first matrix vector product. We sync-chronize after
158     this on the chance that the user immediately calls MatMult() after assemblying
159     the matrix.
160   */
161   ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr);
162 
163   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
164   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
165   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
166   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
167   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
168 
169   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
170   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
171   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
172 
173   ierr = PetscFree(stmp);CHKERRQ(ierr);
174   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
175 
176   PetscLogObjectParent(mat,sbaij->sMvctx);
177   PetscLogObjectParent(mat,sbaij->slvec0);
178   PetscLogObjectParent(mat,sbaij->slvec1);
179   PetscLogObjectParent(mat,sbaij->slvec0b);
180   PetscLogObjectParent(mat,sbaij->slvec1a);
181   PetscLogObjectParent(mat,sbaij->slvec1b);
182   PetscLogObjectParent(mat,from);
183   PetscLogObjectParent(mat,to);
184 
185   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
186   ierr = ISDestroy(from);CHKERRQ(ierr);
187   ierr = ISDestroy(to);CHKERRQ(ierr);
188   ierr = VecDestroy(gvec);CHKERRQ(ierr);
189   ierr = PetscFree(sgarray);CHKERRQ(ierr);
190 
191   PetscFunctionReturn(0);
192 }
193 
194 #undef __FUNCT__
195 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
196 int MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
197 {
198   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
199   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
200   int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
201   int                bs = baij->bs,*stmp;
202   IS                 from,to;
203   Vec                gvec;
204 #if defined (PETSC_USE_CTABLE)
205   PetscTable         gid1_lid1;
206   PetscTablePosition tpos;
207   int                gid,lid;
208 #endif
209 
210   PetscFunctionBegin;
211 
212 #if defined (PETSC_USE_CTABLE)
213   /* use a table - Mark Adams */
214   PetscTableCreate(B->mbs,&gid1_lid1);
215   for (i=0; i<B->mbs; i++) {
216     for (j=0; j<B->ilen[i]; j++) {
217       int data,gid1 = aj[B->i[i]+j] + 1;
218       ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
219       if (!data) {
220         /* one based table */
221         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
222       }
223     }
224   }
225   /* form array of columns we need */
226   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
227   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
228   while (tpos) {
229     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
230     gid--; lid--;
231     garray[lid] = gid;
232   }
233   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
234   /* qsort(garray, ec, sizeof(int), intcomparcarc); */
235   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
236   for (i=0; i<ec; i++) {
237     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
238   }
239   /* compact out the extra columns in B */
240   for (i=0; i<B->mbs; i++) {
241     for (j=0; j<B->ilen[i]; j++) {
242       int gid1 = aj[B->i[i] + j] + 1;
243       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
244       lid --;
245       aj[B->i[i]+j] = lid;
246     }
247   }
248   B->nbs     = ec;
249   baij->B->n = ec*B->bs;
250   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
251   /* Mark Adams */
252 #else
253   /* For the first stab we make an array as long as the number of columns */
254   /* mark those columns that are in baij->B */
255   ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
256   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
257   for (i=0; i<B->mbs; i++) {
258     for (j=0; j<B->ilen[i]; j++) {
259       if (!indices[aj[B->i[i] + j]]) ec++;
260       indices[aj[B->i[i] + j] ] = 1;
261     }
262   }
263 
264   /* form array of columns we need */
265   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
266   ec = 0;
267   for (i=0; i<Nbs; i++) {
268     if (indices[i]) {
269       garray[ec++] = i;
270     }
271   }
272 
273   /* make indices now point into garray */
274   for (i=0; i<ec; i++) {
275     indices[garray[i]] = i;
276   }
277 
278   /* compact out the extra columns in B */
279   for (i=0; i<B->mbs; i++) {
280     for (j=0; j<B->ilen[i]; j++) {
281       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
282     }
283   }
284   B->nbs       = ec;
285   baij->B->n   = ec*B->bs;
286   ierr = PetscFree(indices);CHKERRQ(ierr);
287 #endif
288 
289   /* create local vector that is used to scatter into */
290   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
291 
292   /* create two temporary index sets for building scatter-gather */
293   for (i=0; i<ec; i++) {
294     garray[i] = bs*garray[i];
295   }
296   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
297   for (i=0; i<ec; i++) {
298     garray[i] = garray[i]/bs;
299   }
300 
301   ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
302   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
303   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
304   ierr = PetscFree(stmp);CHKERRQ(ierr);
305 
306   /* create temporary global vector to generate scatter context */
307   /* this is inefficient, but otherwise we must do either
308      1) save garray until the first actual scatter when the vector is known or
309      2) have another way of generating a scatter context without a vector.*/
310   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
311 
312   /* gnerate the scatter context */
313   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
314 
315   /*
316       Post the receives for the first matrix vector product. We sync-chronize after
317     this on the chance that the user immediately calls MatMult() after assemblying
318     the matrix.
319   */
320   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
321   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
322 
323   PetscLogObjectParent(mat,baij->Mvctx);
324   PetscLogObjectParent(mat,baij->lvec);
325   PetscLogObjectParent(mat,from);
326   PetscLogObjectParent(mat,to);
327   baij->garray = garray;
328   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
329   ierr = ISDestroy(from);CHKERRQ(ierr);
330   ierr = ISDestroy(to);CHKERRQ(ierr);
331   ierr = VecDestroy(gvec);CHKERRQ(ierr);
332   PetscFunctionReturn(0);
333 }
334 
335 
336 /*
337      Takes the local part of an already assembled MPIBAIJ matrix
338    and disassembles it. This is to allow new nonzeros into the matrix
339    that require more communication in the matrix vector multiply.
340    Thus certain data-structures must be rebuilt.
341 
342    Kind of slow! But that's what application programmers get when
343    they are sloppy.
344 */
345 #undef __FUNCT__
346 #define __FUNCT__ "DisAssemble_MPISBAIJ"
347 int DisAssemble_MPISBAIJ(Mat A)
348 {
349   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
350   Mat           B = baij->B,Bnew;
351   Mat_SeqBAIJ   *Bbaij = (Mat_SeqBAIJ*)B->data;
352   int           ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
353   int           k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m;
354   MatScalar     *a = Bbaij->a;
355   PetscScalar   *atmp;
356 #if defined(PETSC_USE_MAT_SINGLE)
357   int           l;
358 #endif
359 
360   PetscFunctionBegin;
361 #if defined(PETSC_USE_MAT_SINGLE)
362   ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp);
363 #endif
364   /* free stuff related to matrix-vec multiply */
365   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
366   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
367   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
368   if (baij->colmap) {
369 #if defined (PETSC_USE_CTABLE)
370     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
371 #else
372     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
373     baij->colmap = 0;
374     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
375 #endif
376   }
377 
378   /* make sure that B is assembled so we can access its values */
379   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
380   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
381 
382   /* invent new B and copy stuff over */
383   ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
384   for (i=0; i<mbs; i++) {
385     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
386   }
387   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
388   ierr = PetscFree(nz);CHKERRQ(ierr);
389 
390   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
391   for (i=0; i<mbs; i++) {
392     rvals[0] = bs*i;
393     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
394     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
395       col = garray[Bbaij->j[j]]*bs;
396       for (k=0; k<bs; k++) {
397 #if defined(PETSC_USE_MAT_SINGLE)
398         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
399 #else
400         atmp = a+j*bs2 + k*bs;
401 #endif
402         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
403         col++;
404       }
405     }
406   }
407 #if defined(PETSC_USE_MAT_SINGLE)
408   ierr = PetscFree(atmp);CHKERRQ(ierr);
409 #endif
410   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
411   baij->garray = 0;
412   ierr = PetscFree(rvals);CHKERRQ(ierr);
413   PetscLogObjectMemory(A,-ec*sizeof(int));
414   ierr = MatDestroy(B);CHKERRQ(ierr);
415   PetscLogObjectParent(A,Bnew);
416   baij->B = Bnew;
417   A->was_assembled = PETSC_FALSE;
418   PetscFunctionReturn(0);
419 }
420 
421 
422 
423