xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision a9d4b620bd6149cfaf43d33dcaf4c3949ec403cf)
1 /*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/
2 
3 /*
4    Support for the parallel SBAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
7 #include "src/vec/vecimpl.h"
8 extern int MatSetValues_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
12 int MatSetUpMultiply_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ       *sbaij = (Mat_MPISBAIJ*)mat->data;
15   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(sbaij->B->data);
16   int                Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray,*sgarray;
17   int                bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
18   IS                 from,to;
19   Vec                gvec;
20   int                rank=sbaij->rank,lsize,size=sbaij->size;
21   int                *owners=sbaij->rowners,*sowners,*ec_owner,prank=100,k;
22   PetscMap           vecmap;
23   PetscScalar        *ptr;
24 
25   PetscFunctionBegin;
26   /*
27   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], MatSetUpMultiply_MPISBAIJ is called ...\n",rank);
28   PetscSynchronizedFlush(PETSC_COMM_WORLD);
29   */
30   ierr = PetscOptionsGetInt(PETSC_NULL,"-prank",&prank,PETSC_NULL);CHKERRQ(ierr);
31   if (sbaij->lvec) {
32     ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr);
33     sbaij->lvec = 0;
34   }
35   if (sbaij->Mvctx) {
36     ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr);
37     sbaij->Mvctx = 0;
38   }
39 
40   /* For the first stab we make an array as long as the number of columns */
41   /* mark those columns that are in sbaij->B */
42   ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
43   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
44   for (i=0; i<mbs; i++) {
45     for (j=0; j<B->ilen[i]; j++) {
46       if (!indices[aj[B->i[i] + j]]) ec++;
47       indices[aj[B->i[i] + j] ] = 1;
48     }
49   }
50 
51   /* form arrays of columns we need */
52   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
53   ierr = PetscMalloc((3*ec+1)*sizeof(int),&sgarray);CHKERRQ(ierr);
54   ec_owner = sgarray + 2*ec;
55 
56   ec = 0;
57   for (j=0; j<size; j++){
58     for (i=owners[j]; i<owners[j+1]; i++){
59       if (indices[i]) {
60         garray[ec]   = i;
61         ec_owner[ec] = j;
62         ec++;
63       }
64     }
65   }
66 
67   if (rank == prank){
68     printf("proc[%d]: \n",rank);
69     for (i=0; i<ec;i++)  printf("[%d]: garray = %d, ec_owner = %d\n", i,garray[i], ec_owner[i]);
70   }
71 
72   /* make indices now point into garray */
73   for (i=0; i<ec; i++) {
74     indices[garray[i]] = i;
75   }
76 
77   /* compact out the extra columns in B */
78   for (i=0; i<mbs; i++) {
79     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
80   }
81   B->nbs       = ec;
82   sbaij->B->n   = ec*B->bs;
83   ierr = PetscFree(indices);CHKERRQ(ierr);
84 
85   /* create local vector that is used to scatter into */
86   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
87 
88   /* create two temporary index sets for building scatter-gather */
89   ierr = PetscMalloc((2*ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
90   for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
91   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);
92 
93   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
94   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
95 
96   if (rank == prank){
97     ierr = PetscPrintf(PETSC_COMM_SELF," from: ");CHKERRQ(ierr);
98     ierr = ISView(from,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
99     ierr = PetscPrintf(PETSC_COMM_SELF," to:");CHKERRQ(ierr);
100     ierr = ISView(to,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
101   }
102 
103   /* generate the scatter context */
104   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
105   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
106   ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr);
107 
108   sbaij->garray = garray;
109   PetscLogObjectParent(mat,sbaij->Mvctx);
110   PetscLogObjectParent(mat,sbaij->lvec);
111   PetscLogObjectParent(mat,from);
112   PetscLogObjectParent(mat,to);
113 
114   ierr = ISDestroy(from);CHKERRQ(ierr);
115   ierr = ISDestroy(to);CHKERRQ(ierr);
116 
117   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
118   lsize = (mbs + ec)*bs;
119   ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
120   ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
121   ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
122 
123   ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr);
124   ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr);
125   /* for (i=0; i<=size;i++) sowners[i] /= bs; */
126 
127   if (rank==prank){
128     printf("length of slvec0: \n", vec_size);
129     for (i=0; i<=size;i++)  printf("[%d]: owners = %d, sowners/bs = %d\n", i,owners[i], sowners[i]/bs);
130   }
131 
132   /* x index in the IS sfrom */
133   for (i=0; i<ec; i++) {
134     j = ec_owner[i];
135     sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
136   }
137   /* b index in the IS sfrom */
138   k = sowners[rank]/bs + mbs;
139   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
140 
141   for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
142   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);
143 
144   /* x index in the IS sto */
145   k = sowners[rank]/bs + mbs;
146   for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
147   /* b index in the IS sto */
148   for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];
149 
150   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);
151 
152   if (rank == prank){
153     ierr = PetscPrintf(PETSC_COMM_SELF," sfrom: ");CHKERRQ(ierr);
154     ierr = ISView(from,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
155     ierr = PetscPrintf(PETSC_COMM_SELF," sto:");CHKERRQ(ierr);
156     ierr = ISView(to,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
157   }
158 
159   /* gnerate the SBAIJ scatter context */
160   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
161 
162    /*
163       Post the receives for the first matrix vector product. We sync-chronize after
164     this on the chance that the user immediately calls MatMult() after assemblying
165     the matrix.
166   */
167   ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr);
168 
169   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
170   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
171   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
172   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
173   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
174 
175   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
176   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
177   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
178 
179   ierr = PetscFree(stmp);CHKERRQ(ierr);
180   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
181 
182   PetscLogObjectParent(mat,sbaij->sMvctx);
183   PetscLogObjectParent(mat,sbaij->slvec0);
184   PetscLogObjectParent(mat,sbaij->slvec1);
185   PetscLogObjectParent(mat,sbaij->slvec0b);
186   PetscLogObjectParent(mat,sbaij->slvec1a);
187   PetscLogObjectParent(mat,sbaij->slvec1b);
188   PetscLogObjectParent(mat,from);
189   PetscLogObjectParent(mat,to);
190 
191   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
192   ierr = ISDestroy(from);CHKERRQ(ierr);
193   ierr = ISDestroy(to);CHKERRQ(ierr);
194   ierr = VecDestroy(gvec);CHKERRQ(ierr);
195   ierr = PetscFree(sgarray);CHKERRQ(ierr);
196 
197   PetscFunctionReturn(0);
198 }
199 
200 #undef __FUNCT__
201 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
202 int MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
203 {
204   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
205   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
206   int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
207   int                bs = baij->bs,*stmp;
208   IS                 from,to;
209   Vec                gvec;
210 #if defined (PETSC_USE_CTABLE)
211   PetscTable         gid1_lid1;
212   PetscTablePosition tpos;
213   int                gid,lid;
214 #endif
215 
216   PetscFunctionBegin;
217 
218 #if defined (PETSC_USE_CTABLE)
219   /* use a table - Mark Adams */
220   PetscTableCreate(B->mbs,&gid1_lid1);
221   for (i=0; i<B->mbs; i++) {
222     for (j=0; j<B->ilen[i]; j++) {
223       int data,gid1 = aj[B->i[i]+j] + 1;
224       ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
225       if (!data) {
226         /* one based table */
227         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
228       }
229     }
230   }
231   /* form array of columns we need */
232   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
233   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
234   while (tpos) {
235     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
236     gid--; lid--;
237     garray[lid] = gid;
238   }
239   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
240   /* qsort(garray, ec, sizeof(int), intcomparcarc); */
241   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
242   for (i=0; i<ec; i++) {
243     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
244   }
245   /* compact out the extra columns in B */
246   for (i=0; i<B->mbs; i++) {
247     for (j=0; j<B->ilen[i]; j++) {
248       int gid1 = aj[B->i[i] + j] + 1;
249       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
250       lid --;
251       aj[B->i[i]+j] = lid;
252     }
253   }
254   B->nbs     = ec;
255   baij->B->n = ec*B->bs;
256   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
257   /* Mark Adams */
258 #else
259   /* For the first stab we make an array as long as the number of columns */
260   /* mark those columns that are in baij->B */
261   ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
262   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
263   for (i=0; i<B->mbs; i++) {
264     for (j=0; j<B->ilen[i]; j++) {
265       if (!indices[aj[B->i[i] + j]]) ec++;
266       indices[aj[B->i[i] + j] ] = 1;
267     }
268   }
269 
270   /* form array of columns we need */
271   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
272   ec = 0;
273   for (i=0; i<Nbs; i++) {
274     if (indices[i]) {
275       garray[ec++] = i;
276     }
277   }
278 
279   /* make indices now point into garray */
280   for (i=0; i<ec; i++) {
281     indices[garray[i]] = i;
282   }
283 
284   /* compact out the extra columns in B */
285   for (i=0; i<B->mbs; i++) {
286     for (j=0; j<B->ilen[i]; j++) {
287       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
288     }
289   }
290   B->nbs       = ec;
291   baij->B->n   = ec*B->bs;
292   ierr = PetscFree(indices);CHKERRQ(ierr);
293 #endif
294 
295   /* create local vector that is used to scatter into */
296   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
297 
298   /* create two temporary index sets for building scatter-gather */
299   for (i=0; i<ec; i++) {
300     garray[i] = bs*garray[i];
301   }
302   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
303   for (i=0; i<ec; i++) {
304     garray[i] = garray[i]/bs;
305   }
306 
307   ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
308   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
309   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
310   ierr = PetscFree(stmp);CHKERRQ(ierr);
311 
312   /* create temporary global vector to generate scatter context */
313   /* this is inefficient, but otherwise we must do either
314      1) save garray until the first actual scatter when the vector is known or
315      2) have another way of generating a scatter context without a vector.*/
316   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
317 
318   /* gnerate the scatter context */
319   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
320 
321   /*
322       Post the receives for the first matrix vector product. We sync-chronize after
323     this on the chance that the user immediately calls MatMult() after assemblying
324     the matrix.
325   */
326   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
327   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
328 
329   PetscLogObjectParent(mat,baij->Mvctx);
330   PetscLogObjectParent(mat,baij->lvec);
331   PetscLogObjectParent(mat,from);
332   PetscLogObjectParent(mat,to);
333   baij->garray = garray;
334   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
335   ierr = ISDestroy(from);CHKERRQ(ierr);
336   ierr = ISDestroy(to);CHKERRQ(ierr);
337   ierr = VecDestroy(gvec);CHKERRQ(ierr);
338   PetscFunctionReturn(0);
339 }
340 
341 
342 /*
343      Takes the local part of an already assembled MPIBAIJ matrix
344    and disassembles it. This is to allow new nonzeros into the matrix
345    that require more communication in the matrix vector multiply.
346    Thus certain data-structures must be rebuilt.
347 
348    Kind of slow! But that's what application programmers get when
349    they are sloppy.
350 */
351 #undef __FUNCT__
352 #define __FUNCT__ "DisAssemble_MPISBAIJ"
353 int DisAssemble_MPISBAIJ(Mat A)
354 {
355   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
356   Mat           B = baij->B,Bnew;
357   Mat_SeqBAIJ   *Bbaij = (Mat_SeqBAIJ*)B->data;
358   int           ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
359   int           k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m;
360   MatScalar     *a = Bbaij->a;
361   PetscScalar   *atmp;
362 #if defined(PETSC_USE_MAT_SINGLE)
363   int           l;
364 #endif
365 
366   PetscFunctionBegin;
367 #if defined(PETSC_USE_MAT_SINGLE)
368   ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp);
369 #endif
370   /* free stuff related to matrix-vec multiply */
371   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
372   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
373   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
374   if (baij->colmap) {
375 #if defined (PETSC_USE_CTABLE)
376     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
377 #else
378     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
379     baij->colmap = 0;
380     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
381 #endif
382   }
383 
384   /* make sure that B is assembled so we can access its values */
385   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
386   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
387 
388   /* invent new B and copy stuff over */
389   ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
390   for (i=0; i<mbs; i++) {
391     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
392   }
393   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
394   ierr = PetscFree(nz);CHKERRQ(ierr);
395 
396   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
397   for (i=0; i<mbs; i++) {
398     rvals[0] = bs*i;
399     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
400     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
401       col = garray[Bbaij->j[j]]*bs;
402       for (k=0; k<bs; k++) {
403 #if defined(PETSC_USE_MAT_SINGLE)
404         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
405 #else
406         atmp = a+j*bs2 + k*bs;
407 #endif
408         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
409         col++;
410       }
411     }
412   }
413 #if defined(PETSC_USE_MAT_SINGLE)
414   ierr = PetscFree(atmp);CHKERRQ(ierr);
415 #endif
416   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
417   baij->garray = 0;
418   ierr = PetscFree(rvals);CHKERRQ(ierr);
419   PetscLogObjectMemory(A,-ec*sizeof(int));
420   ierr = MatDestroy(B);CHKERRQ(ierr);
421   PetscLogObjectParent(A,Bnew);
422   baij->B = Bnew;
423   A->was_assembled = PETSC_FALSE;
424   PetscFunctionReturn(0);
425 }
426 
427 
428 
429