xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision c05d87d6c2fa1d0952fe676de20339d09daa79bc)
1 #define PETSCMAT_DLL
2 
3 /*
4    Support for the parallel SBAIJ matrix vector multiply
5 */
6 #include "../src/mat/impls/sbaij/mpi/mpisbaij.h"
7 
8 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 
10 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
/*
   MatSetUpMultiply_MPISBAIJ - Builds the scatter contexts and work vectors
   used by the parallel matrix-vector product of an MPISBAIJ matrix.

   In outline:
     1. Scan the off-diagonal block B for the global block columns it
        actually references (ec of them); record them in ascending order in
        garray[] with the owning rank of each in ec_owner[], then compact
        B's column indices so they run 0..ec-1.
     2. Build the conventional BAIJ-style scatter Mvctx/lvec (not used by
        MatMult_MPISBAIJ() itself, but kept for applications).
     3. Build slvec0/slvec1 and the scatter sMvctx that the symmetric
        multiply does use: each process's slice of these vectors holds its
        mbs*bs local x entries followed by ec*bs ghost entries, and sMvctx
        moves both the x values and the partial products in one scatter.

   Collective on Mat.
*/
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(sbaij->B->data);
  PetscErrorCode ierr;
  PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
  PetscInt       bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
  IS             from,to;
  Vec            gvec;
  PetscMPIInt    rank=sbaij->rank,lsize,size=sbaij->size;
  PetscInt       *owners=sbaij->rangebs,*sowners,*ec_owner,k;
  PetscScalar    *ptr;

  PetscFunctionBegin;
  if (sbaij->sMvctx) {
    /* These two lines should be in DisAssemble_MPISBAIJ(). Don't know why it causes crash there? */
    ierr = VecScatterDestroy(sbaij->sMvctx);CHKERRQ(ierr);
    sbaij->sMvctx = 0;
  }

  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;  /* count each referenced block column once */
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form arrays of columns we need: garray[] lists the ec referenced
     global block columns in ascending order, ec_owner[] the rank owning
     each; sgarray is workspace for the padded (slvec) indices below */
  ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscMalloc2(2*ec,PetscInt,&sgarray,ec,PetscInt,&ec_owner);CHKERRQ(ierr);

  ec = 0;
  for (j=0; j<size; j++){
    for (i=owners[j]; i<owners[j+1]; i++){
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B: global block-column numbers are
     replaced by their local position 0..ec-1 */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs      = ec;
  sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather;
     stmp is sized 2*ec because it is reused for the sMvctx sets below */
  ierr = PetscMalloc(2*ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);

  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but useful for some applications */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);

  sbaij->garray = garray;
  ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into;
     local layout: mbs*bs owned entries followed by ec*bs ghost entries */
  lsize = (mbs + ec)*bs;
  ierr = VecCreateMPI(((PetscObject)mat)->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
  ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);

  sowners = sbaij->slvec0->map->range;  /* ownership ranges of the padded vector (in scalar entries) */

  /* x index in the IS sfrom: translate each needed global block column
     into its block index within the owner's padded slvec0 segment */
  for (i=0; i<ec; i++) {
    j = ec_owner[i];
    sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* b index in the IS sfrom: this process's own ghost (b) slots */
  k = sowners[rank]/bs + mbs;
  for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;

  for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);

  /* x index in the IS sto */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
  /* b index in the IS sto */
  for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);

  ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are sequential views sharing storage
     with the owned part resp. the ghost part of slvec1 (slvec0) */
  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);

  ierr = PetscFree(stmp);CHKERRQ(ierr);
  ierr = MPI_Barrier(((PetscObject)mat)->comm);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
155 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
/*
   MatSetUpMultiply_MPISBAIJ_2comm - Variant of MatSetUpMultiply_MPISBAIJ()
   that builds only the conventional BAIJ-style scatter (Mvctx/lvec): it
   compacts the off-diagonal block B so its column indices become local
   (0..ec-1), records the global block columns in garray, and creates the
   scatter from a global vector into the sequential work vector lvec.

   Two implementations of the column-detection phase are provided:
   with PETSC_USE_CTABLE a hash table keeps memory proportional to the
   number of referenced columns; otherwise a dense marker array of length
   Nbs is used.

   Collective on Mat.
*/
PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
{
  Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode     ierr;
  PetscInt           i,j,*aj = B->j,ec = 0,*garray;
  PetscInt           bs = mat->rmap->bs,*stmp;
  IS                 from,to;
  Vec                gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;   /* maps 1-based global block column -> 1-based local id */
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  PetscTableCreate(B->mbs,&gid1_lid1);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      /* shift to 1-based so 0 can mean "not found" in the table */
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;
    garray[lid] = gid;
  }
  /* sort, then rebuild the table so local ids follow ascending global order */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;  /* count each referenced block column once */
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  /* NOTE(review): unlike the CTABLE branch, this branch sets only cmap->n
     (not cmap->N) and does not call PetscLayoutSetUp() -- confirm this
     asymmetry is intentional */
  baij->B->cmap->n   = ec*mat->rmap->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather;
     garray is scaled to scalar indices in place, then restored below */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;  /* restore block indices */
  }

  ierr = PetscMalloc(ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  baij->garray = garray;
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
285 
286 /*
287      Takes the local part of an already assembled MPISBAIJ matrix
288    and disassembles it. This is to allow new nonzeros into the matrix
289    that require more communication in the matrix vector multiply.
290    Thus certain data-structures must be rebuilt.
291 
292    Kind of slow! But that's what application programmers get when
293    they are sloppy.
294 */
295 #undef __FUNCT__
296 #define __FUNCT__ "DisAssemble_MPISBAIJ"
297 PetscErrorCode DisAssemble_MPISBAIJ(Mat A)
298 {
299   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
300   Mat            B = baij->B,Bnew;
301   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
302   PetscErrorCode ierr;
303   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
304   PetscInt       k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
305   MatScalar      *a = Bbaij->a;
306   PetscScalar    *atmp;
307 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
308   PetscInt       l;
309 #endif
310 
311   PetscFunctionBegin;
312 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
313   ierr = PetscMalloc(A->rmap->bs*sizeof(PetscScalar),&atmp);
314 #endif
315   /* free stuff related to matrix-vec multiply */
316   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
317   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);
318   baij->lvec = 0;
319   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);
320   baij->Mvctx = 0;
321 
322   ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
323   ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
324   baij->slvec0 = 0;
325   ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
326   ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
327   ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
328   baij->slvec1 = 0;
329 
330   if (baij->colmap) {
331 #if defined (PETSC_USE_CTABLE)
332     ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
333 #else
334     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
335     baij->colmap = 0;
336     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
337 #endif
338   }
339 
340   /* make sure that B is assembled so we can access its values */
341   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
342   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
343 
344   /* invent new B and copy stuff over */
345   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
346   for (i=0; i<mbs; i++) {
347     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
348   }
349   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
350   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
351   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
352   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
353   ierr = PetscFree(nz);CHKERRQ(ierr);
354 
355   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
356   for (i=0; i<mbs; i++) {
357     rvals[0] = bs*i;
358     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
359     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
360       col = garray[Bbaij->j[j]]*bs;
361       for (k=0; k<bs; k++) {
362 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
363         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
364 #else
365         atmp = a+j*bs2 + k*bs;
366 #endif
367         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
368         col++;
369       }
370     }
371   }
372 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
373   ierr = PetscFree(atmp);CHKERRQ(ierr);
374 #endif
375   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
376   baij->garray = 0;
377   ierr = PetscFree(rvals);CHKERRQ(ierr);
378   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
379   ierr = MatDestroy(B);CHKERRQ(ierr);
380   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
381   baij->B = Bnew;
382   A->was_assembled = PETSC_FALSE;
383   PetscFunctionReturn(0);
384 }
385 
386 
387 
388