xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 2f59403fd2df48e396e6efa2e07d12d9da4cd99d)
1 #define PETSCMAT_DLL
2 
3 /*
4    Support for the parallel SBAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
7 
8 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
/*
   MatSetUpMultiply_MPISBAIJ - Builds the communication structures used by the
   parallel SBAIJ matrix-vector product: compacts the off-diagonal part B to
   its nonzero block columns, records the local-to-global block-column map in
   sbaij->garray, and creates the work vectors (lvec, slvec0/slvec0b,
   slvec1/slvec1a/slvec1b) and scatter contexts (Mvctx, sMvctx).
*/
13 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
14 {
15   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
16   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(sbaij->B->data);
17   PetscErrorCode ierr;
18   PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
19   PetscInt       bs = mat->rmap.bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
20   IS             from,to;
21   Vec            gvec;
22   PetscMPIInt    rank=sbaij->rank,lsize,size=sbaij->size;
23   PetscInt       *owners=sbaij->rangebs,*sowners,*ec_owner,k;
24   PetscScalar    *ptr;
25 
26   PetscFunctionBegin;
27   if (sbaij->sMvctx) {
28     /* These two lines belong in DisAssemble_MPISBAIJ(); moving them there caused a crash
       for reasons not yet understood, so the old scatter context is destroyed here instead */
29     ierr = VecScatterDestroy(sbaij->sMvctx);CHKERRQ(ierr);
30     sbaij->sMvctx = 0;
31   }
32 
33   /* For the first stab we make an array as long as the number of columns */
34   /* mark those columns that are in sbaij->B; ec counts the distinct block columns found */
35   ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
36   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
37   for (i=0; i<mbs; i++) {
38     for (j=0; j<B->ilen[i]; j++) {
39       if (!indices[aj[B->i[i] + j]]) ec++;
40       indices[aj[B->i[i] + j] ] = 1;
41     }
42   }
43 
44   /* form arrays of columns we need */
45   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
46   ierr = PetscMalloc((3*ec+1)*sizeof(PetscInt),&sgarray);CHKERRQ(ierr);
47   ec_owner = sgarray + 2*ec; /* last ec entries of the sgarray allocation hold the owning rank of each needed block column */
48 
49   ec = 0;
50   for (j=0; j<size; j++){
51     for (i=owners[j]; i<owners[j+1]; i++){
52       if (indices[i]) {
53         garray[ec]   = i;
54         ec_owner[ec] = j;
55         ec++;
56       }
57     }
58   }
59 
60   /* make indices now point into garray */
61   for (i=0; i<ec; i++) indices[garray[i]] = i;
62 
63   /* compact out the extra columns in B: replace global block-column ids with local ids */
64   for (i=0; i<mbs; i++) {
65     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
66   }
67   B->nbs      = ec;
68   sbaij->B->cmap.n = sbaij->B->cmap.N = ec*mat->rmap.bs;
69   ierr = PetscMapInitialize(sbaij->B->comm,&(sbaij->B->cmap));CHKERRQ(ierr);
70   ierr = PetscFree(indices);CHKERRQ(ierr);
71 
72   /* create local vector that is used to scatter into */
73   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
74 
75   /* create two temporary index sets for building scatter-gather; stmp is sized 2*ec for reuse below */
76   ierr = PetscMalloc((2*ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
77   for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
78   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);
79 
80   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
81   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
82 
83   /* generate the scatter context
84      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but useful for some applications */
85   ierr = VecCreateMPI(mat->comm,mat->cmap.n,mat->cmap.N,&gvec);CHKERRQ(ierr);
86   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
87 
88   sbaij->garray = garray;
89   ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr);
90   ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr);
91   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
92   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
93 
94   ierr = ISDestroy(from);CHKERRQ(ierr);
95   ierr = ISDestroy(to);CHKERRQ(ierr);
96 
97   /* create parallel vector that is used by SBAIJ matrix to scatter from/into;
     local length is the owned part (mbs*bs) plus the needed ghost part (ec*bs) */
98   lsize = (mbs + ec)*bs;
99   ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
100   ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
101   ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); /* NOTE(review): vec_size is fetched but never used below */
102 
103   sowners = sbaij->slvec0->map.range; /* ownership ranges of slvec0 in scalar entries; divided by bs below to get block indices */
104 
105   /* x index in the IS sfrom */
106   for (i=0; i<ec; i++) {
107     j = ec_owner[i];
108     sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
109   }
110   /* b index in the IS sfrom */
111   k = sowners[rank]/bs + mbs;
112   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
113 
114   for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
115   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);
116 
117   /* x index in the IS sto */
118   k = sowners[rank]/bs + mbs;
119   for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
120   /* b index in the IS sto */
121   for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];
122 
123   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);
124 
125   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
126 
127   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); /* nt = (mbs+ec)*bs, the local length chosen above */
128   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
    /* slvec1a/slvec1b (and slvec0b) alias the owned and ghost pieces of the parallel vectors' storage */
129   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
130   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
131   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
132 
133   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
134   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
135   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
136 
137   ierr = PetscFree(stmp);CHKERRQ(ierr);
138   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
139 
140   ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr);
141   ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr);
142   ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr);
143   ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr);
144   ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr);
145   ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr);
146   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
147   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
148 
149   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
150   ierr = ISDestroy(from);CHKERRQ(ierr);
151   ierr = ISDestroy(to);CHKERRQ(ierr);
152   ierr = VecDestroy(gvec);CHKERRQ(ierr);
153   ierr = PetscFree(sgarray);CHKERRQ(ierr);
154   PetscFunctionReturn(0);
155 }
156 
157 #undef __FUNCT__
158 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
159 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
160 {
161   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
162   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
163   PetscErrorCode     ierr;
164   PetscInt           i,j,*aj = B->j,ec = 0,*garray;
165   PetscInt           bs = mat->rmap.bs,*stmp;
166   IS                 from,to;
167   Vec                gvec;
168 #if defined (PETSC_USE_CTABLE)
169   PetscTable         gid1_lid1;
170   PetscTablePosition tpos;
171   PetscInt           gid,lid;
172 #else
173   PetscInt           Nbs = baij->Nbs,*indices;
174 #endif
175 
176   PetscFunctionBegin;
177 #if defined (PETSC_USE_CTABLE)
178   /* use a table - Mark Adams */
179   PetscTableCreate(B->mbs,&gid1_lid1);
180   for (i=0; i<B->mbs; i++) {
181     for (j=0; j<B->ilen[i]; j++) {
182       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
183       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
184       if (!data) {
185         /* one based table */
186         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
187       }
188     }
189   }
190   /* form array of columns we need */
191   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
192   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
193   while (tpos) {
194     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
195     gid--; lid--;
196     garray[lid] = gid;
197   }
198   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
199   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
200   for (i=0; i<ec; i++) {
201     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
202   }
203   /* compact out the extra columns in B */
204   for (i=0; i<B->mbs; i++) {
205     for (j=0; j<B->ilen[i]; j++) {
206       PetscInt gid1 = aj[B->i[i] + j] + 1;
207       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
208       lid --;
209       aj[B->i[i]+j] = lid;
210     }
211   }
212   B->nbs     = ec;
213   baij->B->cmap.n = baij->B->cmap.N = ec*mat->rmap.bs;
214   ierr = PetscMapInitialize(baij->B->comm,&(baij->B->cmap));CHKERRQ(ierr);
215   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
216   /* Mark Adams */
217 #else
218   /* For the first stab we make an array as long as the number of columns */
219   /* mark those columns that are in baij->B */
220   ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
221   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
222   for (i=0; i<B->mbs; i++) {
223     for (j=0; j<B->ilen[i]; j++) {
224       if (!indices[aj[B->i[i] + j]]) ec++;
225       indices[aj[B->i[i] + j] ] = 1;
226     }
227   }
228 
229   /* form array of columns we need */
230   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
231   ec = 0;
232   for (i=0; i<Nbs; i++) {
233     if (indices[i]) {
234       garray[ec++] = i;
235     }
236   }
237 
238   /* make indices now point into garray */
239   for (i=0; i<ec; i++) {
240     indices[garray[i]] = i;
241   }
242 
243   /* compact out the extra columns in B */
244   for (i=0; i<B->mbs; i++) {
245     for (j=0; j<B->ilen[i]; j++) {
246       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
247     }
248   }
249   B->nbs       = ec;
250   baij->B->cmap.n   = ec*mat->rmap.bs;
251   ierr = PetscFree(indices);CHKERRQ(ierr);
252 #endif
253 
254   /* create local vector that is used to scatter into */
255   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
256 
257   /* create two temporary index sets for building scatter-gather */
258   for (i=0; i<ec; i++) {
259     garray[i] = bs*garray[i];
260   }
261   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
262   for (i=0; i<ec; i++) {
263     garray[i] = garray[i]/bs;
264   }
265 
266   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
267   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
268   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
269   ierr = PetscFree(stmp);CHKERRQ(ierr);
270 
271   /* create temporary global vector to generate scatter context */
272   /* this is inefficient, but otherwise we must do either
273      1) save garray until the first actual scatter when the vector is known or
274      2) have another way of generating a scatter context without a vector.*/
275   ierr = VecCreateMPI(mat->comm,mat->cmap.n,mat->cmap.N,&gvec);CHKERRQ(ierr);
276 
277   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
278 
279   ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
280   ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
281   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
282   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
283   baij->garray = garray;
284   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
285   ierr = ISDestroy(from);CHKERRQ(ierr);
286   ierr = ISDestroy(to);CHKERRQ(ierr);
287   ierr = VecDestroy(gvec);CHKERRQ(ierr);
288   PetscFunctionReturn(0);
289 }
290 
291 /*
292      Takes the local part of an already assembled MPISBAIJ matrix
293    and disassembles it. This is to allow new nonzeros into the matrix
294    that require more communication in the matrix vector multiply.
295    Thus certain data-structures must be rebuilt.
296 
297    Kind of slow! But that's what application programmers get when
298    they are sloppy.
299 */
300 #undef __FUNCT__
301 #define __FUNCT__ "DisAssemble_MPISBAIJ"
302 PetscErrorCode DisAssemble_MPISBAIJ(Mat A)
303 {
304   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
305   Mat            B = baij->B,Bnew;
306   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
307   PetscErrorCode ierr;
308   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap.n,col,*garray=baij->garray;
309   PetscInt       k,bs=A->rmap.bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap.n;
310   MatScalar      *a = Bbaij->a;
311   PetscScalar    *atmp;
312 #if defined(PETSC_USE_MAT_SINGLE)
313   PetscInt       l;
314 #endif
315 
316   PetscFunctionBegin;
317 #if defined(PETSC_USE_MAT_SINGLE)
318   ierr = PetscMalloc(A->rmap.bs*sizeof(PetscScalar),&atmp);
319 #endif
320   /* free stuff related to matrix-vec multiply */
321   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
322   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);
323   baij->lvec = 0;
324   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);
325   baij->Mvctx = 0;
326 
327   ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
328   ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
329   baij->slvec0 = 0;
330   ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
331   ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
332   ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
333   baij->slvec1 = 0;
334 
335   if (baij->colmap) {
336 #if defined (PETSC_USE_CTABLE)
337     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
338 #else
339     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
340     baij->colmap = 0;
341     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
342 #endif
343   }
344 
345   /* make sure that B is assembled so we can access its values */
346   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348 
349   /* invent new B and copy stuff over */
350   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
351   for (i=0; i<mbs; i++) {
352     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
353   }
354   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
355   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
356   ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
357   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap.bs,0,nz);CHKERRQ(ierr);
358   ierr = PetscFree(nz);CHKERRQ(ierr);
359 
360   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
361   for (i=0; i<mbs; i++) {
362     rvals[0] = bs*i;
363     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
364     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
365       col = garray[Bbaij->j[j]]*bs;
366       for (k=0; k<bs; k++) {
367 #if defined(PETSC_USE_MAT_SINGLE)
368         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
369 #else
370         atmp = a+j*bs2 + k*bs;
371 #endif
372         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
373         col++;
374       }
375     }
376   }
377 #if defined(PETSC_USE_MAT_SINGLE)
378   ierr = PetscFree(atmp);CHKERRQ(ierr);
379 #endif
380   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
381   baij->garray = 0;
382   ierr = PetscFree(rvals);CHKERRQ(ierr);
383   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
384   ierr = MatDestroy(B);CHKERRQ(ierr);
385   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
386   baij->B = Bnew;
387   A->was_assembled = PETSC_FALSE;
388   PetscFunctionReturn(0);
389 }
390 
391 
392 
393