xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision e82e9f6b2f396ccc2201d5c70911055c46105893)
1 #define PETSCMAT_DLL
2 
3 /*
4    Support for the parallel SBAIJ matrix vector multiply
5 */
6 #include "../src/mat/impls/sbaij/mpi/mpisbaij.h"
7 
8 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
13 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
14 {
15   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
16   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(sbaij->B->data);
17   PetscErrorCode ierr;
18   PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
19   PetscInt       bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
20   IS             from,to;
21   Vec            gvec;
22   PetscMPIInt    rank=sbaij->rank,lsize,size=sbaij->size;
23   PetscInt       *owners=sbaij->rangebs,*sowners,*ec_owner,k;
24   PetscScalar    *ptr;
25 
26   PetscFunctionBegin;
27   if (sbaij->sMvctx) {
28     /* This two lines should be in DisAssemble_MPISBAIJ(). Don't know why it causes crash there? */
29     ierr = VecScatterDestroy(sbaij->sMvctx);CHKERRQ(ierr);
30     sbaij->sMvctx = 0;
31   }
32 
33   /* For the first stab we make an array as long as the number of columns */
34   /* mark those columns that are in sbaij->B */
35   ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
36   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
37   for (i=0; i<mbs; i++) {
38     for (j=0; j<B->ilen[i]; j++) {
39       if (!indices[aj[B->i[i] + j]]) ec++;
40       indices[aj[B->i[i] + j] ] = 1;
41     }
42   }
43 
44   /* form arrays of columns we need */
45   ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
46   ierr = PetscMalloc2(2*ec,PetscInt,&sgarray,ec,PetscInt,&ec_owner);CHKERRQ(ierr);
47 
48   ec = 0;
49   for (j=0; j<size; j++){
50     for (i=owners[j]; i<owners[j+1]; i++){
51       if (indices[i]) {
52         garray[ec]   = i;
53         ec_owner[ec] = j;
54         ec++;
55       }
56     }
57   }
58 
59   /* make indices now point into garray */
60   for (i=0; i<ec; i++) indices[garray[i]] = i;
61 
62   /* compact out the extra columns in B */
63   for (i=0; i<mbs; i++) {
64     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
65   }
66   B->nbs      = ec;
67   sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs;
68   ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr);
69   ierr = PetscFree(indices);CHKERRQ(ierr);
70 
71   /* create local vector that is used to scatter into */
72   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
73 
74   /* create two temporary index sets for building scatter-gather */
75   ierr = PetscMalloc(2*ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
76   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
77   for (i=0; i<ec; i++) { stmp[i] = i; }
78   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
79 
80   /* generate the scatter context
81      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */
82   ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
83   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
84   ierr = VecDestroy(gvec);CHKERRQ(ierr);
85 
86   sbaij->garray = garray;
87   ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr);
88   ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr);
89   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
90   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
91 
92   ierr = ISDestroy(from);CHKERRQ(ierr);
93   ierr = ISDestroy(to);CHKERRQ(ierr);
94 
95   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
96   lsize = (mbs + ec)*bs;
97   ierr = VecCreateMPI(((PetscObject)mat)->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
98   ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
99   ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
100 
101   sowners = sbaij->slvec0->map->range;
102 
103   /* x index in the IS sfrom */
104   for (i=0; i<ec; i++) {
105     j = ec_owner[i];
106     sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
107   }
108   /* b index in the IS sfrom */
109   k = sowners[rank]/bs + mbs;
110   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
111   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,&from);CHKERRQ(ierr);
112 
113   /* x index in the IS sto */
114   k = sowners[rank]/bs + mbs;
115   for (i=0; i<ec; i++) stmp[i] = (k + i);
116   /* b index in the IS sto */
117   for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];
118 
119   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);
120 
121   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
122 
123   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
124   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
125   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
126   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
127   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
128 
129   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
130   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
131   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
132 
133   ierr = PetscFree(stmp);CHKERRQ(ierr);
134   ierr = MPI_Barrier(((PetscObject)mat)->comm);CHKERRQ(ierr);
135 
136   ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr);
137   ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr);
138   ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr);
139   ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr);
140   ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr);
141   ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr);
142   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
143   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
144 
145   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
146   ierr = ISDestroy(from);CHKERRQ(ierr);
147   ierr = ISDestroy(to);CHKERRQ(ierr);
148   ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 #undef __FUNCT__
153 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
154 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
155 {
156   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
157   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
158   PetscErrorCode     ierr;
159   PetscInt           i,j,*aj = B->j,ec = 0,*garray;
160   PetscInt           bs = mat->rmap->bs,*stmp;
161   IS                 from,to;
162   Vec                gvec;
163 #if defined (PETSC_USE_CTABLE)
164   PetscTable         gid1_lid1;
165   PetscTablePosition tpos;
166   PetscInt           gid,lid;
167 #else
168   PetscInt           Nbs = baij->Nbs,*indices;
169 #endif
170 
171   PetscFunctionBegin;
172 #if defined (PETSC_USE_CTABLE)
173   /* use a table - Mark Adams */
174   PetscTableCreate(B->mbs,&gid1_lid1);
175   for (i=0; i<B->mbs; i++) {
176     for (j=0; j<B->ilen[i]; j++) {
177       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
178       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
179       if (!data) {
180         /* one based table */
181         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
182       }
183     }
184   }
185   /* form array of columns we need */
186   ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
187   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
188   while (tpos) {
189     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
190     gid--; lid--;
191     garray[lid] = gid;
192   }
193   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
194   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
195   for (i=0; i<ec; i++) {
196     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
197   }
198   /* compact out the extra columns in B */
199   for (i=0; i<B->mbs; i++) {
200     for (j=0; j<B->ilen[i]; j++) {
201       PetscInt gid1 = aj[B->i[i] + j] + 1;
202       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
203       lid --;
204       aj[B->i[i]+j] = lid;
205     }
206   }
207   B->nbs     = ec;
208   baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
209   ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
210   ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr);
211   /* Mark Adams */
212 #else
213   /* For the first stab we make an array as long as the number of columns */
214   /* mark those columns that are in baij->B */
215   ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
216   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
217   for (i=0; i<B->mbs; i++) {
218     for (j=0; j<B->ilen[i]; j++) {
219       if (!indices[aj[B->i[i] + j]]) ec++;
220       indices[aj[B->i[i] + j] ] = 1;
221     }
222   }
223 
224   /* form array of columns we need */
225   ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
226   ec = 0;
227   for (i=0; i<Nbs; i++) {
228     if (indices[i]) {
229       garray[ec++] = i;
230     }
231   }
232 
233   /* make indices now point into garray */
234   for (i=0; i<ec; i++) {
235     indices[garray[i]] = i;
236   }
237 
238   /* compact out the extra columns in B */
239   for (i=0; i<B->mbs; i++) {
240     for (j=0; j<B->ilen[i]; j++) {
241       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
242     }
243   }
244   B->nbs       = ec;
245   baij->B->cmap->n   = ec*mat->rmap->bs;
246   ierr = PetscFree(indices);CHKERRQ(ierr);
247 #endif
248 
249   /* create local vector that is used to scatter into */
250   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
251 
252   /* create two temporary index sets for building scatter-gather */
253   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
254 
255   ierr = PetscMalloc(ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
256   for (i=0; i<ec; i++) { stmp[i] = i; }
257   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
258   ierr = PetscFree(stmp);CHKERRQ(ierr);
259 
260   /* create temporary global vector to generate scatter context */
261   ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
262   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
263   ierr = VecDestroy(gvec);CHKERRQ(ierr);
264 
265   ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
266   ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
267   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
268   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
269   baij->garray = garray;
270   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
271   ierr = ISDestroy(from);CHKERRQ(ierr);
272   ierr = ISDestroy(to);CHKERRQ(ierr);
273   PetscFunctionReturn(0);
274 }
275 
276 /*
277      Takes the local part of an already assembled MPISBAIJ matrix
278    and disassembles it. This is to allow new nonzeros into the matrix
279    that require more communication in the matrix vector multiply.
280    Thus certain data-structures must be rebuilt.
281 
282    Kind of slow! But that's what application programmers get when
283    they are sloppy.
284 */
285 #undef __FUNCT__
286 #define __FUNCT__ "DisAssemble_MPISBAIJ"
287 PetscErrorCode DisAssemble_MPISBAIJ(Mat A)
288 {
289   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
290   Mat            B = baij->B,Bnew;
291   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
292   PetscErrorCode ierr;
293   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
294   PetscInt       k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
295   MatScalar      *a = Bbaij->a;
296   PetscScalar    *atmp;
297 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
298   PetscInt       l;
299 #endif
300 
301   PetscFunctionBegin;
302 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
303   ierr = PetscMalloc(A->rmap->bs*sizeof(PetscScalar),&atmp);
304 #endif
305   /* free stuff related to matrix-vec multiply */
306   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
307   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);
308   baij->lvec = 0;
309   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);
310   baij->Mvctx = 0;
311 
312   ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
313   ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
314   baij->slvec0 = 0;
315   ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
316   ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
317   ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
318   baij->slvec1 = 0;
319 
320   if (baij->colmap) {
321 #if defined (PETSC_USE_CTABLE)
322     ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
323 #else
324     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
325     baij->colmap = 0;
326     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
327 #endif
328   }
329 
330   /* make sure that B is assembled so we can access its values */
331   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
332   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
333 
334   /* invent new B and copy stuff over */
335   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
336   for (i=0; i<mbs; i++) {
337     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
338   }
339   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
340   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
341   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
342   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
343   ierr = PetscFree(nz);CHKERRQ(ierr);
344 
345   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
346   for (i=0; i<mbs; i++) {
347     rvals[0] = bs*i;
348     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
349     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
350       col = garray[Bbaij->j[j]]*bs;
351       for (k=0; k<bs; k++) {
352 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
353         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
354 #else
355         atmp = a+j*bs2 + k*bs;
356 #endif
357         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
358         col++;
359       }
360     }
361   }
362 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
363   ierr = PetscFree(atmp);CHKERRQ(ierr);
364 #endif
365   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
366   baij->garray = 0;
367   ierr = PetscFree(rvals);CHKERRQ(ierr);
368   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
369   ierr = MatDestroy(B);CHKERRQ(ierr);
370   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
371   baij->B = Bnew;
372   A->was_assembled = PETSC_FALSE;
373   PetscFunctionReturn(0);
374 }
375 
376 
377 
378