xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 01a79839fc82a7dabb7a87cd2a8bb532c6bfa88d)
1 #define PETSCMAT_DLL
2 
3 /*
4    Support for the parallel SBAIJ matrix vector multiply
5 */
6 #include "../src/mat/impls/sbaij/mpi/mpisbaij.h"
7 
8 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
13 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
14 {
15   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
16   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(sbaij->B->data);
17   PetscErrorCode ierr;
18   PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
19   PetscInt       bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
20   IS             from,to;
21   Vec            gvec;
22   PetscMPIInt    rank=sbaij->rank,lsize,size=sbaij->size;
23   PetscInt       *owners=sbaij->rangebs,*sowners,*ec_owner,k;
24   PetscScalar    *ptr;
25 
26   PetscFunctionBegin;
27   if (sbaij->sMvctx) {
28     /* This two lines should be in DisAssemble_MPISBAIJ(). Don't know why it causes crash there? */
29     ierr = VecScatterDestroy(sbaij->sMvctx);CHKERRQ(ierr);
30     sbaij->sMvctx = 0;
31   }
32 
33   /* For the first stab we make an array as long as the number of columns */
34   /* mark those columns that are in sbaij->B */
35   ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
36   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
37   for (i=0; i<mbs; i++) {
38     for (j=0; j<B->ilen[i]; j++) {
39       if (!indices[aj[B->i[i] + j]]) ec++;
40       indices[aj[B->i[i] + j] ] = 1;
41     }
42   }
43 
44   /* form arrays of columns we need */
45   ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
46   ierr = PetscMalloc2(2*ec,PetscInt,&sgarray,ec,PetscInt,&ec_owner);CHKERRQ(ierr);
47 
48   ec = 0;
49   for (j=0; j<size; j++){
50     for (i=owners[j]; i<owners[j+1]; i++){
51       if (indices[i]) {
52         garray[ec]   = i;
53         ec_owner[ec] = j;
54         ec++;
55       }
56     }
57   }
58 
59   /* make indices now point into garray */
60   for (i=0; i<ec; i++) indices[garray[i]] = i;
61 
62   /* compact out the extra columns in B */
63   for (i=0; i<mbs; i++) {
64     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
65   }
66   B->nbs      = ec;
67   sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs;
68   ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr);
69   ierr = PetscFree(indices);CHKERRQ(ierr);
70 
71   /* create local vector that is used to scatter into */
72   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
73 
74   /* create two temporary index sets for building scatter-gather */
75   ierr = PetscMalloc(2*ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
76   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
77   for (i=0; i<ec; i++) { stmp[i] = i; }
78   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
79 
80   /* generate the scatter context
81      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */
82   ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
83   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
84   ierr = VecDestroy(gvec);CHKERRQ(ierr);
85 
86   sbaij->garray = garray;
87   ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr);
88   ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr);
89   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
90   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
91 
92   ierr = ISDestroy(from);CHKERRQ(ierr);
93   ierr = ISDestroy(to);CHKERRQ(ierr);
94 
95   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
96   lsize = (mbs + ec)*bs;
97   ierr = VecCreateMPI(((PetscObject)mat)->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
98   ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
99   ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
100 
101   sowners = sbaij->slvec0->map->range;
102 
103   /* x index in the IS sfrom */
104   for (i=0; i<ec; i++) {
105     j = ec_owner[i];
106     sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
107   }
108   /* b index in the IS sfrom */
109   k = sowners[rank]/bs + mbs;
110   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
111   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
112 
113   /* x index in the IS sto */
114   k = sowners[rank]/bs + mbs;
115   for (i=0; i<ec; i++) stmp[i] = (k + i);
116   /* b index in the IS sto */
117   for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];
118 
119   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
120 
121   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
122 
123   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
124   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
125   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
126   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
127   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
128 
129   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
130   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
131   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
132 
133   ierr = PetscFree(stmp);CHKERRQ(ierr);
134   ierr = MPI_Barrier(((PetscObject)mat)->comm);CHKERRQ(ierr);
135 
136   ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr);
137   ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr);
138   ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr);
139   ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr);
140   ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr);
141   ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr);
142   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
143   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
144 
145   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
146   ierr = ISDestroy(from);CHKERRQ(ierr);
147   ierr = ISDestroy(to);CHKERRQ(ierr);
148   ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 #undef __FUNCT__
153 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
154 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
155 {
156   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
157   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
158   PetscErrorCode     ierr;
159   PetscInt           i,j,*aj = B->j,ec = 0,*garray;
160   PetscInt           bs = mat->rmap->bs,*stmp;
161   IS                 from,to;
162   Vec                gvec;
163 #if defined (PETSC_USE_CTABLE)
164   PetscTable         gid1_lid1;
165   PetscTablePosition tpos;
166   PetscInt           gid,lid;
167 #else
168   PetscInt           Nbs = baij->Nbs,*indices;
169 #endif
170 
171   PetscFunctionBegin;
172 #if defined (PETSC_USE_CTABLE)
173   /* use a table - Mark Adams */
174   PetscTableCreate(B->mbs,&gid1_lid1);
175   for (i=0; i<B->mbs; i++) {
176     for (j=0; j<B->ilen[i]; j++) {
177       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
178       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
179       if (!data) {
180         /* one based table */
181         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
182       }
183     }
184   }
185   /* form array of columns we need */
186   ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
187   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
188   while (tpos) {
189     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
190     gid--; lid--;
191     garray[lid] = gid;
192   }
193   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
194   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
195   for (i=0; i<ec; i++) {
196     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
197   }
198   /* compact out the extra columns in B */
199   for (i=0; i<B->mbs; i++) {
200     for (j=0; j<B->ilen[i]; j++) {
201       PetscInt gid1 = aj[B->i[i] + j] + 1;
202       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
203       lid --;
204       aj[B->i[i]+j] = lid;
205     }
206   }
207   B->nbs     = ec;
208   baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
209   ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
210   ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr);
211   /* Mark Adams */
212 #else
213   /* For the first stab we make an array as long as the number of columns */
214   /* mark those columns that are in baij->B */
215   ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
216   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
217   for (i=0; i<B->mbs; i++) {
218     for (j=0; j<B->ilen[i]; j++) {
219       if (!indices[aj[B->i[i] + j]]) ec++;
220       indices[aj[B->i[i] + j] ] = 1;
221     }
222   }
223 
224   /* form array of columns we need */
225   ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
226   ec = 0;
227   for (i=0; i<Nbs; i++) {
228     if (indices[i]) {
229       garray[ec++] = i;
230     }
231   }
232 
233   /* make indices now point into garray */
234   for (i=0; i<ec; i++) {
235     indices[garray[i]] = i;
236   }
237 
238   /* compact out the extra columns in B */
239   for (i=0; i<B->mbs; i++) {
240     for (j=0; j<B->ilen[i]; j++) {
241       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
242     }
243   }
244   B->nbs       = ec;
245   baij->B->cmap->n   = ec*mat->rmap->bs;
246   ierr = PetscFree(indices);CHKERRQ(ierr);
247 #endif
248 
249   /* create local vector that is used to scatter into */
250   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
251 
252   /* create two temporary index sets for building scatter-gather */
253   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
254 
255   ierr = PetscMalloc(ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
256   for (i=0; i<ec; i++) { stmp[i] = i; }
257   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);
258 
259   /* create temporary global vector to generate scatter context */
260   ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
261   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
262   ierr = VecDestroy(gvec);CHKERRQ(ierr);
263 
264   ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
265   ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
266   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
267   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
268   baij->garray = garray;
269   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
270   ierr = ISDestroy(from);CHKERRQ(ierr);
271   ierr = ISDestroy(to);CHKERRQ(ierr);
272   PetscFunctionReturn(0);
273 }
274 
275 /*
276      Takes the local part of an already assembled MPISBAIJ matrix
277    and disassembles it. This is to allow new nonzeros into the matrix
278    that require more communication in the matrix vector multiply.
279    Thus certain data-structures must be rebuilt.
280 
281    Kind of slow! But that's what application programmers get when
282    they are sloppy.
283 */
284 #undef __FUNCT__
285 #define __FUNCT__ "DisAssemble_MPISBAIJ"
286 PetscErrorCode DisAssemble_MPISBAIJ(Mat A)
287 {
288   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
289   Mat            B = baij->B,Bnew;
290   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
291   PetscErrorCode ierr;
292   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
293   PetscInt       k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
294   MatScalar      *a = Bbaij->a;
295   PetscScalar    *atmp;
296 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
297   PetscInt       l;
298 #endif
299 
300   PetscFunctionBegin;
301 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
302   ierr = PetscMalloc(A->rmap->bs*sizeof(PetscScalar),&atmp);
303 #endif
304   /* free stuff related to matrix-vec multiply */
305   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
306   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);
307   baij->lvec = 0;
308   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);
309   baij->Mvctx = 0;
310 
311   ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
312   ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
313   baij->slvec0 = 0;
314   ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
315   ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
316   ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
317   baij->slvec1 = 0;
318 
319   if (baij->colmap) {
320 #if defined (PETSC_USE_CTABLE)
321     ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
322 #else
323     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
324     baij->colmap = 0;
325     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
326 #endif
327   }
328 
329   /* make sure that B is assembled so we can access its values */
330   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
331   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
332 
333   /* invent new B and copy stuff over */
334   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
335   for (i=0; i<mbs; i++) {
336     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
337   }
338   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
339   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
340   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
341   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
342   ierr = PetscFree(nz);CHKERRQ(ierr);
343 
344   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
345   for (i=0; i<mbs; i++) {
346     rvals[0] = bs*i;
347     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
348     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
349       col = garray[Bbaij->j[j]]*bs;
350       for (k=0; k<bs; k++) {
351 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
352         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
353 #else
354         atmp = a+j*bs2 + k*bs;
355 #endif
356         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
357         col++;
358       }
359     }
360   }
361 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
362   ierr = PetscFree(atmp);CHKERRQ(ierr);
363 #endif
364   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
365   baij->garray = 0;
366   ierr = PetscFree(rvals);CHKERRQ(ierr);
367   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
368   ierr = MatDestroy(B);CHKERRQ(ierr);
369   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
370   baij->B = Bnew;
371   A->was_assembled = PETSC_FALSE;
372   PetscFunctionReturn(0);
373 }
374 
375 
376 
377