/* xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 1d2e40055fe3bf783df36eeeccf2d7d5fcfab0fd) */
#define PETSCMAT_DLL

/*
   Support for the parallel BAIJ matrix vector multiply
*/
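/*
   Added commentary (not in the original source): an MPIBAIJ matrix stores its
   locally owned block rows as two sequential BAIJ matrices: baij->A holds the
   "diagonal" block (block columns owned by this process) and baij->B holds the
   "off-diagonal" block. MatSetUpMultiply_MPIBAIJ() below compresses the column
   space of baij->B down to the block columns that actually occur, records the
   compressed-to-global map in baij->garray, and builds the scatter baij->Mvctx
   that gathers the needed ghost values of x into baij->lvec for the
   matrix-vector product.
*/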
#include "src/mat/impls/baij/mpi/mpibaij.h"

EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  PetscInt       bs = mat->rmap.bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable     gid1_lid1;
  PetscTablePosition tpos;
  PetscInt       gid,lid;
#else
  PetscInt       Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->cmap.n = baij->B->cmap.N = ec*mat->rmap.bs;
  ierr = PetscMapInitialize(baij->B->comm,&(baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->cmap.n = baij->B->cmap.N = ec*mat->rmap.bs;
  ierr = PetscMapInitialize(baij->B->comm,&(baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
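
  /*
     Worked example (added commentary): if the off-diagonal block on this process
     references global block columns 3, 7, and 9, then at this point ec = 3,
     garray[] = {3,7,9}, and the column indices stored in B->j have been
     renumbered to 0, 1, 2, so B now acts on a compressed column space of ec
     block columns.
  */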

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;
  }

  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we must do either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector. */
  ierr = VecCreateMPI(mat->comm,mat->cmap.n,mat->cmap.N,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  /*
      Post the receives for the first matrix-vector product. We synchronize after
    this on the chance that the user immediately calls MatMult() after assembling
    the matrix.
  */
  ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  baij->garray = garray;
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
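
/*
   Illustrative sketch (added; not part of the original file): once lvec, Mvctx,
   and the compressed B exist, the parallel matrix-vector product combines them
   roughly as below. The function name is hypothetical and the argument order of
   the scatter calls follows the convention used elsewhere in this file; see
   MatMult_MPIBAIJ() in mpibaij.c for the real implementation.
*/
#if 0
static PetscErrorCode MatMultSketch_MPIBAIJ(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* start gathering the ghost entries of xx needed by the off-diagonal block */
  ierr = VecScatterBegin(xx,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  /* diagonal block times the locally owned part of xx */
  ierr = (*baij->A->ops->mult)(baij->A,xx,yy);CHKERRQ(ierr);
  /* complete the gather, then add the off-diagonal contribution B*lvec */
  ierr = VecScatterEnd(xx,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = (*baij->B->ops->multadd)(baij->B,baij->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif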

/*
     Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
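
/*
   Usage sketch (added commentary): the disassembly path is taken when the user
   introduces a nonzero into an already assembled matrix at a block column the
   off-diagonal part did not previously reference, e.g. (row, col, and vals are
   hypothetical user data)

     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatSetValuesBlocked(A,1,&row,1,&col,vals,INSERT_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

   Such an insertion forces the compressed column space and the scatter built by
   MatSetUpMultiply_MPIBAIJ() to be thrown away and rebuilt; DisAssemble_MPIBAIJ()
   does the throwing away.
*/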
#undef __FUNCT__
#define __FUNCT__ "DisAssemble_MPIBAIJ"
PetscErrorCode DisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat            B = baij->B,Bnew;
  Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap.n,col,*garray=baij->garray;
  PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap.n;
  MatScalar      *a = Bbaij->a;
  PetscScalar    *atmp;
#if defined(PETSC_USE_MAT_SINGLE)
  PetscInt       k;
#endif

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
  ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
  if (baij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr); baij->colmap = 0;
#else
    ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
    baij->colmap = 0;
    ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
  }
  ierr = MatCreate(B->comm,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap.bs,0,nz);CHKERRQ(ierr);
  ierr = MatSetOption(Bnew,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);

#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr);
#endif
  for (i=0; i<mbs; i++) {
    for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
      col  = garray[Bbaij->j[j]];
#if defined(PETSC_USE_MAT_SINGLE)
      for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k];
#else
      atmp = a + j*bs2;
#endif
      ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED);CHKERRQ(ierr);

#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(atmp);CHKERRQ(ierr);
#endif

  ierr = PetscFree(nz);CHKERRQ(ierr);
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  baij->garray = 0;
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
  baij->B = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* map each entry of the "diagonal" and "off-diagonal"
                                      parts of the local matrix to its index in the local ordering */
static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */


#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /* access private part of matrix */
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)ina->B->data;
  PetscErrorCode ierr;
  PetscInt       bs = inA->rmap.bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->bmapping->n; i++) {
    if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) {
      nt++;
      r_rmapd[i] = inA->bmapping->indices[i] + 1;
    }
  }
  if (nt*bs != n) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
  ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr);
  for (i=0; i<inA->bmapping->n; i++) {
    if (r_rmapd[i]) {
      for (j=0; j<bs; j++) {
        uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
      }
    }
  }
  ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);

  ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
  ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->nbs; i++) {
    lindices[garray[i]] = i+1;
  }
  no   = inA->bmapping->n - nt;
  ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->bmapping->n; i++) {
    if (lindices[inA->bmapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->bmapping->indices[i]];
    }
  }
  if (nt > no) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,no);
  ierr = PetscFree(lindices);CHKERRQ(ierr);
  ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr);
  for (i=0; i<inA->bmapping->n; i++) {
    if (r_rmapo[i]) {
      for (j=0; j<bs; j++) {
        uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
      }
    }
  }
  ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
  PetscErrorCode ierr,(*f)(Mat,Vec);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(A,scale);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
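
/*
   Note (added commentary): the "MatDiagonalScaleLocal_C" function queried above
   is composed onto a matrix when its type is set; for MPIBAIJ this is presumably
   done in MatCreate_MPIBAIJ() in mpibaij.c with something like

     PetscObjectComposeFunctionDynamic((PetscObject)A,"MatDiagonalScaleLocal_C",
                                       "MatDiagonalScaleLocal_MPIBAIJ",
                                       MatDiagonalScaleLocal_MPIBAIJ);

   so MatMPIBAIJDiagonalScaleLocal() above dispatches to the type-specific routine
   below and silently does nothing for types that register no such function.
*/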

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       n,i;
  PetscScalar    *d,*o,*s;

  PetscFunctionBegin;
  if (!uglyrmapd) {
    ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END