xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision dba47a550923b04c7c4ebbb735eb62a1b3e4e9ae)
1 #define PETSCMAT_DLL
2 
3 /*
4    Support for the parallel BAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 
8 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
/*
   MatSetUpMultiply_MPIBAIJ - Builds the communication structures needed for the
   parallel block matrix-vector product: determines which nonlocal block columns
   appear in the off-diagonal part B, compacts B's column indices to a dense
   local numbering, and creates the local work vector (baij->lvec) and the
   scatter context (baij->Mvctx) used by MatMult().

   On exit: baij->garray holds the sorted global block-column indices needed
   from other processes, B's column indices are renumbered 0..ec-1 into garray,
   and B->nbs / baij->B->n are shrunk to the compacted width.
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  PetscInt       bs = mat->bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable     gid1_lid1;
  PetscTablePosition tpos;
  PetscInt       gid,lid;
#else
  PetscInt       Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* Map (global block column + 1) -> (local id + 1); the +1 keeps 0 free as
     the "not found" sentinel returned by PetscTableFind. */
  ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;    /* back to zero-based indices */
    garray[lid] = gid;
  }
  /* sort, then rebuild the table so local ids follow the sorted order */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->n = ec*mat->bs;
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;   /* garray comes out sorted since i increases */
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->n   = ec*mat->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* temporarily scale garray to point indices for ISCreateBlock, then restore */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;
  }

  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we must do either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector.*/
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  /*
      Post the receives for the first matrix vector product. We synchronize after
    this on the chance that the user immediately calls MatMult() after assembling
    the matrix.
  */
  ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  baij->garray = garray;   /* ownership of garray transfers to the matrix */
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
152 
153 /*
154      Takes the local part of an already assembled MPIBAIJ matrix
155    and disassembles it. This is to allow new nonzeros into the matrix
156    that require more communication in the matrix vector multiply.
157    Thus certain data-structures must be rebuilt.
158 
159    Kind of slow! But that's what application programmers get when
160    they are sloppy.
161 */
162 #undef __FUNCT__
163 #define __FUNCT__ "DisAssemble_MPIBAIJ"
/*
   DisAssemble_MPIBAIJ - Undoes MatSetUpMultiply_MPIBAIJ: expands the compacted
   off-diagonal part B back to full (global) column width so that new nonzeros
   requiring more communication can be inserted, and frees the scatter
   machinery (lvec, Mvctx, colmap, garray) so it can be rebuilt at the next
   assembly.
*/
PetscErrorCode DisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat            B = baij->B,Bnew;
  Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
  PetscInt       bs2 = baij->bs2,*nz,ec,m = A->m;
  MatScalar      *a = Bbaij->a;
  PetscScalar    *atmp;
#if defined(PETSC_USE_MAT_SINGLE)
  PetscInt       k;
#endif

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
  ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
  if (baij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
#else
    ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
    baij->colmap = 0;
    ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  /* preallocate Bnew row-by-row with the old B's block-row lengths */
  ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
  }
  ierr = MatCreate(B->comm,m,n,m,n,&Bnew);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(Bnew,B->bs,0,nz);CHKERRQ(ierr);
  /* stored blocks are column-major, so insert column-oriented */
  ierr = MatSetOption(Bnew,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);

#if defined(PETSC_USE_MAT_SINGLE)
  /* MatScalar is single precision: copy each block into a PetscScalar buffer */
  ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr);
#endif
    for (i=0; i<mbs; i++) {
      for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
        /* garray translates compacted local block columns back to global */
        col  = garray[Bbaij->j[j]];
#if defined(PETSC_USE_MAT_SINGLE)
        for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k];
#else
        atmp = a + j*bs2;   /* alias directly into B's value array */
#endif
        ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
      }
    }
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED);CHKERRQ(ierr);

#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(atmp);CHKERRQ(ierr);
#endif

  ierr = PetscFree(nz);CHKERRQ(ierr);
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  baij->garray = 0;
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
  baij->B = Bnew;
  /* force the scatter structures to be rebuilt at the next assembly */
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
237 
238 /*      ugly stuff added for Glenn someday we should fix this up */
239 
/* NOTE(review): file-static state shared by the DiagonalScaleLocal routines
   below; built lazily by MatMPIBAIJDiagonalScaleLocalSetUp and apparently
   never freed or rebuilt, so it is tied to the first matrix it was set up
   for — confirm single-matrix usage before reusing. */
static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
                                      parts of the local matrix */
static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
243 
244 
245 #undef __FUNCT__
246 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
247 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
248 {
249   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
250   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)ina->B->data;
251   PetscErrorCode ierr;
252   PetscInt       bs = inA->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
253   PetscInt       *r_rmapd,*r_rmapo;
254 
255   PetscFunctionBegin;
256   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
257   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
258   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
259   ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
260   nt   = 0;
261   for (i=0; i<inA->bmapping->n; i++) {
262     if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) {
263       nt++;
264       r_rmapd[i] = inA->bmapping->indices[i] + 1;
265     }
266   }
267   if (nt*bs != n) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
268   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr);
269   for (i=0; i<inA->bmapping->n; i++) {
270     if (r_rmapd[i]){
271       for (j=0; j<bs; j++) {
272         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
273       }
274     }
275   }
276   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
277   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
278 
279   ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
280   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
281   for (i=0; i<B->nbs; i++) {
282     lindices[garray[i]] = i+1;
283   }
284   no   = inA->bmapping->n - nt;
285   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
286   ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
287   nt   = 0;
288   for (i=0; i<inA->bmapping->n; i++) {
289     if (lindices[inA->bmapping->indices[i]]) {
290       nt++;
291       r_rmapo[i] = lindices[inA->bmapping->indices[i]];
292     }
293   }
294   if (nt > no) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
295   ierr = PetscFree(lindices);CHKERRQ(ierr);
296   ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr);
297   for (i=0; i<inA->bmapping->n; i++) {
298     if (r_rmapo[i]){
299       for (j=0; j<bs; j++) {
300         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
301       }
302     }
303   }
304   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
305   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
306 
307   PetscFunctionReturn(0);
308 }
309 
310 #undef __FUNCT__
311 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
312 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
313 {
314   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
315   PetscErrorCode ierr,(*f)(Mat,Vec);
316 
317   PetscFunctionBegin;
318   ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr);
319   if (f) {
320     ierr = (*f)(A,scale);CHKERRQ(ierr);
321   }
322   PetscFunctionReturn(0);
323 }
324 
325 EXTERN_C_BEGIN
326 #undef __FUNCT__
327 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
/*
   MatDiagonalScaleLocal_MPIBAIJ - Column-scales both pieces of the local
   matrix by a scale vector given in the LOCAL (mapping) ordering: entries of
   scale are routed through the file-static uglyrmapd/uglyrmapo maps into the
   uglydd/uglyoo work vectors, which then scale ina->A and ina->B respectively.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       n,i;
  PetscScalar    *d,*o,*s;

  PetscFunctionBegin;
  /* lazily build the index maps and work vectors on first call */
  if (!uglyrmapd) {
    ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
363 EXTERN_C_END
364 
365 
366