xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 72ce74bd30d5d09d1371a3eb387704f4af395d1a)
1 /*
2    Support for the parallel BAIJ matrix vector multiply
3 */
4 #include "src/mat/impls/baij/mpi/mpibaij.h"
5 
6 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
10 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
11 {
12   Mat_MPIBAIJ        *baij = (Mat_MPIBAIJ*)mat->data;
13   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
14   int                i,j,*aj = B->j,ierr,ec = 0,*garray;
15   int                bs = baij->bs,*stmp;
16   IS                 from,to;
17   Vec                gvec;
18 #if defined (PETSC_USE_CTABLE)
19   PetscTable         gid1_lid1;
20   PetscTablePosition tpos;
21   int                gid,lid;
22 #else
23   int                Nbs = baij->Nbs,*indices;
24 #endif
25 
26   PetscFunctionBegin;
27 
28 #if defined (PETSC_USE_CTABLE)
29   /* use a table - Mark Adams */
30   ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
31   for (i=0; i<B->mbs; i++) {
32     for (j=0; j<B->ilen[i]; j++) {
33       int data,gid1 = aj[B->i[i]+j] + 1;
34       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
35       if (!data) {
36         /* one based table */
37         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
38       }
39     }
40   }
41   /* form array of columns we need */
42   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
43   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
44   while (tpos) {
45     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
46     gid--; lid--;
47     garray[lid] = gid;
48   }
49   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
50   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
51   for (i=0; i<ec; i++) {
52     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
53   }
54   /* compact out the extra columns in B */
55   for (i=0; i<B->mbs; i++) {
56     for (j=0; j<B->ilen[i]; j++) {
57       int gid1 = aj[B->i[i] + j] + 1;
58       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
59       lid --;
60       aj[B->i[i]+j] = lid;
61     }
62   }
63   B->nbs     = ec;
64   baij->B->n = ec*B->bs;
65   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
66   /* Mark Adams */
67 #else
68   /* Make an array as long as the number of columns */
69   /* mark those columns that are in baij->B */
70   ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
71   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
72   for (i=0; i<B->mbs; i++) {
73     for (j=0; j<B->ilen[i]; j++) {
74       if (!indices[aj[B->i[i] + j]]) ec++;
75       indices[aj[B->i[i] + j]] = 1;
76     }
77   }
78 
79   /* form array of columns we need */
80   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
81   ec = 0;
82   for (i=0; i<Nbs; i++) {
83     if (indices[i]) {
84       garray[ec++] = i;
85     }
86   }
87 
88   /* make indices now point into garray */
89   for (i=0; i<ec; i++) {
90     indices[garray[i]] = i;
91   }
92 
93   /* compact out the extra columns in B */
94   for (i=0; i<B->mbs; i++) {
95     for (j=0; j<B->ilen[i]; j++) {
96       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
97     }
98   }
99   B->nbs       = ec;
100   baij->B->n   = ec*B->bs;
101   ierr = PetscFree(indices);CHKERRQ(ierr);
102 #endif
103 
104   /* create local vector that is used to scatter into */
105   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
106 
107   /* create two temporary index sets for building scatter-gather */
108   for (i=0; i<ec; i++) {
109     garray[i] = bs*garray[i];
110   }
111   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
112   for (i=0; i<ec; i++) {
113     garray[i] = garray[i]/bs;
114   }
115 
116   ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
117   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
118   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
119   ierr = PetscFree(stmp);CHKERRQ(ierr);
120 
121   /* create temporary global vector to generate scatter context */
122   /* this is inefficient, but otherwise we must do either
123      1) save garray until the first actual scatter when the vector is known or
124      2) have another way of generating a scatter context without a vector.*/
125   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
126 
127   /* gnerate the scatter context */
128   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
129 
130   /*
131       Post the receives for the first matrix vector product. We sync-chronize after
132     this on the chance that the user immediately calls MatMult() after assemblying
133     the matrix.
134   */
135   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
136   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
137 
138   PetscLogObjectParent(mat,baij->Mvctx);
139   PetscLogObjectParent(mat,baij->lvec);
140   PetscLogObjectParent(mat,from);
141   PetscLogObjectParent(mat,to);
142   baij->garray = garray;
143   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
144   ierr = ISDestroy(from);CHKERRQ(ierr);
145   ierr = ISDestroy(to);CHKERRQ(ierr);
146   ierr = VecDestroy(gvec);CHKERRQ(ierr);
147   PetscFunctionReturn(0);
148 }
149 
150 /*
151      Takes the local part of an already assembled MPIBAIJ matrix
152    and disassembles it. This is to allow new nonzeros into the matrix
153    that require more communication in the matrix vector multiply.
154    Thus certain data-structures must be rebuilt.
155 
156    Kind of slow! But that's what application programmers get when
157    they are sloppy.
158 */
159 #undef __FUNCT__
160 #define __FUNCT__ "DisAssemble_MPIBAIJ"
161 PetscErrorCode DisAssemble_MPIBAIJ(Mat A)
162 {
163   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)A->data;
164   Mat          B = baij->B,Bnew;
165   Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ*)B->data;
166   PetscErrorCode ierr;
167   int i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
168   int          bs2 = baij->bs2,*nz,ec,m = A->m;
169   MatScalar    *a = Bbaij->a;
170   PetscScalar  *atmp;
171 #if defined(PETSC_USE_MAT_SINGLE)
172   int          k;
173 #endif
174 
175   PetscFunctionBegin;
176   /* free stuff related to matrix-vec multiply */
177   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
178   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
179   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
180   if (baij->colmap) {
181 #if defined (PETSC_USE_CTABLE)
182     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
183 #else
184     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
185     baij->colmap = 0;
186     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
187 #endif
188   }
189 
190   /* make sure that B is assembled so we can access its values */
191   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
192   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
193 
194   /* invent new B and copy stuff over */
195   ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
196   for (i=0; i<mbs; i++) {
197     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
198   }
199   ierr = MatCreate(B->comm,m,n,m,n,&Bnew);CHKERRQ(ierr);
200   ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
201   ierr = MatSeqBAIJSetPreallocation(Bnew,baij->bs,0,nz);CHKERRQ(ierr);
202   ierr = MatSetOption(Bnew,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);
203 
204 #if defined(PETSC_USE_MAT_SINGLE)
205   ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr);
206 #endif
207     for (i=0; i<mbs; i++) {
208       for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
209         col  = garray[Bbaij->j[j]];
210 #if defined(PETSC_USE_MAT_SINGLE)
211         for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k];
212 #else
213         atmp = a + j*bs2;
214 #endif
215         ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
216       }
217     }
218   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED);CHKERRQ(ierr);
219 
220 #if defined(PETSC_USE_MAT_SINGLE)
221   ierr = PetscFree(atmp);CHKERRQ(ierr);
222 #endif
223 
224   ierr = PetscFree(nz);CHKERRQ(ierr);
225   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
226   baij->garray = 0;
227   PetscLogObjectMemory(A,-ec*sizeof(int));
228   ierr = MatDestroy(B);CHKERRQ(ierr);
229   PetscLogObjectParent(A,Bnew);
230   baij->B = Bnew;
231   A->was_assembled = PETSC_FALSE;
232   PetscFunctionReturn(0);
233 }
234 
235 /*      ugly stuff added for Glenn someday we should fix this up */
236 
237 static int *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
238                                       parts of the local matrix */
239 static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
240 
241 
242 #undef __FUNCT__
243 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
244 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
245 {
246   Mat_MPIBAIJ  *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
247   Mat_SeqBAIJ  *A = (Mat_SeqBAIJ*)ina->A->data;
248   Mat_SeqBAIJ  *B = (Mat_SeqBAIJ*)ina->B->data;
249   PetscErrorCode ierr;
250   int bs = A->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
251   int          *r_rmapd,*r_rmapo;
252 
253   PetscFunctionBegin;
254   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
255   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
256   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(int),&r_rmapd);CHKERRQ(ierr);
257   ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(int));CHKERRQ(ierr);
258   nt   = 0;
259   for (i=0; i<inA->bmapping->n; i++) {
260     if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) {
261       nt++;
262       r_rmapd[i] = inA->bmapping->indices[i] + 1;
263     }
264   }
265   if (nt*bs != n) SETERRQ2(1,"Hmm nt*bs %d n %d",nt*bs,n);
266   ierr = PetscMalloc((n+1)*sizeof(int),&uglyrmapd);CHKERRQ(ierr);
267   for (i=0; i<inA->bmapping->n; i++) {
268     if (r_rmapd[i]){
269       for (j=0; j<bs; j++) {
270         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
271       }
272     }
273   }
274   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
275   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
276 
277   ierr = PetscMalloc((ina->Nbs+1)*sizeof(int),&lindices);CHKERRQ(ierr);
278   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(int));CHKERRQ(ierr);
279   for (i=0; i<B->nbs; i++) {
280     lindices[garray[i]] = i+1;
281   }
282   no   = inA->bmapping->n - nt;
283   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(int),&r_rmapo);CHKERRQ(ierr);
284   ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(int));CHKERRQ(ierr);
285   nt   = 0;
286   for (i=0; i<inA->bmapping->n; i++) {
287     if (lindices[inA->bmapping->indices[i]]) {
288       nt++;
289       r_rmapo[i] = lindices[inA->bmapping->indices[i]];
290     }
291   }
292   if (nt > no) SETERRQ2(1,"Hmm nt %d no %d",nt,n);
293   ierr = PetscFree(lindices);CHKERRQ(ierr);
294   ierr = PetscMalloc((nt*bs+1)*sizeof(int),&uglyrmapo);CHKERRQ(ierr);
295   for (i=0; i<inA->bmapping->n; i++) {
296     if (r_rmapo[i]){
297       for (j=0; j<bs; j++) {
298         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
299       }
300     }
301   }
302   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
303   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
304 
305   PetscFunctionReturn(0);
306 }
307 
308 #undef __FUNCT__
309 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
310 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
311 {
312   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
313   PetscErrorCode ierr,(*f)(Mat,Vec);
314 
315   PetscFunctionBegin;
316   ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr);
317   if (f) {
318     ierr = (*f)(A,scale);CHKERRQ(ierr);
319   }
320   PetscFunctionReturn(0);
321 }
322 
323 EXTERN_C_BEGIN
324 #undef __FUNCT__
325 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
326 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
327 {
328   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
329   PetscErrorCode ierr;
330   int n,i;
331   PetscScalar  *d,*o,*s;
332 
333   PetscFunctionBegin;
334   if (!uglyrmapd) {
335     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
336   }
337 
338   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
339 
340   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
341   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
342   for (i=0; i<n; i++) {
343     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
344   }
345   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
346   /* column scale "diagonal" portion of local matrix */
347   ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);
348 
349   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
350   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
351   for (i=0; i<n; i++) {
352     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
353   }
354   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
355   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
356   /* column scale "off-diagonal" portion of local matrix */
357   ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);
358 
359   PetscFunctionReturn(0);
360 }
361 EXTERN_C_END
362 
363 
364