xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 9af31e4ad595286b4e2df8194fee047feeccfe42)
1 /*
2    Support for the parallel BAIJ matrix vector multiply
3 */
4 #include "src/mat/impls/baij/mpi/mpibaij.h"
5 
6 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
7 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
/*
   MatSetUpMultiply_MPIBAIJ - Builds the communication structures required by
   the parallel matrix-vector product:
     - garray: sorted array of the global block columns referenced by the
               off-diagonal part B (the "ghost" columns)
     - lvec:   sequential work vector that receives the scattered ghost values
     - Mvctx:  scatter context that gathers those values from the distributed
               input vector
   As a side effect the column indices stored in B are compacted from global
   block indices to local indices into garray (0 .. ec-1), and B's column
   dimension is shrunk accordingly.
*/
int MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ        *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
  int                i,j,*aj = B->j,ierr,ec = 0,*garray;  /* ec = number of distinct ghost block columns */
  int                bs = baij->bs,*stmp;
  IS                 from,to;
  Vec                gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;  /* maps (global block column + 1) -> (local index + 1); 0 means absent */
  PetscTablePosition tpos;
  int                gid,lid;
#else
  int                Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int data,gid1 = aj[B->i[i]+j] + 1;   /* shift by one so 0 can mean "not in table" */
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;   /* undo the one-based shift */
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  /* rebuild the table so the local indices follow the sorted order of garray */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B: global block column -> index into garray */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->n = ec*B->bs;
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need; garray comes out sorted because i ascends */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->n   = ec*B->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* scale block indices up to point (scalar) indices for ISCreateBlock() ... */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  /* ... then restore garray to block indices, since it is kept in baij->garray below */
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;
  }

  ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }   /* destinations 0,bs,2*bs,... in lvec */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we must do either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector.*/
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  /*
      Post the receives for the first matrix vector product. We synchronize after
    this on the chance that the user immediately calls MatMult() after assembling
    the matrix.
  */
  ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  PetscLogObjectParent(mat,baij->Mvctx);
  PetscLogObjectParent(mat,baij->lvec);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);
  baij->garray = garray;   /* ownership of garray passes to the matrix */
  PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
149 
/*
     Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
#undef __FUNCT__
#define __FUNCT__ "DisAssemble_MPIBAIJ"
int DisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)A->data;
  Mat          B = baij->B,Bnew;      /* B: off-diagonal part; Bnew: replacement with global column indices */
  Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ*)B->data;
  int          ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
  int          bs2 = baij->bs2,*nz,ec,m = A->m;
  MatScalar    *a = Bbaij->a;
  PetscScalar  *atmp;
#if defined(PETSC_USE_MAT_SINGLE)
  int          k;   /* used to widen single-precision MatScalar blocks into PetscScalar */
#endif

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
  ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
  if (baij->colmap) {
#if defined (PETSC_USE_CTABLE)
    /* NOTE(review): the colmap assignment sits between the call and its CHKERRQ;
       the error code is still checked — confirm this ordering is intentional */
    ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
#else
    ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
    baij->colmap = 0;
    PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1]-Bbaij->i[i];   /* per-block-row counts for exact preallocation */
  }
  ierr = MatCreate(B->comm,m,n,m,n,&Bnew);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(Bnew,baij->bs,0,nz);CHKERRQ(ierr);
  /* values are copied block-by-block in B's storage (column) order */
  ierr = MatSetOption(Bnew,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);

#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr);
#endif
    for (i=0; i<mbs; i++) {
      for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
        col  = garray[Bbaij->j[j]];   /* translate compacted local column back to global */
#if defined(PETSC_USE_MAT_SINGLE)
        for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k];
#else
        atmp = a + j*bs2;
#endif
        ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
      }
    }
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED);CHKERRQ(ierr);

#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(atmp);CHKERRQ(ierr);
#endif

  ierr = PetscFree(nz);CHKERRQ(ierr);
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  baij->garray = 0;
  PetscLogObjectMemory(A,-ec*sizeof(int));
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscLogObjectParent(A,Bnew);
  baij->B = Bnew;
  A->was_assembled = PETSC_FALSE;   /* so the next assembly rebuilds the scatter structures */
  PetscFunctionReturn(0);
}
233 
/*      ugly stuff added for Glenn someday we should fix this up */

/* mapping from the local ordering to the "diagonal" and "off-diagonal"
   parts of the local matrix; built lazily by MatMPIBAIJDiagonalScaleLocalSetUp().
   NOTE(review): file-scope statics, so only one MPIBAIJ matrix per process can
   use the DiagonalScaleLocal machinery at a time — confirm this is acceptable. */
static int *uglyrmapd = 0,*uglyrmapo = 0;
static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
239 
240 
241 #undef __FUNCT__
242 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
243 int MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
244 {
245   Mat_MPIBAIJ  *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
246   Mat_SeqBAIJ  *A = (Mat_SeqBAIJ*)ina->A->data;
247   Mat_SeqBAIJ  *B = (Mat_SeqBAIJ*)ina->B->data;
248   int          ierr,bs = A->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
249   int          *r_rmapd,*r_rmapo;
250 
251   PetscFunctionBegin;
252   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
253   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
254   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(int),&r_rmapd);CHKERRQ(ierr);
255   ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(int));CHKERRQ(ierr);
256   nt   = 0;
257   for (i=0; i<inA->bmapping->n; i++) {
258     if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) {
259       nt++;
260       r_rmapd[i] = inA->bmapping->indices[i] + 1;
261     }
262   }
263   if (nt*bs != n) SETERRQ2(1,"Hmm nt*bs %d n %d",nt*bs,n);
264   ierr = PetscMalloc((n+1)*sizeof(int),&uglyrmapd);CHKERRQ(ierr);
265   for (i=0; i<inA->bmapping->n; i++) {
266     if (r_rmapd[i]){
267       for (j=0; j<bs; j++) {
268         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
269       }
270     }
271   }
272   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
273   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
274 
275   ierr = PetscMalloc((ina->Nbs+1)*sizeof(int),&lindices);CHKERRQ(ierr);
276   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(int));CHKERRQ(ierr);
277   for (i=0; i<B->nbs; i++) {
278     lindices[garray[i]] = i+1;
279   }
280   no   = inA->bmapping->n - nt;
281   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(int),&r_rmapo);CHKERRQ(ierr);
282   ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(int));CHKERRQ(ierr);
283   nt   = 0;
284   for (i=0; i<inA->bmapping->n; i++) {
285     if (lindices[inA->bmapping->indices[i]]) {
286       nt++;
287       r_rmapo[i] = lindices[inA->bmapping->indices[i]];
288     }
289   }
290   if (nt > no) SETERRQ2(1,"Hmm nt %d no %d",nt,n);
291   ierr = PetscFree(lindices);CHKERRQ(ierr);
292   ierr = PetscMalloc((nt*bs+1)*sizeof(int),&uglyrmapo);CHKERRQ(ierr);
293   for (i=0; i<inA->bmapping->n; i++) {
294     if (r_rmapo[i]){
295       for (j=0; j<bs; j++) {
296         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
297       }
298     }
299   }
300   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
301   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
302 
303   PetscFunctionReturn(0);
304 }
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
308 int MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
309 {
310   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
311   int ierr,(*f)(Mat,Vec);
312 
313   PetscFunctionBegin;
314   ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr);
315   if (f) {
316     ierr = (*f)(A,scale);CHKERRQ(ierr);
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 EXTERN_C_BEGIN
322 #undef __FUNCT__
323 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
324 int MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
325 {
326   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
327   int          ierr,n,i;
328   PetscScalar  *d,*o,*s;
329 
330   PetscFunctionBegin;
331   if (!uglyrmapd) {
332     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
333   }
334 
335   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
336 
337   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
338   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
339   for (i=0; i<n; i++) {
340     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
341   }
342   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
343   /* column scale "diagonal" portion of local matrix */
344   ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);
345 
346   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
347   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
348   for (i=0; i<n; i++) {
349     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
350   }
351   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
352   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
353   /* column scale "off-diagonal" portion of local matrix */
354   ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);
355 
356   PetscFunctionReturn(0);
357 }
358 EXTERN_C_END
359 
360 
361