xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 4482741e5b2e2bbc854fb1f8dba65221386520f2)
1 /*$Id: mmbaij.c,v 1.46 2001/09/25 00:31:36 balay Exp $*/
2 
3 /*
4    Support for the parallel BAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 
8 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
/*
   MatSetUpMultiply_MPIBAIJ - Builds the data structures needed by the parallel
   block matrix-vector product:
     garray      - sorted list of the global block columns referenced by the
                   off-diagonal part baij->B (B's column indices are compacted
                   in place to index into garray)
     baij->lvec  - sequential work vector that receives the off-process values
     baij->Mvctx - scatter context that moves those values from a distributed
                   vector into baij->lvec

   Called collectively (creates an MPI vector and does a barrier on mat->comm).
   Returns 0 on success, a PETSc error code otherwise.
*/
12 int MatSetUpMultiply_MPIBAIJ(Mat mat)
13 {
14   Mat_MPIBAIJ        *baij = (Mat_MPIBAIJ*)mat->data;
15   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
16   int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
17   int                bs = baij->bs,*stmp;
18   IS                 from,to;
19   Vec                gvec;
20 #if defined (PETSC_USE_CTABLE)
21   PetscTable         gid1_lid1;
22   PetscTablePosition tpos;
23   int                gid,lid;
24 #endif
25 
26   PetscFunctionBegin;
27 
28 #if defined (PETSC_USE_CTABLE)
29   /* use a table - Mark Adams.  The table is one-based: it maps
     (global block column + 1) -> (local compacted column + 1), so a lookup
     result of 0 means "not present". */
30   ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
31   for (i=0; i<B->mbs; i++) {
32     for (j=0; j<B->ilen[i]; j++) {
33       int data,gid1 = aj[B->i[i]+j] + 1;
34       ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
35       if (!data) {
36         /* one based table */
37         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); /* ec counts distinct off-process block columns */
38       }
39     }
40   }
41   /* form array of columns we need */
42   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
43   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
44   while (tpos) {
45     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
46     gid--; lid--;
47     garray[lid] = gid;
48   }
49   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sorting invalidates the table's lid values ... */
50   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); /* ... so rebuild it against the sorted garray */
51   for (i=0; i<ec; i++) {
52     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
53   }
54   /* compact out the extra columns in B */
55   for (i=0; i<B->mbs; i++) {
56     for (j=0; j<B->ilen[i]; j++) {
57       int gid1 = aj[B->i[i] + j] + 1;
58       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
59       lid --;
60       aj[B->i[i]+j] = lid;
61     }
62   }
63   B->nbs     = ec;
64   baij->B->n = ec*B->bs;
65   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
66   /* Mark Adams */
67 #else
68   /* Make an array as long as the number of columns */
69   /* mark those columns that are in baij->B */
70   ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
71   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
72   for (i=0; i<B->mbs; i++) {
73     for (j=0; j<B->ilen[i]; j++) {
74       if (!indices[aj[B->i[i] + j]]) ec++;
75       indices[aj[B->i[i] + j]] = 1;
76     }
77   }
78 
79   /* form array of columns we need; scanning indices[] in order yields garray already sorted */
80   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
81   ec = 0;
82   for (i=0; i<Nbs; i++) {
83     if (indices[i]) {
84       garray[ec++] = i;
85     }
86   }
87 
88   /* make indices now point into garray */
89   for (i=0; i<ec; i++) {
90     indices[garray[i]] = i;
91   }
92 
93   /* compact out the extra columns in B */
94   for (i=0; i<B->mbs; i++) {
95     for (j=0; j<B->ilen[i]; j++) {
96       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
97     }
98   }
99   B->nbs       = ec;
100   baij->B->n   = ec*B->bs;
101   ierr = PetscFree(indices);CHKERRQ(ierr);
102 #endif
103 
104   /* create local vector that is used to scatter into */
105   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
106 
107   /* create two temporary index sets for building scatter-gather;
      garray is temporarily scaled from block indices to point indices
      for ISCreateBlock(), then restored immediately afterwards */
108   for (i=0; i<ec; i++) {
109     garray[i] = bs*garray[i];
110   }
111   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
112   for (i=0; i<ec; i++) {
113     garray[i] = garray[i]/bs;
114   }
115 
116   ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
117   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
118   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
119   ierr = PetscFree(stmp);CHKERRQ(ierr);
120 
121   /* create temporary global vector to generate scatter context */
122   /* this is inefficient, but otherwise we must do either
123      1) save garray until the first actual scatter when the vector is known or
124      2) have another way of generating a scatter context without a vector.*/
125   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
126 
127   /* generate the scatter context */
128   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
129 
130   /*
131       Post the receives for the first matrix vector product. We synchronize after
132     this on the chance that the user immediately calls MatMult() after assembling
133     the matrix.
134   */
135   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
136   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
137 
138   PetscLogObjectParent(mat,baij->Mvctx);
139   PetscLogObjectParent(mat,baij->lvec);
140   PetscLogObjectParent(mat,from);
141   PetscLogObjectParent(mat,to);
142   baij->garray = garray; /* ownership of garray transfers to the matrix; freed in DisAssemble_MPIBAIJ/destroy */
143   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
144   ierr = ISDestroy(from);CHKERRQ(ierr);
145   ierr = ISDestroy(to);CHKERRQ(ierr);
146   ierr = VecDestroy(gvec);CHKERRQ(ierr);
147   PetscFunctionReturn(0);
148 }
149 
150 /*
151      Takes the local part of an already assembled MPIBAIJ matrix
152    and disassembles it. This is to allow new nonzeros into the matrix
153    that require more communication in the matrix vector multiply.
154    Thus certain data-structures must be rebuilt.
155 
156    Kind of slow! But that's what application programmers get when
157    they are sloppy.
158 */
159 #undef __FUNCT__
160 #define __FUNCT__ "DisAssemble_MPIBAIJ"
/*
   DisAssemble_MPIBAIJ - Replaces the off-diagonal part baij->B (whose column
   indices were compacted by MatSetUpMultiply_MPIBAIJ) with a fresh matrix
   carrying full global column indices, and frees the multiply data structures
   (lvec, Mvctx, colmap, garray) so they can be rebuilt at the next assembly.
*/
161 int DisAssemble_MPIBAIJ(Mat A)
162 {
163   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)A->data;
164   Mat          B = baij->B,Bnew;
165   Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ*)B->data;
166   int          ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
167   int          bs2 = baij->bs2,*nz,ec,m = A->m;
168   MatScalar    *a = Bbaij->a;
169   PetscScalar  *atmp;
170 #if defined(PETSC_USE_MAT_SINGLE)
171   int          k;
172 #endif
173 
174   PetscFunctionBegin;
175   /* free stuff related to matrix-vec multiply */
176   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
177   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
178   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
179   if (baij->colmap) {
180 #if defined (PETSC_USE_CTABLE)
181     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
182 #else
183     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
184     baij->colmap = 0;
185     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
186 #endif
187   }
188 
189   /* make sure that B is assembled so we can access its values */
190   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
191   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
192 
193   /* invent new B and copy stuff over; preallocate exactly the row lengths of the old B */
194   ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
195   for (i=0; i<mbs; i++) {
196     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
197   }
198   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
199   ierr = MatSetOption(Bnew,MAT_COLUMN_ORIENTED);CHKERRQ(ierr); /* raw bs2 blocks are inserted column-oriented; reset to row-oriented below */
200 
201 #if defined(PETSC_USE_MAT_SINGLE)
202   /* MatScalar is single precision here, so each block must be widened into a PetscScalar buffer */
203   ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr);
204 #endif
    /* copy every block of the old B, translating the compacted local block
       column through garray back to its global block column */
204     for (i=0; i<mbs; i++) {
205       for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
206         col  = garray[Bbaij->j[j]];
207 #if defined(PETSC_USE_MAT_SINGLE)
208         for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k];
209 #else
210         atmp = a + j*bs2;
211 #endif
212         ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
213       }
214     }
215   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED);CHKERRQ(ierr);
216 
217 #if defined(PETSC_USE_MAT_SINGLE)
218   ierr = PetscFree(atmp);CHKERRQ(ierr);
219 #endif
220 
221   ierr = PetscFree(nz);CHKERRQ(ierr);
222   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
223   baij->garray = 0;
224   PetscLogObjectMemory(A,-ec*sizeof(int));
225   ierr = MatDestroy(B);CHKERRQ(ierr);
226   PetscLogObjectParent(A,Bnew);
227   baij->B = Bnew;
228   A->was_assembled = PETSC_FALSE; /* multiply structures must be rebuilt at the next assembly */
229   PetscFunctionReturn(0);
230 }
231 
232 /*      ugly stuff added for Glenn someday we should fix this up */
233 
/* NOTE(review): file-scope static state shared by the DiagonalScaleLocal
   routines below.  Allocated lazily in MatMPIBAIJDiagonalScaleLocalSetUp()
   and never freed in this file, and keyed to whichever matrix triggered the
   setup -- so only one MPIBAIJ matrix per process can use these safely. */
234 static int *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
235                                       parts of the local matrix */
236 static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
237 
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
/*
   MatMPIBAIJDiagonalScaleLocalSetUp - Builds the static permutations
   (uglyrmapd, uglyrmapo) and work vectors (uglydd, uglyoo) that map entries of
   a locally ghosted scale vector onto the "diagonal" (ina->A) and
   "off-diagonal" (ina->B) pieces of an MPIBAIJ matrix, for use by
   MatDiagonalScaleLocal_MPIBAIJ().

   Input: inA - the MPIBAIJ matrix (must have a block local-to-global mapping,
                inA->bmapping); scale - currently unused here.
   Returns 0 on success, a PETSc error code otherwise.
*/
241 int MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
242 {
243   Mat_MPIBAIJ  *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
244   Mat_SeqBAIJ  *A = (Mat_SeqBAIJ*)ina->A->data;
245   Mat_SeqBAIJ  *B = (Mat_SeqBAIJ*)ina->B->data;
246   int          ierr,bs = A->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
247   int          *r_rmapd,*r_rmapo;
248 
249   PetscFunctionBegin;
250   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
251   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
    /* Pass 1: for each local block index i, record (one-based) its global block
       index when it falls in this process's ownership range ("diagonal" part) */
252   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(int),&r_rmapd);CHKERRQ(ierr);
253   ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(int));CHKERRQ(ierr);
254   nt   = 0;
255   for (i=0; i<inA->bmapping->n; i++) {
256     if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) {
257       nt++;
258       r_rmapd[i] = inA->bmapping->indices[i] + 1;
259     }
260   }
261   if (nt*bs != n) SETERRQ2(1,"Hmm nt*bs %d n %d",nt*bs,n);
262   ierr = PetscMalloc((n+1)*sizeof(int),&uglyrmapd);CHKERRQ(ierr);
263   for (i=0; i<inA->bmapping->n; i++) {
264     if (r_rmapd[i]){
265       for (j=0; j<bs; j++) {
266         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
267       }
268     }
269   }
270   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
271   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
272 
    /* Pass 2: invert garray so lindices maps a global block column to its
       (one-based) position in B's compacted column space */
273   ierr = PetscMalloc((ina->Nbs+1)*sizeof(int),&lindices);CHKERRQ(ierr);
274   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(int));CHKERRQ(ierr);
275   for (i=0; i<B->nbs; i++) {
276     lindices[garray[i]] = i+1;
277   }
278   no   = inA->bmapping->n - nt; /* upper bound on the number of "off-diagonal" blocks */
279   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(int),&r_rmapo);CHKERRQ(ierr);
280   ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(int));CHKERRQ(ierr);
281   nt   = 0;
282   for (i=0; i<inA->bmapping->n; i++) {
283     if (lindices[inA->bmapping->indices[i]]) {
284       nt++;
285       r_rmapo[i] = lindices[inA->bmapping->indices[i]];
286     }
287   }
    /* BUGFIX: the message prints 'no' but the old code passed 'n' as the
       second format argument */
288   if (nt > no) SETERRQ2(1,"Hmm nt %d no %d",nt,no);
289   ierr = PetscFree(lindices);CHKERRQ(ierr);
290   ierr = PetscMalloc((nt*bs+1)*sizeof(int),&uglyrmapo);CHKERRQ(ierr);
291   for (i=0; i<inA->bmapping->n; i++) {
292     if (r_rmapo[i]){
293       for (j=0; j<bs; j++) {
294         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
295       }
296     }
297   }
298   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
299   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
300 
301   PetscFunctionReturn(0);
302 }
303 
304 #undef __FUNCT__
305 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
306 int MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
307 {
308   /* Thin dispatcher: forwards to whatever implementation was composed on A
309      under "MatDiagonalScaleLocal_C"; a no-op when none is registered.
310      Duplicates MatDiagonalScaleLocal and should eventually be retired. */
311   int ierr;
312   int (*scalefn)(Mat,Vec);
313 
314   PetscFunctionBegin;
315   ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&scalefn);CHKERRQ(ierr);
316   if (scalefn) {
317     ierr = (*scalefn)(A,scale);CHKERRQ(ierr);
318   }
319   PetscFunctionReturn(0);
320 }
318 
319 EXTERN_C_BEGIN
320 #undef __FUNCT__
321 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
/*
   MatDiagonalScaleLocal_MPIBAIJ - Column-scales the two sequential pieces of
   an MPIBAIJ matrix (a->A and a->B) by the corresponding entries of the
   locally ghosted vector 'scale', using the permutations built lazily (on the
   first call) by MatMPIBAIJDiagonalScaleLocalSetUp().

   NOTE(review): relies on the file-scope statics uglyrmapd/uglyrmapo/
   uglydd/uglyoo, which are set up once per process -- using this with more
   than one matrix would reuse the first matrix's mappings.
*/
322 int MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
323 {
324   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
325   int          ierr,n,i;
326   PetscScalar  *d,*o,*s;
327 
328   PetscFunctionBegin;
329   if (!uglyrmapd) {
330     /* first call: build the diagonal/off-diagonal permutations and work vectors */
331     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
332   }
333 
334   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
335 
336   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
337   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
338   for (i=0; i<n; i++) {
339     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
340   }
341   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
342   /* column scale "diagonal" portion of local matrix */
343   ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);
344 
345   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
346   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
347   for (i=0; i<n; i++) {
348     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
349   }
350   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); /* done reading s; restored before the scaling calls */
351   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
352   /* column scale "off-diagonal" portion of local matrix */
353   ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);
354 
355   PetscFunctionReturn(0);
356 }
356 EXTERN_C_END
357 
358 
359