xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 94b7f48cc472a54ea2ce57edf1fe19e8a254237c)
1 /*$Id: mmbaij.c,v 1.46 2001/09/25 00:31:36 balay Exp $*/
2 
3 /*
4    Support for the parallel BAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 #include "src/vec/vecimpl.h"
8 
9 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
10 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
/*
   MatSetUpMultiply_MPIBAIJ - Builds the communication structures needed for the
   parallel matrix-vector product with an MPIBAIJ matrix.

   Steps:
     1. Determine the set of nonzero global block columns of the off-diagonal
        part B (their global block indices are collected in garray).
     2. Renumber B's column indices from global block numbering to a compact
        local numbering 0..ec-1 ("compacting out" unused columns).
     3. Create the local work vector baij->lvec and the scatter context
        baij->Mvctx that gathers the needed off-process vector entries.

   Input Parameter:
.  mat - the MPIBAIJ matrix (mat->data must be a Mat_MPIBAIJ)

   Notes:
   Two implementations are compiled depending on PETSC_USE_CTABLE: a hash
   table (memory proportional to ec) or a dense marker array of length Nbs
   (the number of global block columns).
*/
int MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ        *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
  int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
  int                bs = baij->bs,*stmp;
  IS                 from,to;
  Vec                gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  int                gid,lid;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* The table maps (global block column + 1) -> (local block column + 1);
     both are shifted by one because 0 is the table's "not found" value. */
  ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;
    garray[lid] = gid;
  }
  /* sort garray, then rebuild the table so that local ids follow the
     sorted (ascending global block column) order */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->n = ec*B->bs;
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need; scanning indices[] in order yields
     garray already sorted ascending */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->n   = ec*B->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* temporarily scale garray to point entries (block index * block size),
     as required by ISCreateBlock; restored to block indices below */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;
  }

  ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we must do either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector.*/
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  /*
      Post the receives for the first matrix vector product. We synchronize after
    this on the chance that the user immediately calls MatMult() after assembling
    the matrix.
  */
  ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  PetscLogObjectParent(mat,baij->Mvctx);
  PetscLogObjectParent(mat,baij->lvec);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);
  /* garray is retained by the matrix (freed in DisAssemble/Destroy) */
  baij->garray = garray;
  PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
150 
151 /*
152      Takes the local part of an already assembled MPIBAIJ matrix
153    and disassembles it. This is to allow new nonzeros into the matrix
154    that require more communication in the matrix vector multiply.
155    Thus certain data-structures must be rebuilt.
156 
157    Kind of slow! But that's what application programmers get when
158    they are sloppy.
159 */
160 #undef __FUNCT__
161 #define __FUNCT__ "DisAssemble_MPIBAIJ"
162 int DisAssemble_MPIBAIJ(Mat A)
163 {
164   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)A->data;
165   Mat          B = baij->B,Bnew;
166   Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ*)B->data;
167   int          ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
168   int          bs2 = baij->bs2,*nz,ec,m = A->m;
169   MatScalar    *a = Bbaij->a;
170   PetscScalar  *atmp;
171 #if defined(PETSC_USE_MAT_SINGLE)
172   int          k;
173 #endif
174 
175   PetscFunctionBegin;
176   /* free stuff related to matrix-vec multiply */
177   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
178   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
179   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
180   if (baij->colmap) {
181 #if defined (PETSC_USE_CTABLE)
182     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
183 #else
184     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
185     baij->colmap = 0;
186     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
187 #endif
188   }
189 
190   /* make sure that B is assembled so we can access its values */
191   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
192   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
193 
194   /* invent new B and copy stuff over */
195   ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
196   for (i=0; i<mbs; i++) {
197     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
198   }
199   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
200   ierr = MatSetOption(Bnew,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);
201 
202 #if defined(PETSC_USE_MAT_SINGLE)
203   ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr);
204 #endif
205     for (i=0; i<mbs; i++) {
206       for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
207         col  = garray[Bbaij->j[j]];
208 #if defined(PETSC_USE_MAT_SINGLE)
209         for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k];
210 #else
211         atmp = a + j*bs2;
212 #endif
213         ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
214       }
215     }
216   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED);CHKERRQ(ierr);
217 
218 #if defined(PETSC_USE_MAT_SINGLE)
219   ierr = PetscFree(atmp);CHKERRQ(ierr);
220 #endif
221 
222   ierr = PetscFree(nz);CHKERRQ(ierr);
223   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
224   baij->garray = 0;
225   PetscLogObjectMemory(A,-ec*sizeof(int));
226   ierr = MatDestroy(B);CHKERRQ(ierr);
227   PetscLogObjectParent(A,Bnew);
228   baij->B = Bnew;
229   A->was_assembled = PETSC_FALSE;
230   PetscFunctionReturn(0);
231 }
232 
/*      Ugly stuff added for Glenn; someday we should fix this up.
        NOTE(review): this state is file-static, so it appears to support only
        one MPIBAIJ matrix per process, and no cleanup is visible in this file
        -- confirm lifetime/ownership before reuse. */

static int *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
                                      parts of the local matrix */
static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
238 
239 
240 #undef __FUNCT__
241 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
242 int MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
243 {
244   Mat_MPIBAIJ  *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
245   Mat_SeqBAIJ  *A = (Mat_SeqBAIJ*)ina->A->data;
246   Mat_SeqBAIJ  *B = (Mat_SeqBAIJ*)ina->B->data;
247   int          ierr,bs = A->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
248   int          *r_rmapd,*r_rmapo;
249 
250   PetscFunctionBegin;
251   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
252   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
253   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(int),&r_rmapd);CHKERRQ(ierr);
254   ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(int));CHKERRQ(ierr);
255   nt   = 0;
256   for (i=0; i<inA->bmapping->n; i++) {
257     if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) {
258       nt++;
259       r_rmapd[i] = inA->bmapping->indices[i] + 1;
260     }
261   }
262   if (nt*bs != n) SETERRQ2(1,"Hmm nt*bs %d n %d",nt*bs,n);
263   ierr = PetscMalloc((n+1)*sizeof(int),&uglyrmapd);CHKERRQ(ierr);
264   for (i=0; i<inA->bmapping->n; i++) {
265     if (r_rmapd[i]){
266       for (j=0; j<bs; j++) {
267         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
268       }
269     }
270   }
271   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
272   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
273 
274   ierr = PetscMalloc((ina->Nbs+1)*sizeof(int),&lindices);CHKERRQ(ierr);
275   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(int));CHKERRQ(ierr);
276   for (i=0; i<B->nbs; i++) {
277     lindices[garray[i]] = i+1;
278   }
279   no   = inA->bmapping->n - nt;
280   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(int),&r_rmapo);CHKERRQ(ierr);
281   ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(int));CHKERRQ(ierr);
282   nt   = 0;
283   for (i=0; i<inA->bmapping->n; i++) {
284     if (lindices[inA->bmapping->indices[i]]) {
285       nt++;
286       r_rmapo[i] = lindices[inA->bmapping->indices[i]];
287     }
288   }
289   if (nt > no) SETERRQ2(1,"Hmm nt %d no %d",nt,n);
290   ierr = PetscFree(lindices);CHKERRQ(ierr);
291   ierr = PetscMalloc((nt*bs+1)*sizeof(int),&uglyrmapo);CHKERRQ(ierr);
292   for (i=0; i<inA->bmapping->n; i++) {
293     if (r_rmapo[i]){
294       for (j=0; j<bs; j++) {
295         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
296       }
297     }
298   }
299   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
300   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
301 
302   PetscFunctionReturn(0);
303 }
304 
305 #undef __FUNCT__
306 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
307 int MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
308 {
309   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
310   int ierr,(*f)(Mat,Vec);
311 
312   PetscFunctionBegin;
313   ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr);
314   if (f) {
315     ierr = (*f)(A,scale);CHKERRQ(ierr);
316   }
317   PetscFunctionReturn(0);
318 }
319 
320 EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
/*
   MatDiagonalScaleLocal_MPIBAIJ - Column-scales both local parts of an
   MPIBAIJ matrix by a scale vector given in the LOCAL (ghosted) ordering.

   Input Parameters:
+  A     - the MPIBAIJ matrix
-  scale - scale values in the local ordering defined by A->bmapping
           (presumably; the mapping is built in
           MatMPIBAIJDiagonalScaleLocalSetUp -- see that routine)

   Notes:
   On first use the file-static mappings uglyrmapd/uglyrmapo and work
   vectors uglydd/uglyoo are built lazily.  The scale entries are permuted
   through those mappings into uglydd (diagonal part) and uglyoo
   (off-diagonal part), which are then applied as right/column scalings to
   a->A and a->B respectively.
*/
int MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
{
  Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
  int          ierr,n,i;
  PetscScalar  *d,*o,*s;

  PetscFunctionBegin;
  /* lazy one-time setup of the static mapping arrays and work vectors */
  if (!uglyrmapd) {
    ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  /* s is no longer read past this point, so the scale array is restored here */
  ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
357 EXTERN_C_END
358 
359 
360