xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 5968eb51c36a0eee2a17bcd341216da8f7bdf0eb)
1 /*$Id: mmbaij.c,v 1.46 2001/09/25 00:31:36 balay Exp $*/
2 
3 /*
4    Support for the parallel BAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 
8 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
9 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
/*
   MatSetUpMultiply_MPIBAIJ - Sets up the communication structures needed for
   the parallel block matrix-vector product.

   Determines which global block columns are actually referenced by the local
   off-diagonal part B, compacts B's column indices into a contiguous local
   numbering 0..ec-1, and builds:
     baij->garray - maps local (compacted) block column -> global block column
     baij->lvec   - sequential work vector (length ec*bs) scattered into
     baij->Mvctx  - the VecScatter that fills lvec from a global vector

   Collective: creates a temporary parallel vector and calls MPI_Barrier.
*/
int MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ        *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
  int                i,j,*aj = B->j,ierr,ec = 0,*garray;
  int                bs = baij->bs,*stmp;
  IS                 from,to;
  Vec                gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  int                gid,lid;
#else
  int                Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* hash: global block column (1-based) -> local id (1-based); avoids the
     O(Nbs) marker array of the #else branch */
  ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  /* rebuild the table so local ids follow increasing global order */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->n = ec*B->bs;
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->n   = ec*B->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* temporarily scale the block indices up to point indices for ISCreateBlock,
     then restore them (garray is kept in block units below) */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;
  }

  ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we must do either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector.*/
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  /*
      Post the receives for the first matrix vector product. We synchronize after
    this on the chance that the user immediately calls MatMult() after assembling
    the matrix.
  */
  ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  PetscLogObjectParent(mat,baij->Mvctx);
  PetscLogObjectParent(mat,baij->lvec);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);
  baij->garray = garray;
  PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
151 
/*
     Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.

   On exit the off-diagonal part baij->B is replaced by a new SeqBAIJ
   matrix whose columns are numbered globally (garray is undone), and
   lvec/Mvctx/colmap/garray are freed so MatSetUpMultiply_MPIBAIJ can
   rebuild them at the next assembly.
*/
#undef __FUNCT__
#define __FUNCT__ "DisAssemble_MPIBAIJ"
int DisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)A->data;
  Mat          B = baij->B,Bnew;
  Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ*)B->data;
  int          ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
  int          bs2 = baij->bs2,*nz,ec,m = A->m;
  MatScalar    *a = Bbaij->a;
  PetscScalar  *atmp;
#if defined(PETSC_USE_MAT_SINGLE)
  int          k;
#endif

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
  ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
  if (baij->colmap) {
#if defined (PETSC_USE_CTABLE)
    /* NOTE(review): colmap is zeroed before CHKERRQ inspects ierr; harmless,
       but the check would conventionally follow the call directly */
    ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
#else
    ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
    baij->colmap = 0;
    PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  /* nz[i] = number of block nonzeros in block row i of the old B,
     used to preallocate the replacement exactly */
  ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
  }
  /* the new B spans the full global column space (n = A->N) since the
     compacted local column numbering is being undone */
  ierr = MatCreate(B->comm,m,n,m,n,&Bnew);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(Bnew,baij->bs,0,nz);CHKERRQ(ierr);
  /* blocks in a[] are inserted as stored; presumably column-major - TODO confirm */
  ierr = MatSetOption(Bnew,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);

#if defined(PETSC_USE_MAT_SINGLE)
  /* MatScalar may differ from PetscScalar; stage each block through a
     PetscScalar buffer before insertion */
  ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr);
#endif
    for (i=0; i<mbs; i++) {
      for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
        col  = garray[Bbaij->j[j]]; /* translate compacted local column back to global */
#if defined(PETSC_USE_MAT_SINGLE)
        for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k];
#else
        atmp = a + j*bs2; /* point directly at the stored block; no copy needed */
#endif
        ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
      }
    }
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED);CHKERRQ(ierr);

#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(atmp);CHKERRQ(ierr);
#endif

  ierr = PetscFree(nz);CHKERRQ(ierr);
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  baij->garray = 0;
  PetscLogObjectMemory(A,-ec*sizeof(int));
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscLogObjectParent(A,Bnew);
  baij->B = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
235 
/*      ugly stuff added for Glenn someday we should fix this up */

/* File-static state built lazily by MatMPIBAIJDiagonalScaleLocalSetUp() on the
   first call to MatDiagonalScaleLocal_MPIBAIJ(); shared by every matrix in the
   process and apparently never freed in this file */
static int *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
                                      parts of the local matrix */
static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
241 
242 
243 #undef __FUNCT__
244 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
245 int MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
246 {
247   Mat_MPIBAIJ  *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
248   Mat_SeqBAIJ  *A = (Mat_SeqBAIJ*)ina->A->data;
249   Mat_SeqBAIJ  *B = (Mat_SeqBAIJ*)ina->B->data;
250   int          ierr,bs = A->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
251   int          *r_rmapd,*r_rmapo;
252 
253   PetscFunctionBegin;
254   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
255   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
256   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(int),&r_rmapd);CHKERRQ(ierr);
257   ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(int));CHKERRQ(ierr);
258   nt   = 0;
259   for (i=0; i<inA->bmapping->n; i++) {
260     if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) {
261       nt++;
262       r_rmapd[i] = inA->bmapping->indices[i] + 1;
263     }
264   }
265   if (nt*bs != n) SETERRQ2(1,"Hmm nt*bs %d n %d",nt*bs,n);
266   ierr = PetscMalloc((n+1)*sizeof(int),&uglyrmapd);CHKERRQ(ierr);
267   for (i=0; i<inA->bmapping->n; i++) {
268     if (r_rmapd[i]){
269       for (j=0; j<bs; j++) {
270         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
271       }
272     }
273   }
274   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
275   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
276 
277   ierr = PetscMalloc((ina->Nbs+1)*sizeof(int),&lindices);CHKERRQ(ierr);
278   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(int));CHKERRQ(ierr);
279   for (i=0; i<B->nbs; i++) {
280     lindices[garray[i]] = i+1;
281   }
282   no   = inA->bmapping->n - nt;
283   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(int),&r_rmapo);CHKERRQ(ierr);
284   ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(int));CHKERRQ(ierr);
285   nt   = 0;
286   for (i=0; i<inA->bmapping->n; i++) {
287     if (lindices[inA->bmapping->indices[i]]) {
288       nt++;
289       r_rmapo[i] = lindices[inA->bmapping->indices[i]];
290     }
291   }
292   if (nt > no) SETERRQ2(1,"Hmm nt %d no %d",nt,n);
293   ierr = PetscFree(lindices);CHKERRQ(ierr);
294   ierr = PetscMalloc((nt*bs+1)*sizeof(int),&uglyrmapo);CHKERRQ(ierr);
295   for (i=0; i<inA->bmapping->n; i++) {
296     if (r_rmapo[i]){
297       for (j=0; j<bs; j++) {
298         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
299       }
300     }
301   }
302   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
303   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
304 
305   PetscFunctionReturn(0);
306 }
307 
308 #undef __FUNCT__
309 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
310 int MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
311 {
312   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
313   int ierr,(*f)(Mat,Vec);
314 
315   PetscFunctionBegin;
316   ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr);
317   if (f) {
318     ierr = (*f)(A,scale);CHKERRQ(ierr);
319   }
320   PetscFunctionReturn(0);
321 }
322 
323 EXTERN_C_BEGIN
324 #undef __FUNCT__
325 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
326 int MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
327 {
328   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
329   int          ierr,n,i;
330   PetscScalar  *d,*o,*s;
331 
332   PetscFunctionBegin;
333   if (!uglyrmapd) {
334     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
335   }
336 
337   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
338 
339   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
340   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
341   for (i=0; i<n; i++) {
342     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
343   }
344   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
345   /* column scale "diagonal" portion of local matrix */
346   ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);
347 
348   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
349   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
350   for (i=0; i<n; i++) {
351     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
352   }
353   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
354   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
355   /* column scale "off-diagonal" portion of local matrix */
356   ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);
357 
358   PetscFunctionReturn(0);
359 }
360 EXTERN_C_END
361 
362 
363