xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 8ddb5d8b44839bc6385c771bd2d6d5c8cd353003)
1 
2 /*
3    Support for the parallel BAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/baij/mpi/mpibaij.h>
6 
7 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
8 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
/*
   MatSetUpMultiply_MPIBAIJ - Builds the communication structures needed by the
   parallel matrix-vector product for an MPIBAIJ matrix.

   This routine:
     1. determines which global block columns appear in the off-diagonal part B,
     2. compacts B's column indices so they run 0..ec-1 (local numbering),
        recording the local->global block-column map in baij->garray,
     3. creates the sequential work vector baij->lvec and the VecScatter
        baij->Mvctx that gathers the needed off-process vector entries.

   Two implementations are compiled, selected by PETSC_USE_CTABLE: a hash-table
   version (memory proportional to the number of distinct columns found) and a
   dense-marker version (memory proportional to the global number of block
   columns, Nbs).
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  PetscInt       bs = mat->rmap->bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable     gid1_lid1;
  PetscTablePosition tpos;
  PetscInt       gid,lid;
#else
  PetscInt       Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* Pass 1: record every distinct global block column of B in the table,
     assigning provisional (unsorted) 1-based local ids as they are found. */
  ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;  /* convert back from the table's 1-based keys/values */
    garray[lid] = gid;
  }
  /* Sort garray, then rebuild the table so local ids follow the sorted
     (ascending global) order; the compaction pass below relies on this. */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;  /* rewrite global block column as local id */
    }
  }
  /* B now has exactly ec block columns; fix up its column layout to match */
  B->nbs     = ec;
  baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;  /* count each column only once */
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need (ascending by construction) */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->cmap->n =baij->B->cmap->N  = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather:
     'from' = the needed global block columns, 'to' = 0..ec-1 locally */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = i; }
  /* PETSC_OWN_POINTER: the IS takes ownership of stmp, no free here */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context
     (no array needed -- only the layout matters for VecScatterCreate) */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);

  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  /* ownership of garray passes to the matrix; freed in DisAssemble/Destroy */
  baij->garray = garray;
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
133 
134 /*
135      Takes the local part of an already assembled MPIBAIJ matrix
136    and disassembles it. This is to allow new nonzeros into the matrix
137    that require more communication in the matrix vector multiply.
138    Thus certain data-structures must be rebuilt.
139 
140    Kind of slow! But that's what application programmers get when
141    they are sloppy.
142 */
#undef __FUNCT__
#define __FUNCT__ "DisAssemble_MPIBAIJ"
/*
   DisAssemble_MPIBAIJ - Undoes the column compaction performed by
   MatSetUpMultiply_MPIBAIJ on the off-diagonal part B of an MPIBAIJ matrix.

   A brand new B (Bnew) is created whose column indices are again GLOBAL
   (translated back through baij->garray), the scatter machinery (lvec, Mvctx,
   colmap, garray) is freed, and the matrix is marked unassembled so that new
   nonzeros requiring wider communication can be inserted and the structures
   rebuilt at the next assembly.
*/
PetscErrorCode DisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat            B = baij->B,Bnew;
  Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
  PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap->n;
  MatScalar      *a = Bbaij->a;
  MatScalar      *atmp;


  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
  if (baij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over: exact per-block-row preallocation
     taken from the old B's row pointers */
  ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
  }
  ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
  ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */
  /* BAIJ values are stored column-major within each bs x bs block, so insert
     with column orientation, then restore the default afterwards */
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
      col  = garray[Bbaij->j[j]];   /* local block column -> global */
      atmp = a + j*bs2;             /* start of the j-th bs2-sized block */
      ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);

  ierr = PetscFree(nz);CHKERRQ(ierr);
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
  baij->B = Bnew;
  /* force the scatter structures to be rebuilt at the next assembly */
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
206 
/*      ugly stuff added for Glenn someday we should fix this up */

/* NOTE(review): file-scope static state shared by the DiagonalScaleLocal
   routines below.  Because it is global (not stored in the matrix), only one
   MPIBAIJ matrix per process can use this machinery at a time, and nothing in
   this file ever frees these objects -- presumably leaked at finalize; confirm. */
static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
                                      parts of the local matrix */
static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
212 
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
216 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
217 {
218   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
219   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)ina->B->data;
220   PetscErrorCode ierr;
221   PetscInt       bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
222   PetscInt       *r_rmapd,*r_rmapo;
223 
224   PetscFunctionBegin;
225   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
226   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
227   ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
228   ierr = PetscMemzero(r_rmapd,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
229   nt   = 0;
230   for (i=0; i<inA->rmap->bmapping->n; i++) {
231     if (inA->rmap->bmapping->indices[i]*bs >= cstart && inA->rmap->bmapping->indices[i]*bs < cend) {
232       nt++;
233       r_rmapd[i] = inA->rmap->bmapping->indices[i] + 1;
234     }
235   }
236   if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
237   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr);
238   for (i=0; i<inA->rmap->bmapping->n; i++) {
239     if (r_rmapd[i]){
240       for (j=0; j<bs; j++) {
241         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
242       }
243     }
244   }
245   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
246   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
247 
248   ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
249   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
250   for (i=0; i<B->nbs; i++) {
251     lindices[garray[i]] = i+1;
252   }
253   no   = inA->rmap->bmapping->n - nt;
254   ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
255   ierr = PetscMemzero(r_rmapo,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
256   nt   = 0;
257   for (i=0; i<inA->rmap->bmapping->n; i++) {
258     if (lindices[inA->rmap->bmapping->indices[i]]) {
259       nt++;
260       r_rmapo[i] = lindices[inA->rmap->bmapping->indices[i]];
261     }
262   }
263   if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
264   ierr = PetscFree(lindices);CHKERRQ(ierr);
265   ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr);
266   for (i=0; i<inA->rmap->bmapping->n; i++) {
267     if (r_rmapo[i]){
268       for (j=0; j<bs; j++) {
269         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
270       }
271     }
272   }
273   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
274   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
275 
276   PetscFunctionReturn(0);
277 }
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
281 PetscErrorCode  MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
282 {
283   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
284   PetscErrorCode ierr;
285 
286   PetscFunctionBegin;
287   ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
288   PetscFunctionReturn(0);
289 }
290 
291 EXTERN_C_BEGIN
292 #undef __FUNCT__
293 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
294 PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
295 {
296   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
297   PetscErrorCode ierr;
298   PetscInt       n,i;
299   PetscScalar    *d,*o,*s;
300 
301   PetscFunctionBegin;
302   if (!uglyrmapd) {
303     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
304   }
305 
306   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
307 
308   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
309   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
310   for (i=0; i<n; i++) {
311     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
312   }
313   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
314   /* column scale "diagonal" portion of local matrix */
315   ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);
316 
317   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
318   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
319   for (i=0; i<n; i++) {
320     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
321   }
322   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
323   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
324   /* column scale "off-diagonal" portion of local matrix */
325   ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);
326 
327   PetscFunctionReturn(0);
328 }
329 EXTERN_C_END
330 
331 
332