xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 7d0a6c19129e7069c8a40e210b34ed62989173db)

/*
   Support for the parallel BAIJ matrix-vector multiply
*/
#include "../src/mat/impls/baij/mpi/mpibaij.h"

extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  PetscInt       bs = mat->rmap->bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable     gid1_lid1;
  PetscTablePosition tpos;
  PetscInt       gid,lid;
#else
  PetscInt       Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;
  /* Determine the distinct nonzero block columns of B (the off-process part of the matrix):
     ec counts them and garray[] lists their global block indices in increasing order */
#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs           = ec;
  baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs           = ec;
  baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);

  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  baij->garray = garray;
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
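
/*
   How the objects built above are used: a minimal sketch (not the authoritative
   implementation, which lives in src/mat/impls/baij/mpi/mpibaij.c) of the way
   MatMult for an MPIBAIJ matrix typically consumes baij->Mvctx and baij->lvec.
   The scatter gathers the needed off-process entries of the input vector into
   lvec while the "diagonal" block A is applied; the "off-diagonal" block B is
   then applied to lvec and the result added in.

     ierr = VecScatterBegin(baij->Mvctx,xx,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = (*baij->A->ops->mult)(baij->A,xx,yy);CHKERRQ(ierr);
     ierr = VecScatterEnd(baij->Mvctx,xx,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = (*baij->B->ops->multadd)(baij->B,baij->lvec,yy,yy);CHKERRQ(ierr);
*/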

/*
   Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix-vector multiply.
   Thus certain data structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
#undef __FUNCT__
#define __FUNCT__ "DisAssemble_MPIBAIJ"
PetscErrorCode DisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat            B = baij->B,Bnew;
  Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
  PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap->n;
  MatScalar      *a = Bbaij->a;
  MatScalar      *atmp;

  PetscFunctionBegin;
  /* free stuff related to the matrix-vector multiply */
  ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
  ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
  if (baij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr); baij->colmap = 0;
#else
    ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
  }
  ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);

  /* copy the blocks of B into Bnew, translating local block columns back to global ones via garray */
  for (i=0; i<mbs; i++) {
    for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
      col  = garray[Bbaij->j[j]];
      atmp = a + j*bs2;
      ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);

  ierr = PetscFree(nz);CHKERRQ(ierr);
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
  baij->B = Bnew;
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
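
/*
   The disassembly path is reached when, after a matrix has been assembled, new
   entries are inserted into block columns that B did not previously carry, so
   more communication is needed.  A minimal sketch of such a usage pattern (the
   sizes and indices are hypothetical, not taken from a PETSc example):

     Mat A;
     ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
     ...set values...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

     ierr = MatSetValuesBlocked(A,1,&row,1,&newcol,v,ADD_VALUES);CHKERRQ(ierr);   <-- newcol lies in a block column B never saw
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Processing such an insertion on an already assembled matrix causes
   DisAssemble_MPIBAIJ() to be called; B and the scatter are then rebuilt (via
   MatSetUpMultiply_MPIBAIJ() above) during that assembly so the new
   communication pattern is accounted for.
*/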

/* Ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
                                                   parts of the local matrix */
static Vec      uglydd = 0,uglyoo = 0;          /* work vectors used to scale the two parts of the local matrix */
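
/*
   Illustration of the two mappings (a hypothetical example with bs = 1): suppose
   this process has the local-to-global block mapping {3,4,7,9}, owns global
   block columns 3 and 4 (cstart = 3, cend = 5), and the off-diagonal part B
   references global block columns 7 and 9 (garray = {7,9}).  Then the setup
   routine below produces

     uglyrmapd = {0,1}     local positions of the owned ("diagonal") points
     uglyrmapo = {2,3}     local positions of the ghost ("off-diagonal") points

   so uglydd and uglyoo can be filled from a scale vector given in the local
   (ghosted) ordering.
*/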

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /* access private part of matrix */
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)ina->B->data;
  PetscErrorCode ierr;
  PetscInt       bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc((inA->rbmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapd,inA->rbmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rbmapping->n; i++) {
    if (inA->rbmapping->indices[i]*bs >= cstart && inA->rbmapping->indices[i]*bs < cend) {
      nt++;
      r_rmapd[i] = inA->rbmapping->indices[i] + 1;
    }
  }
  if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
  ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr);
  for (i=0; i<inA->rbmapping->n; i++) {
    if (r_rmapd[i]){
      for (j=0; j<bs; j++) {
        uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
      }
    }
  }
  ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);

  ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
  ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->nbs; i++) {
    lindices[garray[i]] = i+1;
  }
  no   = inA->rbmapping->n - nt;
  ierr = PetscMalloc((inA->rbmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapo,inA->rbmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rbmapping->n; i++) {
    if (lindices[inA->rbmapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rbmapping->indices[i]];
    }
  }
  if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,no);
  ierr = PetscFree(lindices);CHKERRQ(ierr);
  ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr);
  for (i=0; i<inA->rbmapping->n; i++) {
    if (r_rmapo[i]){
      for (j=0; j<bs; j++) {
        uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
      }
    }
  }
  ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
PetscErrorCode  MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
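
/*
   Typical call pattern, as a sketch (the vector name lscale and the length
   variable nlocal are hypothetical): the scale vector is a sequential vector in
   the local (ghosted) ordering defined by the matrix's local-to-global mapping,
   with bs entries per local block, ghost points included.

     Vec lscale;
     ierr = VecCreateSeq(PETSC_COMM_SELF,nlocal,&lscale);CHKERRQ(ierr);
     ...fill lscale in the local ordering...
     ierr = MatMPIBAIJDiagonalScaleLocal(A,lscale);CHKERRQ(ierr);   columns of both local parts of A are scaled
*/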

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       n,i;
  PetscScalar    *d,*o,*s;

  PetscFunctionBegin;
  if (!uglyrmapd) {
    ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
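
/*
   For PetscTryMethod() above to find MatDiagonalScaleLocal_MPIBAIJ(), the method
   must have been composed with the matrix when it was created.  A sketch of the
   registration call (the actual call sits in the MPIBAIJ creation routine in
   mpibaij.c; shown here only to illustrate the dispatch mechanism):

     ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatDiagonalScaleLocal_C",
                                              "MatDiagonalScaleLocal_MPIBAIJ",
                                              MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
*/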