xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision fdb35ca6b8bfd78facfcbaad2994e8eeeb8f8a19)
1 
2 /*
3    Support for the parallel BAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/baij/mpi/mpibaij.h>
6 
7 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
8 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
/*
   Builds the data structures needed for the parallel BAIJ matrix-vector product:
     - garray[]    : sorted global block-column indices of the nonlocal ("ghost")
                     block columns referenced by the off-diagonal part B
     - baij->lvec  : sequential work vector the ghost values are gathered into
     - baij->Mvctx : scatter context that fills lvec from a global vector
   As a side effect the column indices of B are compacted in place to the
   local numbering 0..ec-1, where ec is the number of distinct ghost block
   columns, and B's column layout is shrunk to match.
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  PetscInt       bs = mat->rmap->bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* The table maps (global block column + 1) -> (local id + 1); the +1 shift
     lets a value of 0 returned by PetscTableFind() mean "not present". */
  ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--; /* undo the one-based shift */
    garray[lid] = gid;
  }
  /* table iteration order is arbitrary, so sort garray and rebuild the table
     with local ids that follow the sorted (ascending global) order */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs           = ec;
  baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;

  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++; /* count each ghost block column once */
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need; scanning 0..Nbs-1 yields garray sorted ascending */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs           = ec;
  baij->B->cmap->n = baij->B->cmap->N  = ec*mat->rmap->bs;

  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* "from" holds the needed global block columns, "to" their slots 0..ec-1 in lvec */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = i;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* no array is attached (PETSC_NULL); only the layout is needed by VecScatterCreate() */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);

  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  baij->garray = garray; /* ownership of garray passes to the matrix */

  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
136 
137 /*
138      Takes the local part of an already assembled MPIBAIJ matrix
139    and disassembles it. This is to allow new nonzeros into the matrix
140    that require more communication in the matrix vector multiply.
141    Thus certain data-structures must be rebuilt.
142 
143    Kind of slow! But that's what application programmers get when
144    they are sloppy.
145 */
#undef __FUNCT__
#define __FUNCT__ "MatDisAssemble_MPIBAIJ"
/*
   Replaces the compacted off-diagonal part baij->B (whose columns are
   renumbered 0..nbs-1 via garray) with an equivalent matrix Bnew indexed by
   the FULL global block-column space, destroying the scatter structures, so
   that new nonzeros in arbitrary columns can subsequently be inserted.
*/
PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *baij  = (Mat_MPIBAIJ*)A->data;
  Mat            B      = baij->B,Bnew;
  Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
  PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap->n;
  MatScalar      *a  = Bbaij->a;
  MatScalar      *atmp;


  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  /* nz[i] = number of block entries in block row i of the old B, used as
     exact per-row preallocation for Bnew */
  ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
  }
  ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr);
  /* Bnew spans the full global column space (n = A->cmap->N), not the
     compacted ghost-column space of the old B */
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);

  ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */

  /* blocks are handed over as raw bs2-sized chunks of B's internal storage;
     NOTE(review): row orientation is turned off so the chunks are interpreted
     in B's block storage order -- confirm against MatSetValuesBlocked_SeqBAIJ */
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
      col  = garray[Bbaij->j[j]]; /* translate compacted local column back to global */
      atmp = a + j*bs2;
      ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);

  ierr = PetscFree(nz);CHKERRQ(ierr);
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);

  baij->B          = Bnew;
  /* force the multiply structures to be rebuilt at the next assembly */
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
212 
/*      ugly stuff added for Glenn someday we should fix this up */

/* NOTE(review): file-scope state shared by the DiagonalScaleLocal routines
   below; it is built once by MatMPIBAIJDiagonalScaleLocalSetUp() and never
   reset, so it appears only one matrix per process can use this feature at a
   time -- confirm before relying on it for multiple matrices */
static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec      uglydd     = 0,uglyoo     = 0;  /* work vectors used to scale the two parts of the local matrix */
217 
218 
219 #undef __FUNCT__
220 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
221 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
222 {
223   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
224   Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)ina->B->data;
225   PetscErrorCode ierr;
226   PetscInt       bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
227   PetscInt       *r_rmapd,*r_rmapo;
228 
229   PetscFunctionBegin;
230   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
231   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
232   ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
233   ierr = PetscMemzero(r_rmapd,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
234   nt   = 0;
235   for (i=0; i<inA->rmap->bmapping->n; i++) {
236     if (inA->rmap->bmapping->indices[i]*bs >= cstart && inA->rmap->bmapping->indices[i]*bs < cend) {
237       nt++;
238       r_rmapd[i] = inA->rmap->bmapping->indices[i] + 1;
239     }
240   }
241   if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
242   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr);
243   for (i=0; i<inA->rmap->bmapping->n; i++) {
244     if (r_rmapd[i]) {
245       for (j=0; j<bs; j++) {
246         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
247       }
248     }
249   }
250   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
251   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
252 
253   ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
254   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
255   for (i=0; i<B->nbs; i++) {
256     lindices[garray[i]] = i+1;
257   }
258   no   = inA->rmap->bmapping->n - nt;
259   ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
260   ierr = PetscMemzero(r_rmapo,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
261   nt   = 0;
262   for (i=0; i<inA->rmap->bmapping->n; i++) {
263     if (lindices[inA->rmap->bmapping->indices[i]]) {
264       nt++;
265       r_rmapo[i] = lindices[inA->rmap->bmapping->indices[i]];
266     }
267   }
268   if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
269   ierr = PetscFree(lindices);CHKERRQ(ierr);
270   ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr);
271   for (i=0; i<inA->rmap->bmapping->n; i++) {
272     if (r_rmapo[i]) {
273       for (j=0; j<bs; j++) {
274         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
275       }
276     }
277   }
278   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
279   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
280   PetscFunctionReturn(0);
281 }
282 
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
/*
   Column scales the local (diagonal and off-diagonal) parts of A by a vector
   given in the local ordering, by dispatching to the type-specific function
   registered on A under the name "MatDiagonalScaleLocal_C"; PetscTryMethod()
   silently does nothing if no such function is registered for A's type.
*/
PetscErrorCode  MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
294 
295 EXTERN_C_BEGIN
296 #undef __FUNCT__
297 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
298 PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
299 {
300   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
301   PetscErrorCode ierr;
302   PetscInt       n,i;
303   PetscScalar    *d,*o,*s;
304 
305   PetscFunctionBegin;
306   if (!uglyrmapd) {
307     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
308   }
309 
310   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
311 
312   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
313   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
314   for (i=0; i<n; i++) {
315     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
316   }
317   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
318   /* column scale "diagonal" portion of local matrix */
319   ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);
320 
321   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
322   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
323   for (i=0; i<n; i++) {
324     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
325   }
326   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
327   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
328   /* column scale "off-diagonal" portion of local matrix */
329   ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);
330   PetscFunctionReturn(0);
331 }
332 EXTERN_C_END
333 
334 
335