xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 7cf18bfc0af3bd20f57ca1ab9dbfc3d774cdee5d)
1 
2 /*
3    Support for the parallel BAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/baij/mpi/mpibaij.h>
6 
7 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
8 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
/*
   MatSetUpMultiply_MPIBAIJ - Sets up the communication structures needed
   for the parallel BAIJ matrix-vector product.

   It performs three steps:
     1) determines the set of global block columns that actually appear in
        the off-diagonal part baij->B (ec of them),
     2) renumbers the column indices in B to the compact local numbering
        0..ec-1, recording the compact->global map in baij->garray,
     3) creates the sequential work vector baij->lvec (length ec*bs) and the
        scatter context baij->Mvctx that gathers the needed off-process
        entries of the input vector into lvec.

   Input Parameter:
.  mat - the MPIBAIJ matrix; baij->B must still hold GLOBAL block column
         indices in B->j (as it does right after assembly)

   Two implementations are provided: a hash-table variant (PETSC_USE_CTABLE)
   that avoids allocating an array of length Nbs (the total number of block
   columns), and a dense-marker variant that allocates such an array.
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  PetscInt       bs = mat->rmap->bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable     gid1_lid1;
  PetscTablePosition tpos;
  PetscInt       gid,lid;
#else
  PetscInt       Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* table maps gid+1 -> lid+1; keys/values are 1-based so 0 can mean "absent" */
  ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;                 /* back to 0-based */
    garray[lid] = gid;
  }
  /* sort garray and rebuild the table so that local indices follow
     increasing global order (table iteration order is arbitrary) */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B: rewrite B->j in local numbering */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;   /* count each needed column once */
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need (sorted automatically by the scan order) */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray: indices[g] = local index of global g */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B: rewrite B->j in local numbering */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->cmap->n =baij->B->cmap->N  = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather:
     "from" = the needed global block columns, "to" = slots 0..ec-1 of lvec */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context; its array is
     PETSC_NULL since gvec only supplies the parallel layout and is destroyed below */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);

  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  baij->garray = garray;   /* ownership of garray passes to the matrix */
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
133 
/*
     Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   The off-diagonal part baij->B (whose columns are in the compact local
   numbering) is replaced by a new matrix with full global column width,
   with column indices translated back through baij->garray; the scatter
   machinery (lvec, Mvctx, colmap, garray) is released and the matrix is
   marked unassembled so it is rebuilt at the next assembly.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
#undef __FUNCT__
#define __FUNCT__ "DisAssemble_MPIBAIJ"
PetscErrorCode DisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat            B = baij->B,Bnew;
  Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
  PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap->n;
  MatScalar      *a = Bbaij->a;
  MatScalar      *atmp;


  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
  if (baij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over; Bnew gets the full global column
     width n and the per-block-row nonzero counts of the old B */
  ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
  }
  ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
  /* insert column-oriented so the raw bs2 blocks from Bbaij->a can be passed as-is */
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);

  /* copy each stored block, translating the compact local block column
     back to its global block column via garray */
  for (i=0; i<mbs; i++) {
    for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
      col  = garray[Bbaij->j[j]];
      atmp = a + j*bs2;
      ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);

  ierr = PetscFree(nz);CHKERRQ(ierr);
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
  baij->B = Bnew;
  /* mark the matrix as unassembled so the next assembly rebuilds everything */
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
205 
/*      ugly stuff added for Glenn   someday we should fix this up */
/* NOTE(review): this state is file-static, hence shared by every MPIBAIJ
   matrix in the process; it is built lazily by
   MatMPIBAIJDiagonalScaleLocalSetUp() on the first scale-local call and is
   never freed. */

static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
                                      parts of the local matrix */
static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
212 
213 #undef __FUNCT__
214 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
215 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
216 {
217   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
218   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)ina->B->data;
219   PetscErrorCode ierr;
220   PetscInt       bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
221   PetscInt       *r_rmapd,*r_rmapo;
222 
223   PetscFunctionBegin;
224   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
225   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
226   ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
227   ierr = PetscMemzero(r_rmapd,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
228   nt   = 0;
229   for (i=0; i<inA->rmap->bmapping->n; i++) {
230     if (inA->rmap->bmapping->indices[i]*bs >= cstart && inA->rmap->bmapping->indices[i]*bs < cend) {
231       nt++;
232       r_rmapd[i] = inA->rmap->bmapping->indices[i] + 1;
233     }
234   }
235   if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
236   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr);
237   for (i=0; i<inA->rmap->bmapping->n; i++) {
238     if (r_rmapd[i]){
239       for (j=0; j<bs; j++) {
240         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
241       }
242     }
243   }
244   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
245   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
246 
247   ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
248   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
249   for (i=0; i<B->nbs; i++) {
250     lindices[garray[i]] = i+1;
251   }
252   no   = inA->rmap->bmapping->n - nt;
253   ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
254   ierr = PetscMemzero(r_rmapo,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
255   nt   = 0;
256   for (i=0; i<inA->rmap->bmapping->n; i++) {
257     if (lindices[inA->rmap->bmapping->indices[i]]) {
258       nt++;
259       r_rmapo[i] = lindices[inA->rmap->bmapping->indices[i]];
260     }
261   }
262   if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
263   ierr = PetscFree(lindices);CHKERRQ(ierr);
264   ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr);
265   for (i=0; i<inA->rmap->bmapping->n; i++) {
266     if (r_rmapo[i]){
267       for (j=0; j<bs; j++) {
268         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
269       }
270     }
271   }
272   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
273   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
274 
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
280 PetscErrorCode  MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
281 {
282   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
283   PetscErrorCode ierr;
284 
285   PetscFunctionBegin;
286   ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
287   PetscFunctionReturn(0);
288 }
289 
290 EXTERN_C_BEGIN
291 #undef __FUNCT__
292 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
293 PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
294 {
295   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
296   PetscErrorCode ierr;
297   PetscInt       n,i;
298   PetscScalar    *d,*o,*s;
299 
300   PetscFunctionBegin;
301   if (!uglyrmapd) {
302     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
303   }
304 
305   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
306 
307   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
308   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
309   for (i=0; i<n; i++) {
310     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
311   }
312   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
313   /* column scale "diagonal" portion of local matrix */
314   ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);
315 
316   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
317   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
318   for (i=0; i<n; i++) {
319     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
320   }
321   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
322   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
323   /* column scale "off-diagonal" portion of local matrix */
324   ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);
325 
326   PetscFunctionReturn(0);
327 }
328 EXTERN_C_END
329 
330 
331