xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 01a79839fc82a7dabb7a87cd2a8bb532c6bfa88d)
1 #define PETSCMAT_DLL
2 
3 /*
4    Support for the parallel BAIJ matrix vector multiply
5 */
6 #include "../src/mat/impls/baij/mpi/mpibaij.h"
7 
8 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
/*
   MatSetUpMultiply_MPIBAIJ - Builds the data structures needed for the parallel
   matrix-vector product: determines which nonlocal block columns appear in the
   off-diagonal part B, compacts B's column indices to a local 0..ec-1 numbering,
   records the local->global map in baij->garray, and creates the work vector
   (baij->lvec) and scatter context (baij->Mvctx) used to gather remote entries.

   Collective on Mat.

   Input Parameter:
.  mat - the MPIBAIJ matrix (its off-diagonal part baij->B must be populated)
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;  /* ec = number of distinct nonlocal block columns */
  PetscInt       bs = mat->rmap->bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable     gid1_lid1;                      /* hash table: global block col + 1 -> local id + 1 */
  PetscTablePosition tpos;
  PetscInt       gid,lid;
#else
  PetscInt       Nbs = baij->Nbs,*indices;       /* dense marker array over all Nbs global block cols */
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* Pass 1: assign each distinct global block column of B a one-based local id */
  ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;    /* +1: table cannot store key/value 0 */
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;                                /* undo the one-based offset */
    garray[lid] = gid;
  }
  /* Sort garray, then rebuild the table so local ids follow increasing global order */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;                       /* B now uses the compact local numbering */
    }
  }
  B->nbs     = ec;
  baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;       /* count each global block column once */
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need (sorted by construction, since we sweep 0..Nbs-1) */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->cmap->n =baij->B->cmap->N  = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* "from": the needed global block columns; "to": 0..ec-1 in lvec */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* PETSC_NULL array: only the layout is needed, no storage is allocated */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);

  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  baij->garray = garray;                         /* ownership of garray transfers to the matrix */
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
135 
136 /*
137      Takes the local part of an already assembled MPIBAIJ matrix
138    and disassembles it. This is to allow new nonzeros into the matrix
139    that require more communication in the matrix vector multiply.
140    Thus certain data-structures must be rebuilt.
141 
142    Kind of slow! But that's what application programmers get when
143    they are sloppy.
144 */
145 #undef __FUNCT__
146 #define __FUNCT__ "DisAssemble_MPIBAIJ"
147 PetscErrorCode DisAssemble_MPIBAIJ(Mat A)
148 {
149   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
150   Mat            B = baij->B,Bnew;
151   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
152   PetscErrorCode ierr;
153   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
154   PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap->n;
155   MatScalar      *a = Bbaij->a;
156   MatScalar      *atmp;
157 
158 
159   PetscFunctionBegin;
160   /* free stuff related to matrix-vec multiply */
161   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
162   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
163   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
164   if (baij->colmap) {
165 #if defined (PETSC_USE_CTABLE)
166     ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
167 #else
168     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
169     baij->colmap = 0;
170     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
171 #endif
172   }
173 
174   /* make sure that B is assembled so we can access its values */
175   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
176   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
177 
178   /* invent new B and copy stuff over */
179   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
180   for (i=0; i<mbs; i++) {
181     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
182   }
183   ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr);
184   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
185   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
186   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
187   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
188 
189   for (i=0; i<mbs; i++) {
190     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
191       col  = garray[Bbaij->j[j]];
192       atmp = a + j*bs2;
193       ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
194     }
195   }
196   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
197 
198   ierr = PetscFree(nz);CHKERRQ(ierr);
199   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
200   baij->garray = 0;
201   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
202   ierr = MatDestroy(B);CHKERRQ(ierr);
203   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
204   baij->B = Bnew;
205   A->was_assembled = PETSC_FALSE;
206   A->assembled     = PETSC_FALSE;
207   PetscFunctionReturn(0);
208 }
209 
/*      ugly stuff added for Glenn someday we should fix this up */
/*
   File-scope state for MatMPIBAIJDiagonalScaleLocal*(); built lazily by
   MatMPIBAIJDiagonalScaleLocalSetUp() and never freed (known wart).
   NOTE(review): being static globals, these are shared by ALL MPIBAIJ
   matrices in the process — only valid if a single matrix uses this path.
*/
static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
                                      parts of the local matrix */
static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
215 
216 
217 #undef __FUNCT__
218 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
219 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
220 {
221   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
222   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)ina->B->data;
223   PetscErrorCode ierr;
224   PetscInt       bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
225   PetscInt       *r_rmapd,*r_rmapo;
226 
227   PetscFunctionBegin;
228   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
229   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
230   ierr = PetscMalloc((inA->rbmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
231   ierr = PetscMemzero(r_rmapd,inA->rbmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
232   nt   = 0;
233   for (i=0; i<inA->rbmapping->n; i++) {
234     if (inA->rbmapping->indices[i]*bs >= cstart && inA->rbmapping->indices[i]*bs < cend) {
235       nt++;
236       r_rmapd[i] = inA->rbmapping->indices[i] + 1;
237     }
238   }
239   if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
240   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr);
241   for (i=0; i<inA->rbmapping->n; i++) {
242     if (r_rmapd[i]){
243       for (j=0; j<bs; j++) {
244         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
245       }
246     }
247   }
248   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
249   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
250 
251   ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
252   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
253   for (i=0; i<B->nbs; i++) {
254     lindices[garray[i]] = i+1;
255   }
256   no   = inA->rbmapping->n - nt;
257   ierr = PetscMalloc((inA->rbmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
258   ierr = PetscMemzero(r_rmapo,inA->rbmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
259   nt   = 0;
260   for (i=0; i<inA->rbmapping->n; i++) {
261     if (lindices[inA->rbmapping->indices[i]]) {
262       nt++;
263       r_rmapo[i] = lindices[inA->rbmapping->indices[i]];
264     }
265   }
266   if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
267   ierr = PetscFree(lindices);CHKERRQ(ierr);
268   ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr);
269   for (i=0; i<inA->rbmapping->n; i++) {
270     if (r_rmapo[i]){
271       for (j=0; j<bs; j++) {
272         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
273       }
274     }
275   }
276   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
277   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
278 
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
284 PetscErrorCode  MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
285 {
286   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
287   PetscErrorCode ierr;
288 
289   PetscFunctionBegin;
290   ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
291   PetscFunctionReturn(0);
292 }
293 
294 EXTERN_C_BEGIN
295 #undef __FUNCT__
296 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
297 PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
298 {
299   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
300   PetscErrorCode ierr;
301   PetscInt       n,i;
302   PetscScalar    *d,*o,*s;
303 
304   PetscFunctionBegin;
305   if (!uglyrmapd) {
306     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
307   }
308 
309   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
310 
311   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
312   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
313   for (i=0; i<n; i++) {
314     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
315   }
316   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
317   /* column scale "diagonal" portion of local matrix */
318   ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);
319 
320   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
321   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
322   for (i=0; i<n; i++) {
323     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
324   }
325   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
326   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
327   /* column scale "off-diagonal" portion of local matrix */
328   ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);
329 
330   PetscFunctionReturn(0);
331 }
332 EXTERN_C_END
333 
334 
335