xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision d61e7eb36d836a8e2bf1ad3f41344afcd0d5cf20)
1 #define PETSCMAT_DLL
2 
3 /*
4    Support for the parallel BAIJ matrix vector multiply
5 */
6 #include "../src/mat/impls/baij/mpi/mpibaij.h"
7 
8 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
9 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
/*
   Builds everything needed for the parallel BAIJ matrix-vector product:
   compacts the off-diagonal part B so its block columns are numbered
   0..ec-1 locally, records the local->global block-column map in garray,
   creates the ghost work vector baij->lvec, and builds the scatter
   context baij->Mvctx that gathers the needed remote entries.

   Input:  mat - an assembled MPIBAIJ matrix
   Output: fills in baij->lvec, baij->Mvctx, baij->garray and resizes
           baij->B's column layout to ec*bs.
   Collective on the matrix's communicator (creates a VecScatter).
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  PetscInt       bs = mat->rmap->bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable     gid1_lid1;
  PetscTablePosition tpos;
  PetscInt       gid,lid;
#else
  PetscInt       Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* Pass 1: enter every distinct global block column of B into the table.
     Keys and values are stored 1-based so that 0 can mean "not present". */
  ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;   /* convert back from the 1-based table encoding */
    garray[lid] = gid;
  }
  /* sort garray so local column numbering is increasing in global index,
     then rebuild the table to map global -> sorted local position */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;   /* B->j now holds compacted local block columns */
    }
  }
  B->nbs     = ec;
  baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;   /* count distinct columns once */
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;   /* garray ends up sorted since i increases */
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->cmap->n =baij->B->cmap->N  = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* "from": the needed global block columns; "to": 0..ec-1 in lvec */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* PETSC_NULL array: only the layout is needed to build the scatter */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);

  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  baij->garray = garray;   /* ownership of garray passes to the matrix */
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
135 
136 /*
137      Takes the local part of an already assembled MPIBAIJ matrix
138    and disassembles it. This is to allow new nonzeros into the matrix
139    that require more communication in the matrix vector multiply.
140    Thus certain data-structures must be rebuilt.
141 
142    Kind of slow! But that's what application programmers get when
143    they are sloppy.
144 */
145 #undef __FUNCT__
146 #define __FUNCT__ "DisAssemble_MPIBAIJ"
147 PetscErrorCode DisAssemble_MPIBAIJ(Mat A)
148 {
149   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
150   Mat            B = baij->B,Bnew;
151   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
152   PetscErrorCode ierr;
153   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
154   PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap->n;
155   MatScalar      *a = Bbaij->a;
156   MatScalar      *atmp;
157 
158 
159   PetscFunctionBegin;
160   /* free stuff related to matrix-vec multiply */
161   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
162   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
163   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
164   if (baij->colmap) {
165 #if defined (PETSC_USE_CTABLE)
166     ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
167 #else
168     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
169     baij->colmap = 0;
170     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
171 #endif
172   }
173 
174   /* make sure that B is assembled so we can access its values */
175   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
176   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
177 
178   /* invent new B and copy stuff over */
179   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
180   for (i=0; i<mbs; i++) {
181     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
182   }
183   ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr);
184   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
185   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
186   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
187   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
188 
189   for (i=0; i<mbs; i++) {
190     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
191       col  = garray[Bbaij->j[j]];
192       atmp = a + j*bs2;
193       ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
194     }
195   }
196   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
197 
198   ierr = PetscFree(nz);CHKERRQ(ierr);
199   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
200   baij->garray = 0;
201   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
202   ierr = MatDestroy(B);CHKERRQ(ierr);
203   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
204   baij->B = Bnew;
205   A->was_assembled = PETSC_FALSE;
206   PetscFunctionReturn(0);
207 }
208 
/*      ugly stuff added for Glenn someday we should fix this up */
/* NOTE(review): process-global mutable state shared by all MPIBAIJ matrices
   in this file; built lazily by MatMPIBAIJDiagonalScaleLocalSetUp and never
   freed — not safe if more than one matrix uses the scale-local feature. */

static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
                                      parts of the local matrix */
static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
214 
215 
216 #undef __FUNCT__
217 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
218 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
219 {
220   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
221   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)ina->B->data;
222   PetscErrorCode ierr;
223   PetscInt       bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
224   PetscInt       *r_rmapd,*r_rmapo;
225 
226   PetscFunctionBegin;
227   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
228   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
229   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
230   ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
231   nt   = 0;
232   for (i=0; i<inA->bmapping->n; i++) {
233     if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) {
234       nt++;
235       r_rmapd[i] = inA->bmapping->indices[i] + 1;
236     }
237   }
238   if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
239   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr);
240   for (i=0; i<inA->bmapping->n; i++) {
241     if (r_rmapd[i]){
242       for (j=0; j<bs; j++) {
243         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
244       }
245     }
246   }
247   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
248   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
249 
250   ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
251   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
252   for (i=0; i<B->nbs; i++) {
253     lindices[garray[i]] = i+1;
254   }
255   no   = inA->bmapping->n - nt;
256   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
257   ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
258   nt   = 0;
259   for (i=0; i<inA->bmapping->n; i++) {
260     if (lindices[inA->bmapping->indices[i]]) {
261       nt++;
262       r_rmapo[i] = lindices[inA->bmapping->indices[i]];
263     }
264   }
265   if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
266   ierr = PetscFree(lindices);CHKERRQ(ierr);
267   ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr);
268   for (i=0; i<inA->bmapping->n; i++) {
269     if (r_rmapo[i]){
270       for (j=0; j<bs; j++) {
271         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
272       }
273     }
274   }
275   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
276   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
277 
278   PetscFunctionReturn(0);
279 }
280 
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
/*
   Public entry point that forwards to the type-specific implementation
   registered on the matrix under the name "MatDiagonalScaleLocal_C"
   (PetscTryMethod: presumably a silent no-op if no such method is
   composed on A — confirm against PetscTryMethod's definition).
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
292 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
/*
   Column-scales both local pieces of an MPIBAIJ matrix by entries of the
   locally ordered vector `scale`: a->A (diagonal piece) by the locally
   owned entries and a->B (off-diagonal piece) by the ghost entries,
   using the uglyrmapd/uglyrmapo permutations built at setup.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       n,i;
  PetscScalar    *d,*o,*s;

  PetscFunctionBegin;
  /* lazily build the file-static maps and work vectors on first use */
  if (!uglyrmapd) {
    ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
332 
333 
334