xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision e82e9f6b2f396ccc2201d5c70911055c46105893)
1 #define PETSCMAT_DLL
2 
3 /*
4    Support for the parallel BAIJ matrix vector multiply
5 */
6 #include "../src/mat/impls/baij/mpi/mpibaij.h"
7 
8 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
12 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
13 {
14   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
15   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(baij->B->data);
16   PetscErrorCode ierr;
17   PetscInt       i,j,*aj = B->j,ec = 0,*garray;
18   PetscInt       bs = mat->rmap->bs,*stmp;
19   IS             from,to;
20   Vec            gvec;
21 #if defined (PETSC_USE_CTABLE)
22   PetscTable     gid1_lid1;
23   PetscTablePosition tpos;
24   PetscInt       gid,lid;
25 #else
26   PetscInt       Nbs = baij->Nbs,*indices;
27 #endif
28 
29   PetscFunctionBegin;
30 
31 #if defined (PETSC_USE_CTABLE)
32   /* use a table - Mark Adams */
33   ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
34   for (i=0; i<B->mbs; i++) {
35     for (j=0; j<B->ilen[i]; j++) {
36       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
37       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
38       if (!data) {
39         /* one based table */
40         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
41       }
42     }
43   }
44   /* form array of columns we need */
45   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
46   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
47   while (tpos) {
48     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
49     gid--; lid--;
50     garray[lid] = gid;
51   }
52   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
53   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
54   for (i=0; i<ec; i++) {
55     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
56   }
57   /* compact out the extra columns in B */
58   for (i=0; i<B->mbs; i++) {
59     for (j=0; j<B->ilen[i]; j++) {
60       PetscInt gid1 = aj[B->i[i] + j] + 1;
61       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
62       lid --;
63       aj[B->i[i]+j] = lid;
64     }
65   }
66   B->nbs     = ec;
67   baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
68   ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
69   ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr);
70   /* Mark Adams */
71 #else
72   /* Make an array as long as the number of columns */
73   /* mark those columns that are in baij->B */
74   ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
75   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
76   for (i=0; i<B->mbs; i++) {
77     for (j=0; j<B->ilen[i]; j++) {
78       if (!indices[aj[B->i[i] + j]]) ec++;
79       indices[aj[B->i[i] + j]] = 1;
80     }
81   }
82 
83   /* form array of columns we need */
84   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
85   ec = 0;
86   for (i=0; i<Nbs; i++) {
87     if (indices[i]) {
88       garray[ec++] = i;
89     }
90   }
91 
92   /* make indices now point into garray */
93   for (i=0; i<ec; i++) {
94     indices[garray[i]] = i;
95   }
96 
97   /* compact out the extra columns in B */
98   for (i=0; i<B->mbs; i++) {
99     for (j=0; j<B->ilen[i]; j++) {
100       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
101     }
102   }
103   B->nbs       = ec;
104   baij->B->cmap->n =baij->B->cmap->N  = ec*mat->rmap->bs;
105   ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
106   ierr = PetscFree(indices);CHKERRQ(ierr);
107 #endif
108 
109   /* create local vector that is used to scatter into */
110   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
111 
112   /* create two temporary index sets for building scatter-gather */
113   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
114 
115   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
116   for (i=0; i<ec; i++) { stmp[i] = i; }
117   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
118   ierr = PetscFree(stmp);CHKERRQ(ierr);
119 
120   /* create temporary global vector to generate scatter context */
121   ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
122 
123   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
124 
125   ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
126   ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
127   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
128   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
129   baij->garray = garray;
130   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
131   ierr = ISDestroy(from);CHKERRQ(ierr);
132   ierr = ISDestroy(to);CHKERRQ(ierr);
133   ierr = VecDestroy(gvec);CHKERRQ(ierr);
134   PetscFunctionReturn(0);
135 }
136 
137 /*
138      Takes the local part of an already assembled MPIBAIJ matrix
139    and disassembles it. This is to allow new nonzeros into the matrix
140    that require more communication in the matrix vector multiply.
141    Thus certain data-structures must be rebuilt.
142 
143    Kind of slow! But that's what application programmers get when
144    they are sloppy.
145 */
146 #undef __FUNCT__
147 #define __FUNCT__ "DisAssemble_MPIBAIJ"
148 PetscErrorCode DisAssemble_MPIBAIJ(Mat A)
149 {
150   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
151   Mat            B = baij->B,Bnew;
152   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
153   PetscErrorCode ierr;
154   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
155   PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap->n;
156   MatScalar      *a = Bbaij->a;
157   MatScalar      *atmp;
158 
159 
160   PetscFunctionBegin;
161   /* free stuff related to matrix-vec multiply */
162   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
163   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
164   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
165   if (baij->colmap) {
166 #if defined (PETSC_USE_CTABLE)
167     ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
168 #else
169     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
170     baij->colmap = 0;
171     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
172 #endif
173   }
174 
175   /* make sure that B is assembled so we can access its values */
176   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
177   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
178 
179   /* invent new B and copy stuff over */
180   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
181   for (i=0; i<mbs; i++) {
182     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
183   }
184   ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr);
185   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
186   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
187   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
188   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
189 
190   for (i=0; i<mbs; i++) {
191     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
192       col  = garray[Bbaij->j[j]];
193       atmp = a + j*bs2;
194       ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
195     }
196   }
197   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
198 
199   ierr = PetscFree(nz);CHKERRQ(ierr);
200   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
201   baij->garray = 0;
202   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
203   ierr = MatDestroy(B);CHKERRQ(ierr);
204   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
205   baij->B = Bnew;
206   A->was_assembled = PETSC_FALSE;
207   PetscFunctionReturn(0);
208 }
209 
210 /*      ugly stuff added for Glenn someday we should fix this up */
211 
212 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
213                                       parts of the local matrix */
214 static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
215 
216 
217 #undef __FUNCT__
218 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
219 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
220 {
221   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
222   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)ina->B->data;
223   PetscErrorCode ierr;
224   PetscInt       bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
225   PetscInt       *r_rmapd,*r_rmapo;
226 
227   PetscFunctionBegin;
228   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
229   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
230   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
231   ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
232   nt   = 0;
233   for (i=0; i<inA->bmapping->n; i++) {
234     if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) {
235       nt++;
236       r_rmapd[i] = inA->bmapping->indices[i] + 1;
237     }
238   }
239   if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
240   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr);
241   for (i=0; i<inA->bmapping->n; i++) {
242     if (r_rmapd[i]){
243       for (j=0; j<bs; j++) {
244         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
245       }
246     }
247   }
248   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
249   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
250 
251   ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
252   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
253   for (i=0; i<B->nbs; i++) {
254     lindices[garray[i]] = i+1;
255   }
256   no   = inA->bmapping->n - nt;
257   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
258   ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
259   nt   = 0;
260   for (i=0; i<inA->bmapping->n; i++) {
261     if (lindices[inA->bmapping->indices[i]]) {
262       nt++;
263       r_rmapo[i] = lindices[inA->bmapping->indices[i]];
264     }
265   }
266   if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
267   ierr = PetscFree(lindices);CHKERRQ(ierr);
268   ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr);
269   for (i=0; i<inA->bmapping->n; i++) {
270     if (r_rmapo[i]){
271       for (j=0; j<bs; j++) {
272         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
273       }
274     }
275   }
276   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
277   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
278 
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
284 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
285 {
286   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
287   PetscErrorCode ierr;
288 
289   PetscFunctionBegin;
290   ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
291   PetscFunctionReturn(0);
292 }
293 
294 EXTERN_C_BEGIN
295 #undef __FUNCT__
296 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
297 PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
298 {
299   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
300   PetscErrorCode ierr;
301   PetscInt       n,i;
302   PetscScalar    *d,*o,*s;
303 
304   PetscFunctionBegin;
305   if (!uglyrmapd) {
306     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
307   }
308 
309   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
310 
311   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
312   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
313   for (i=0; i<n; i++) {
314     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
315   }
316   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
317   /* column scale "diagonal" portion of local matrix */
318   ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);
319 
320   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
321   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
322   for (i=0; i<n; i++) {
323     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
324   }
325   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
326   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
327   /* column scale "off-diagonal" portion of local matrix */
328   ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);
329 
330   PetscFunctionReturn(0);
331 }
332 EXTERN_C_END
333 
334 
335