xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision a77337e4d7d5143cd577a9cca2c72dcc9694336d)
1 #define PETSCMAT_DLL
2 
3 /*
4    Support for the parallel BAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 
8 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
9 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
/*
   MatSetUpMultiply_MPIBAIJ - builds the communication structures needed for the
   parallel (MPI) BAIJ matrix-vector product.

   Determines which global block columns actually appear in the off-diagonal part
   baij->B, renumbers B's column indices to a compact local numbering 0..ec-1,
   records the compact-to-global map in baij->garray, and creates:
     - baij->lvec  : sequential work vector of length ec*bs that receives the
                     gathered off-process entries, and
     - baij->Mvctx : the VecScatter that fills lvec from the global input vector.

   Two implementations are compiled: a hash-table version (PETSC_USE_CTABLE),
   which avoids an O(Nbs) array, and a dense marker-array version otherwise.
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;  /* ec = number of distinct block columns used by B */
  PetscInt       bs = mat->rmap.bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable     gid1_lid1;        /* maps 1-based global block column -> 1-based compact local id */
  PetscTablePosition tpos;
  PetscInt       gid,lid;
#else
  PetscInt       Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* first pass: assign a (1-based) local id to each distinct global block column in B */
  ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;                 /* convert back to 0-based indices */
    garray[lid] = gid;
  }
  /* sort garray, then rebuild the table so local ids follow increasing global order */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B: rewrite aj[] in the 0..ec-1 numbering */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->cmap.n = baij->B->cmap.N = ec*mat->rmap.bs;  /* shrink B's column layout to the compact width */
  ierr = PetscMapSetUp(&(baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need (already sorted, since we sweep i = 0..Nbs-1) */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->cmap.n =baij->B->cmap.N  = ec*mat->rmap.bs;
  ierr = PetscMapSetUp(&(baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* temporarily scale garray to scalar (point) indices for ISCreateBlock, then undo */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;
  }

  /* destination indices are simply 0,bs,2*bs,... in lvec */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* (no array needed -- only the layout matters for building the scatter) */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap.n,mat->cmap.N,PETSC_NULL,&gvec);CHKERRQ(ierr);

  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  baij->garray = garray;  /* ownership of garray passes to the matrix */
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
142 
143 /*
144      Takes the local part of an already assembled MPIBAIJ matrix
145    and disassembles it. This is to allow new nonzeros into the matrix
146    that require more communication in the matrix vector multiply.
147    Thus certain data-structures must be rebuilt.
148 
149    Kind of slow! But that's what application programmers get when
150    they are sloppy.
151 */
152 #undef __FUNCT__
153 #define __FUNCT__ "DisAssemble_MPIBAIJ"
154 PetscErrorCode DisAssemble_MPIBAIJ(Mat A)
155 {
156   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
157   Mat            B = baij->B,Bnew;
158   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
159   PetscErrorCode ierr;
160   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap.N,col,*garray=baij->garray;
161   PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap.n;
162   MatScalar      *a = Bbaij->a;
163   PetscScalar    *atmp;
164 #if defined(PETSC_USE_MAT_SINGLE)
165   PetscInt       k;
166 #endif
167 
168   PetscFunctionBegin;
169   /* free stuff related to matrix-vec multiply */
170   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
171   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
172   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
173   if (baij->colmap) {
174 #if defined (PETSC_USE_CTABLE)
175     ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
176 #else
177     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
178     baij->colmap = 0;
179     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
180 #endif
181   }
182 
183   /* make sure that B is assembled so we can access its values */
184   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
185   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
186 
187   /* invent new B and copy stuff over */
188   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
189   for (i=0; i<mbs; i++) {
190     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
191   }
192   ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr);
193   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
194   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
195   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap.bs,0,nz);CHKERRQ(ierr);
196   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
197 
198 #if defined(PETSC_USE_MAT_SINGLE)
199   ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr);
200 #endif
201     for (i=0; i<mbs; i++) {
202       for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
203         col  = garray[Bbaij->j[j]];
204 #if defined(PETSC_USE_MAT_SINGLE)
205         for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k];
206 #else
207         atmp = a + j*bs2;
208 #endif
209         ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
210       }
211     }
212     ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
213 
214 #if defined(PETSC_USE_MAT_SINGLE)
215   ierr = PetscFree(atmp);CHKERRQ(ierr);
216 #endif
217 
218   ierr = PetscFree(nz);CHKERRQ(ierr);
219   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
220   baij->garray = 0;
221   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
222   ierr = MatDestroy(B);CHKERRQ(ierr);
223   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
224   baij->B = Bnew;
225   A->was_assembled = PETSC_FALSE;
226   PetscFunctionReturn(0);
227 }
228 
/*      ugly stuff added for Glenn someday we should fix this up */

static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
                                      parts of the local matrix; built lazily by MatMPIBAIJDiagonalScaleLocalSetUp() */
static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
234 
235 
236 #undef __FUNCT__
237 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
238 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
239 {
240   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
241   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)ina->B->data;
242   PetscErrorCode ierr;
243   PetscInt       bs = inA->rmap.bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
244   PetscInt       *r_rmapd,*r_rmapo;
245 
246   PetscFunctionBegin;
247   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
248   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
249   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
250   ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
251   nt   = 0;
252   for (i=0; i<inA->bmapping->n; i++) {
253     if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) {
254       nt++;
255       r_rmapd[i] = inA->bmapping->indices[i] + 1;
256     }
257   }
258   if (nt*bs != n) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
259   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr);
260   for (i=0; i<inA->bmapping->n; i++) {
261     if (r_rmapd[i]){
262       for (j=0; j<bs; j++) {
263         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
264       }
265     }
266   }
267   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
268   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
269 
270   ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
271   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
272   for (i=0; i<B->nbs; i++) {
273     lindices[garray[i]] = i+1;
274   }
275   no   = inA->bmapping->n - nt;
276   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
277   ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
278   nt   = 0;
279   for (i=0; i<inA->bmapping->n; i++) {
280     if (lindices[inA->bmapping->indices[i]]) {
281       nt++;
282       r_rmapo[i] = lindices[inA->bmapping->indices[i]];
283     }
284   }
285   if (nt > no) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
286   ierr = PetscFree(lindices);CHKERRQ(ierr);
287   ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr);
288   for (i=0; i<inA->bmapping->n; i++) {
289     if (r_rmapo[i]){
290       for (j=0; j<bs; j++) {
291         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
292       }
293     }
294   }
295   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
296   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
297 
298   PetscFunctionReturn(0);
299 }
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
303 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
304 {
305   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
306   PetscErrorCode ierr,(*f)(Mat,Vec);
307 
308   PetscFunctionBegin;
309   ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr);
310   if (f) {
311     ierr = (*f)(A,scale);CHKERRQ(ierr);
312   }
313   PetscFunctionReturn(0);
314 }
315 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
/*
   MatDiagonalScaleLocal_MPIBAIJ - column-scales the two sequential pieces of
   an MPIBAIJ matrix (a->A and a->B) from a scale vector given in the local
   (bmapping) ordering.

   Uses the file-static index maps uglyrmapd/uglyrmapo and work vectors
   uglydd/uglyoo, building them on first use via
   MatMPIBAIJDiagonalScaleLocalSetUp().

   NOTE(review): the setup is done once and cached in file statics, so this
   presumably assumes the same matrix (or at least the same layout) on every
   call -- verify against callers.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       n,i;
  PetscScalar    *d,*o,*s;

  PetscFunctionBegin;
  /* lazily build the index maps and work vectors on first call */
  if (!uglyrmapd) {
    ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
355 
356 
357