xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision d53a3d6fea7331c598f751e9a4893fffdfd7b7a2)
1 
2 /*
3    Support for the parallel BAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/baij/mpi/mpibaij.h>
6 #include <petsc-private/isimpl.h>    /* needed because accesses data structure of ISLocalToGlobalMapping directly */
7 
8 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
12 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
13 {
14   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
15   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)(baij->B->data);
16   PetscErrorCode ierr;
17   PetscInt       i,j,*aj = B->j,ec = 0,*garray;
18   PetscInt       bs = mat->rmap->bs,*stmp;
19   IS             from,to;
20   Vec            gvec;
21 #if defined(PETSC_USE_CTABLE)
22   PetscTable         gid1_lid1;
23   PetscTablePosition tpos;
24   PetscInt           gid,lid;
25 #else
26   PetscInt Nbs = baij->Nbs,*indices;
27 #endif
28 
29   PetscFunctionBegin;
30 #if defined(PETSC_USE_CTABLE)
31   /* use a table - Mark Adams */
32   ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr);
33   for (i=0; i<B->mbs; i++) {
34     for (j=0; j<B->ilen[i]; j++) {
35       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
36       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
37       if (!data) {
38         /* one based table */
39         ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
40       }
41     }
42   }
43   /* form array of columns we need */
44   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
45   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
46   while (tpos) {
47     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
48     gid--; lid--;
49     garray[lid] = gid;
50   }
51   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
52   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
53   for (i=0; i<ec; i++) {
54     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
55   }
56   /* compact out the extra columns in B */
57   for (i=0; i<B->mbs; i++) {
58     for (j=0; j<B->ilen[i]; j++) {
59       PetscInt gid1 = aj[B->i[i] + j] + 1;
60       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
61       lid--;
62       aj[B->i[i]+j] = lid;
63     }
64   }
65   B->nbs           = ec;
66   baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
67 
68   ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
69   ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
70 #else
71   /* Make an array as long as the number of columns */
72   /* mark those columns that are in baij->B */
73   ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
74   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
75   for (i=0; i<B->mbs; i++) {
76     for (j=0; j<B->ilen[i]; j++) {
77       if (!indices[aj[B->i[i] + j]]) ec++;
78       indices[aj[B->i[i] + j]] = 1;
79     }
80   }
81 
82   /* form array of columns we need */
83   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
84   ec   = 0;
85   for (i=0; i<Nbs; i++) {
86     if (indices[i]) {
87       garray[ec++] = i;
88     }
89   }
90 
91   /* make indices now point into garray */
92   for (i=0; i<ec; i++) {
93     indices[garray[i]] = i;
94   }
95 
96   /* compact out the extra columns in B */
97   for (i=0; i<B->mbs; i++) {
98     for (j=0; j<B->ilen[i]; j++) {
99       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
100     }
101   }
102   B->nbs           = ec;
103   baij->B->cmap->n = baij->B->cmap->N  = ec*mat->rmap->bs;
104 
105   ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
106   ierr = PetscFree(indices);CHKERRQ(ierr);
107 #endif
108 
109   /* create local vector that is used to scatter into */
110   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
111 
112   /* create two temporary index sets for building scatter-gather */
113   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
114 
115   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
116   for (i=0; i<ec; i++) stmp[i] = i;
117   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);
118 
119   /* create temporary global vector to generate scatter context */
120   ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);
121 
122   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
123 
124   ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
125   ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
126   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
127   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
128 
129   baij->garray = garray;
130 
131   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
132   ierr = ISDestroy(&from);CHKERRQ(ierr);
133   ierr = ISDestroy(&to);CHKERRQ(ierr);
134   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 /*
139      Takes the local part of an already assembled MPIBAIJ matrix
140    and disassembles it. This is to allow new nonzeros into the matrix
141    that require more communication in the matrix vector multiply.
142    Thus certain data-structures must be rebuilt.
143 
144    Kind of slow! But that's what application programmers get when
145    they are sloppy.
146 */
147 #undef __FUNCT__
148 #define __FUNCT__ "MatDisAssemble_MPIBAIJ"
149 PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
150 {
151   Mat_MPIBAIJ    *baij  = (Mat_MPIBAIJ*)A->data;
152   Mat            B      = baij->B,Bnew;
153   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
154   PetscErrorCode ierr;
155   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
156   PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap->n;
157   MatScalar      *a  = Bbaij->a;
158   MatScalar      *atmp;
159 
160 
161   PetscFunctionBegin;
162   /* free stuff related to matrix-vec multiply */
163   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
164   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
165   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
166   if (baij->colmap) {
167 #if defined(PETSC_USE_CTABLE)
168     ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
169 #else
170     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
171     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
172 #endif
173   }
174 
175   /* make sure that B is assembled so we can access its values */
176   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
177   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
178 
179   /* invent new B and copy stuff over */
180   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
181   for (i=0; i<mbs; i++) {
182     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
183   }
184   ierr = MatCreate(PetscObjectComm((PetscObject)B),&Bnew);CHKERRQ(ierr);
185   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
186   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
187   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
188 
189   ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */
190 
191   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
192 
193   for (i=0; i<mbs; i++) {
194     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
195       col  = garray[Bbaij->j[j]];
196       atmp = a + j*bs2;
197       ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
198     }
199   }
200   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
201 
202   ierr = PetscFree(nz);CHKERRQ(ierr);
203   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
204   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
205   ierr = MatDestroy(&B);CHKERRQ(ierr);
206   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
207 
208   baij->B          = Bnew;
209   A->was_assembled = PETSC_FALSE;
210   A->assembled     = PETSC_FALSE;
211   PetscFunctionReturn(0);
212 }
213 
214 /*      ugly stuff added for Glenn someday we should fix this up */
215 
216 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
217 static Vec      uglydd     = 0,uglyoo     = 0;  /* work vectors used to scale the two parts of the local matrix */
218 
219 
220 #undef __FUNCT__
221 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
222 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
223 {
224   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
225   Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)ina->B->data;
226   PetscErrorCode ierr;
227   PetscInt       bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
228   PetscInt       *r_rmapd,*r_rmapo;
229 
230   PetscFunctionBegin;
231   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
232   ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr);
233   ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
234   ierr = PetscMemzero(r_rmapd,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
235   nt   = 0;
236   for (i=0; i<inA->rmap->bmapping->n; i++) {
237     if (inA->rmap->bmapping->indices[i]*bs >= cstart && inA->rmap->bmapping->indices[i]*bs < cend) {
238       nt++;
239       r_rmapd[i] = inA->rmap->bmapping->indices[i] + 1;
240     }
241   }
242   if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
243   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr);
244   for (i=0; i<inA->rmap->bmapping->n; i++) {
245     if (r_rmapd[i]) {
246       for (j=0; j<bs; j++) {
247         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
248       }
249     }
250   }
251   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
252   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
253 
254   ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
255   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
256   for (i=0; i<B->nbs; i++) {
257     lindices[garray[i]] = i+1;
258   }
259   no   = inA->rmap->bmapping->n - nt;
260   ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
261   ierr = PetscMemzero(r_rmapo,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
262   nt   = 0;
263   for (i=0; i<inA->rmap->bmapping->n; i++) {
264     if (lindices[inA->rmap->bmapping->indices[i]]) {
265       nt++;
266       r_rmapo[i] = lindices[inA->rmap->bmapping->indices[i]];
267     }
268   }
269   if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
270   ierr = PetscFree(lindices);CHKERRQ(ierr);
271   ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr);
272   for (i=0; i<inA->rmap->bmapping->n; i++) {
273     if (r_rmapo[i]) {
274       for (j=0; j<bs; j++) {
275         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
276       }
277     }
278   }
279   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
280   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
281   PetscFunctionReturn(0);
282 }
283 
284 #undef __FUNCT__
285 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
286 PetscErrorCode  MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
287 {
288   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
289   PetscErrorCode ierr;
290 
291   PetscFunctionBegin;
292   ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
293   PetscFunctionReturn(0);
294 }
295 
296 EXTERN_C_BEGIN
297 #undef __FUNCT__
298 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
299 PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
300 {
301   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
302   PetscErrorCode ierr;
303   PetscInt       n,i;
304   PetscScalar    *d,*o,*s;
305 
306   PetscFunctionBegin;
307   if (!uglyrmapd) {
308     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
309   }
310 
311   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
312 
313   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
314   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
315   for (i=0; i<n; i++) {
316     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
317   }
318   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
319   /* column scale "diagonal" portion of local matrix */
320   ierr = MatDiagonalScale(a->A,NULL,uglydd);CHKERRQ(ierr);
321 
322   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
323   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
324   for (i=0; i<n; i++) {
325     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
326   }
327   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
328   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
329   /* column scale "off-diagonal" portion of local matrix */
330   ierr = MatDiagonalScale(a->B,NULL,uglyoo);CHKERRQ(ierr);
331   PetscFunctionReturn(0);
332 }
333 EXTERN_C_END
334 
335 
336