/* xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision daa037dfd3c3bec8dc8659548d2b20b07c1dc6de) */

/*
   Support for the parallel BAIJ matrix-vector multiply: builds the compacted
   map of off-process block columns (garray), the local work vector (lvec), and
   the scatter context (Mvctx) used to communicate the off-process vector entries.
*/
#include <../src/mat/impls/baij/mpi/mpibaij.h>
#include <petsc/private/isimpl.h>    /* needed because this file accesses the data structure of ISLocalToGlobalMapping directly */

PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)(baij->B->data);
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  PetscInt       bs = mat->rmap->bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  PetscCall(PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1));
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      PetscCall(PetscTableFind(gid1_lid1,gid1,&data));
      if (!data) {
        /* one based table */
        PetscCall(PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES));
      }
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec,&garray));
  PetscCall(PetscTableGetHeadPosition(gid1_lid1,&tpos));
  while (tpos) {
    PetscCall(PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid));
    gid--; lid--;
    garray[lid] = gid;
  }
  PetscCall(PetscSortInt(ec,garray));
  PetscCall(PetscTableRemoveAll(gid1_lid1));
  for (i=0; i<ec; i++) {
    PetscCall(PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES));
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscTableFind(gid1_lid1,gid1,&lid));
      lid--;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs           = ec;
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&baij->B->cmap));
  PetscCall(PetscTableDestroy(&gid1_lid1));
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  PetscCall(PetscCalloc1(Nbs,&indices));
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec,&garray));
  ec   = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs           = ec;
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&baij->B->cmap));
  PetscCall(PetscFree(indices));
#endif
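
  /* At this point, in either branch, garray[0..ec-1] lists in increasing order the
     global block columns that appear in the off-diagonal part B, B->j has been
     renumbered to index into garray, and B has been given a new column layout of
     ec blocks of size bs. */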

  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec));

  /* create two temporary index sets for building scatter-gather */
  PetscCall(ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from));

  PetscCall(PetscMalloc1(ec,&stmp));
  for (i=0; i<ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to));

  /* create temporary global vector to generate scatter context */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec));

  PetscCall(VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx));
  PetscCall(VecScatterViewFromOptions(baij->Mvctx,(PetscObject)mat,"-matmult_vecscatter_view"));

  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)from));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)to));

  baij->garray = garray;

  PetscCall(PetscLogObjectMemory((PetscObject)mat,ec*sizeof(PetscInt)));
  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(0);
}

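/*
   For orientation only: a rough sketch (not the actual implementation, which lives
   in mpibaij.c) of how the objects built above are used during a matrix-vector
   product.  The scatter fills lvec with the needed off-process entries of x while
   the diagonal block is applied, and the off-diagonal block then multiplies lvec:

     PetscCall(VecScatterBegin(baij->Mvctx,x,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
     PetscCall(MatMult(baij->A,x,y));                    // y  = (diagonal block) * x
     PetscCall(VecScatterEnd(baij->Mvctx,x,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
     PetscCall(MatMultAdd(baij->B,baij->lvec,y,y));      // y += (off-diagonal block) * ghost entries
*/
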
/*
     Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix-vector multiply.
   Thus certain data structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *baij  = (Mat_MPIBAIJ*)A->data;
  Mat            B      = baij->B,Bnew;
  Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
  PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap->n;
  MatScalar      *a  = Bbaij->a;
  MatScalar      *atmp;

  PetscFunctionBegin;
  /* free stuff related to the matrix-vector multiply */
  PetscCall(VecGetSize(baij->lvec,&ec)); /* needed for PetscLogObjectMemory below */
  PetscCall(VecDestroy(&baij->lvec)); baij->lvec = NULL;
  PetscCall(VecScatterDestroy(&baij->Mvctx)); baij->Mvctx = NULL;
  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt)));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  PetscCall(PetscMalloc1(mbs,&nz));
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
  }
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B),&Bnew));
  PetscCall(MatSetSizes(Bnew,m,n,m,n));
  PetscCall(MatSetType(Bnew,((PetscObject)B)->type_name));
  PetscCall(MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz));
  if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if nonnegative). */
    ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew;
  }

  PetscCall(MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE));
  /*
   Ensure that B's nonzerostate is monotonically increasing.
   Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerostate across a MatDisAssemble() call?
   */
  Bnew->nonzerostate = B->nonzerostate;

  for (i=0; i<mbs; i++) {
    for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
      col  = garray[Bbaij->j[j]];
      atmp = a + j*bs2;
      PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode));
    }
  }
  PetscCall(MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE));

  PetscCall(PetscFree(nz));
  PetscCall(PetscFree(baij->garray));
  PetscCall(PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt)));
  PetscCall(MatDestroy(&B));
  PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew));

  baij->B          = Bnew;
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

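/*
   Illustrative only (an assumed usage pattern, not code from this file): the
   situation that makes disassembly necessary is a value inserted, after the
   matrix has already been assembled, into an off-process block column that the
   current off-diagonal part B does not know about, e.g.

     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatSetValue(A,row,new_offproc_col,1.0,INSERT_VALUES)); // column not seen during the first assembly
     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

   Accepting the new column requires rebuilding garray, lvec, and Mvctx; to make
   that possible MatDisAssemble_MPIBAIJ() expands B back to global column numbering.
*/
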
/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *uglyrmapd = NULL,*uglyrmapo = NULL;  /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec      uglydd     = NULL,uglyoo     = NULL;  /* work vectors used to scale the two parts of the local matrix */

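/*
   As built by MatMPIBAIJDiagonalScaleLocalSetUp() below: uglyrmapd[k] is the
   position, in the local (ghosted) ordering of the scale vector, of the k-th
   locally owned point of the matrix, and uglyrmapo[k] is the position of the
   point corresponding to the k-th (point) column of the off-diagonal part B.
*/
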
PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /* access private part of matrix */
  Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)ina->B->data;
  PetscInt       bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  PetscCall(MatGetOwnershipRange(inA,&cstart,&cend));
  PetscCall(MatGetSize(ina->A,NULL,&n));
  PetscCall(PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd));
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i]*bs >= cstart && inA->rmap->mapping->indices[i]*bs < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  PetscCheckFalse(nt*bs != n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT,nt*bs,n);
  PetscCall(PetscMalloc1(n+1,&uglyrmapd));
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      for (j=0; j<bs; j++) {
        uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
      }
    }
  }
  PetscCall(PetscFree(r_rmapd));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF,n,&uglydd));

  PetscCall(PetscCalloc1(ina->Nbs+1,&lindices));
  for (i=0; i<B->nbs; i++) {
    lindices[garray[i]] = i+1;
  }
  no   = inA->rmap->mapping->n - nt;
  PetscCall(PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo));
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  PetscCheckFalse(nt > no,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT,nt,no);
  PetscCall(PetscFree(lindices));
  PetscCall(PetscMalloc1(nt*bs+1,&uglyrmapo));
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      for (j=0; j<bs; j++) {
        uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
      }
    }
  }
  PetscCall(PetscFree(r_rmapo));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo));
  PetscFunctionReturn(0);
}

PetscErrorCode  MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */

  PetscFunctionBegin;
  PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));
  PetscFunctionReturn(0);
}

PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*) A->data; /* access private part of matrix */
  PetscInt          n,i;
  PetscScalar       *d,*o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!uglyrmapd) {
    PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A,scale));
  }

  PetscCall(VecGetArrayRead(scale,&s));

  PetscCall(VecGetLocalSize(uglydd,&n));
  PetscCall(VecGetArray(uglydd,&d));
  for (i=0; i<n; i++) {
    d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  PetscCall(VecRestoreArray(uglydd,&d));
  /* column scale "diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->A,NULL,uglydd));

  PetscCall(VecGetLocalSize(uglyoo,&n));
  PetscCall(VecGetArray(uglyoo,&o));
  for (i=0; i<n; i++) {
    o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  PetscCall(VecRestoreArrayRead(scale,&s));
  PetscCall(VecRestoreArray(uglyoo,&o));
  /* column scale "off-diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->B,NULL,uglyoo));
  PetscFunctionReturn(0);
}