xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 40badf4fbc550ac1f60bd080eaff6de6d55b946d)
1 
2 /*
3    Support for the parallel BAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/baij/mpi/mpibaij.h>
6 #include <petsc/private/isimpl.h>    /* needed because accesses data structure of ISLocalToGlobalMapping directly */
7 
8 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
9 {
10   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
11   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)(baij->B->data);
12   PetscInt       i,j,*aj = B->j,ec = 0,*garray;
13   PetscInt       bs = mat->rmap->bs,*stmp;
14   IS             from,to;
15   Vec            gvec;
16 #if defined(PETSC_USE_CTABLE)
17   PetscTable         gid1_lid1;
18   PetscTablePosition tpos;
19   PetscInt           gid,lid;
20 #else
21   PetscInt Nbs = baij->Nbs,*indices;
22 #endif
23 
24   PetscFunctionBegin;
25 #if defined(PETSC_USE_CTABLE)
26   /* use a table - Mark Adams */
27   CHKERRQ(PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1));
28   for (i=0; i<B->mbs; i++) {
29     for (j=0; j<B->ilen[i]; j++) {
30       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
31       CHKERRQ(PetscTableFind(gid1_lid1,gid1,&data));
32       if (!data) {
33         /* one based table */
34         CHKERRQ(PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES));
35       }
36     }
37   }
38   /* form array of columns we need */
39   CHKERRQ(PetscMalloc1(ec,&garray));
40   CHKERRQ(PetscTableGetHeadPosition(gid1_lid1,&tpos));
41   while (tpos) {
42     CHKERRQ(PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid));
43     gid--; lid--;
44     garray[lid] = gid;
45   }
46   CHKERRQ(PetscSortInt(ec,garray));
47   CHKERRQ(PetscTableRemoveAll(gid1_lid1));
48   for (i=0; i<ec; i++) {
49     CHKERRQ(PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES));
50   }
51   /* compact out the extra columns in B */
52   for (i=0; i<B->mbs; i++) {
53     for (j=0; j<B->ilen[i]; j++) {
54       PetscInt gid1 = aj[B->i[i] + j] + 1;
55       CHKERRQ(PetscTableFind(gid1_lid1,gid1,&lid));
56       lid--;
57       aj[B->i[i]+j] = lid;
58     }
59   }
60   B->nbs           = ec;
61   CHKERRQ(PetscLayoutDestroy(&baij->B->cmap));
62   CHKERRQ(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&baij->B->cmap));
63   CHKERRQ(PetscTableDestroy(&gid1_lid1));
64 #else
65   /* Make an array as long as the number of columns */
66   /* mark those columns that are in baij->B */
67   CHKERRQ(PetscCalloc1(Nbs,&indices));
68   for (i=0; i<B->mbs; i++) {
69     for (j=0; j<B->ilen[i]; j++) {
70       if (!indices[aj[B->i[i] + j]]) ec++;
71       indices[aj[B->i[i] + j]] = 1;
72     }
73   }
74 
75   /* form array of columns we need */
76   CHKERRQ(PetscMalloc1(ec,&garray));
77   ec   = 0;
78   for (i=0; i<Nbs; i++) {
79     if (indices[i]) {
80       garray[ec++] = i;
81     }
82   }
83 
84   /* make indices now point into garray */
85   for (i=0; i<ec; i++) {
86     indices[garray[i]] = i;
87   }
88 
89   /* compact out the extra columns in B */
90   for (i=0; i<B->mbs; i++) {
91     for (j=0; j<B->ilen[i]; j++) {
92       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
93     }
94   }
95   B->nbs           = ec;
96   CHKERRQ(PetscLayoutDestroy(&baij->B->cmap));
97   CHKERRQ(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&baij->B->cmap));
98   CHKERRQ(PetscFree(indices));
99 #endif
100 
101   /* create local vector that is used to scatter into */
102   CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec));
103 
104   /* create two temporary index sets for building scatter-gather */
105   CHKERRQ(ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from));
106 
107   CHKERRQ(PetscMalloc1(ec,&stmp));
108   for (i=0; i<ec; i++) stmp[i] = i;
109   CHKERRQ(ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to));
110 
111   /* create temporary global vector to generate scatter context */
112   CHKERRQ(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec));
113 
114   CHKERRQ(VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx));
115   CHKERRQ(VecScatterViewFromOptions(baij->Mvctx,(PetscObject)mat,"-matmult_vecscatter_view"));
116 
117   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx));
118   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec));
119   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)from));
120   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)to));
121 
122   baij->garray = garray;
123 
124   CHKERRQ(PetscLogObjectMemory((PetscObject)mat,ec*sizeof(PetscInt)));
125   CHKERRQ(ISDestroy(&from));
126   CHKERRQ(ISDestroy(&to));
127   CHKERRQ(VecDestroy(&gvec));
128   PetscFunctionReturn(0);
129 }
130 
131 /*
132      Takes the local part of an already assembled MPIBAIJ matrix
133    and disassembles it. This is to allow new nonzeros into the matrix
134    that require more communication in the matrix vector multiply.
135    Thus certain data-structures must be rebuilt.
136 
137    Kind of slow! But that's what application programmers get when
138    they are sloppy.
139 */
140 PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
141 {
142   Mat_MPIBAIJ    *baij  = (Mat_MPIBAIJ*)A->data;
143   Mat            B      = baij->B,Bnew;
144   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
145   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
146   PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap->n;
147   MatScalar      *a  = Bbaij->a;
148   MatScalar      *atmp;
149 
150   PetscFunctionBegin;
151   /* free stuff related to matrix-vec multiply */
152   CHKERRQ(VecGetSize(baij->lvec,&ec)); /* needed for PetscLogObjectMemory below */
153   CHKERRQ(VecDestroy(&baij->lvec)); baij->lvec = NULL;
154   CHKERRQ(VecScatterDestroy(&baij->Mvctx)); baij->Mvctx = NULL;
155   if (baij->colmap) {
156 #if defined(PETSC_USE_CTABLE)
157     CHKERRQ(PetscTableDestroy(&baij->colmap));
158 #else
159     CHKERRQ(PetscFree(baij->colmap));
160     CHKERRQ(PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt)));
161 #endif
162   }
163 
164   /* make sure that B is assembled so we can access its values */
165   CHKERRQ(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
166   CHKERRQ(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
167 
168   /* invent new B and copy stuff over */
169   CHKERRQ(PetscMalloc1(mbs,&nz));
170   for (i=0; i<mbs; i++) {
171     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
172   }
173   CHKERRQ(MatCreate(PetscObjectComm((PetscObject)B),&Bnew));
174   CHKERRQ(MatSetSizes(Bnew,m,n,m,n));
175   CHKERRQ(MatSetType(Bnew,((PetscObject)B)->type_name));
176   CHKERRQ(MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz));
177   if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
178     ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew;
179   }
180 
181   CHKERRQ(MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE));
182   /*
183    Ensure that B's nonzerostate is monotonically increasing.
184    Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call?
185    */
186   Bnew->nonzerostate = B->nonzerostate;
187 
188   for (i=0; i<mbs; i++) {
189     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
190       col  = garray[Bbaij->j[j]];
191       atmp = a + j*bs2;
192       CHKERRQ(MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode));
193     }
194   }
195   CHKERRQ(MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE));
196 
197   CHKERRQ(PetscFree(nz));
198   CHKERRQ(PetscFree(baij->garray));
199   CHKERRQ(PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt)));
200   CHKERRQ(MatDestroy(&B));
201   CHKERRQ(PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew));
202 
203   baij->B          = Bnew;
204   A->was_assembled = PETSC_FALSE;
205   A->assembled     = PETSC_FALSE;
206   PetscFunctionReturn(0);
207 }
208 
209 /*      ugly stuff added for Glenn someday we should fix this up */
210 
211 static PetscInt *uglyrmapd = NULL,*uglyrmapo = NULL;  /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
212 static Vec      uglydd     = NULL,uglyoo     = NULL;  /* work vectors used to scale the two parts of the local matrix */
213 
214 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
215 {
216   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
217   Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)ina->B->data;
218   PetscInt       bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
219   PetscInt       *r_rmapd,*r_rmapo;
220 
221   PetscFunctionBegin;
222   CHKERRQ(MatGetOwnershipRange(inA,&cstart,&cend));
223   CHKERRQ(MatGetSize(ina->A,NULL,&n));
224   CHKERRQ(PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd));
225   nt   = 0;
226   for (i=0; i<inA->rmap->mapping->n; i++) {
227     if (inA->rmap->mapping->indices[i]*bs >= cstart && inA->rmap->mapping->indices[i]*bs < cend) {
228       nt++;
229       r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
230     }
231   }
232   PetscCheckFalse(nt*bs != n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT,nt*bs,n);
233   CHKERRQ(PetscMalloc1(n+1,&uglyrmapd));
234   for (i=0; i<inA->rmap->mapping->n; i++) {
235     if (r_rmapd[i]) {
236       for (j=0; j<bs; j++) {
237         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
238       }
239     }
240   }
241   CHKERRQ(PetscFree(r_rmapd));
242   CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,n,&uglydd));
243 
244   CHKERRQ(PetscCalloc1(ina->Nbs+1,&lindices));
245   for (i=0; i<B->nbs; i++) {
246     lindices[garray[i]] = i+1;
247   }
248   no   = inA->rmap->mapping->n - nt;
249   CHKERRQ(PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo));
250   nt   = 0;
251   for (i=0; i<inA->rmap->mapping->n; i++) {
252     if (lindices[inA->rmap->mapping->indices[i]]) {
253       nt++;
254       r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
255     }
256   }
257   PetscCheckFalse(nt > no,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT,nt,n);
258   CHKERRQ(PetscFree(lindices));
259   CHKERRQ(PetscMalloc1(nt*bs+1,&uglyrmapo));
260   for (i=0; i<inA->rmap->mapping->n; i++) {
261     if (r_rmapo[i]) {
262       for (j=0; j<bs; j++) {
263         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
264       }
265     }
266   }
267   CHKERRQ(PetscFree(r_rmapo));
268   CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo));
269   PetscFunctionReturn(0);
270 }
271 
272 PetscErrorCode  MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
273 {
274   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
275 
276   PetscFunctionBegin;
277   CHKERRQ(PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale)));
278   PetscFunctionReturn(0);
279 }
280 
281 PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
282 {
283   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
284   PetscInt          n,i;
285   PetscScalar       *d,*o;
286   const PetscScalar *s;
287 
288   PetscFunctionBegin;
289   if (!uglyrmapd) {
290     CHKERRQ(MatMPIBAIJDiagonalScaleLocalSetUp(A,scale));
291   }
292 
293   CHKERRQ(VecGetArrayRead(scale,&s));
294 
295   CHKERRQ(VecGetLocalSize(uglydd,&n));
296   CHKERRQ(VecGetArray(uglydd,&d));
297   for (i=0; i<n; i++) {
298     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
299   }
300   CHKERRQ(VecRestoreArray(uglydd,&d));
301   /* column scale "diagonal" portion of local matrix */
302   CHKERRQ(MatDiagonalScale(a->A,NULL,uglydd));
303 
304   CHKERRQ(VecGetLocalSize(uglyoo,&n));
305   CHKERRQ(VecGetArray(uglyoo,&o));
306   for (i=0; i<n; i++) {
307     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
308   }
309   CHKERRQ(VecRestoreArrayRead(scale,&s));
310   CHKERRQ(VecRestoreArray(uglyoo,&o));
311   /* column scale "off-diagonal" portion of local matrix */
312   CHKERRQ(MatDiagonalScale(a->B,NULL,uglyoo));
313   PetscFunctionReturn(0);
314 }
315