xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision d2522c19e8fa9bca20aaca277941d9a63e71db6a)
1 
2 /*
3    Support for the parallel BAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/baij/mpi/mpibaij.h>
6 #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */
7 
/*
   MatSetUpMultiply_MPIBAIJ - Builds the communication structures needed for the
   parallel matrix-vector product: determines which off-process block columns are
   actually referenced by the off-diagonal part B, compacts B's column indices to
   a local numbering, and creates the work vector (lvec) and VecScatter (Mvctx)
   used to gather the needed remote vector entries.

   Side effects: renumbers B->j in place, replaces B's column layout, and fills in
   baij->lvec, baij->Mvctx, and baij->garray (local block column -> global block column).
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)(baij->B->data);
  PetscInt     i, j, *aj = B->j, ec = 0, *garray; /* ec = number of distinct nonzero block columns in B */
  PetscInt     bs = mat->rmap->bs, *stmp;
  IS           from, to;
  Vec          gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid, lid;
#else
  PetscInt Nbs = baij->Nbs, *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* Hash-table variant: avoids allocating an O(Nbs) marker array on each process.
     The table maps (global block column + 1) -> (local index + 1); the +1 shifts
     keep 0 free to mean "not present". */
  PetscCall(PetscTableCreate(B->mbs, baij->Nbs + 1, &gid1_lid1));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscTableFind(gid1_lid1, gid1, &data));
      if (!data) {
        /* one based table */
        PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES));
      }
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos));
  while (tpos) {
    PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid));
    gid--; /* undo the +1 shifts used inside the table */
    lid--;
    garray[lid] = gid;
  }
  /* sort so local indices enumerate the global columns in increasing order,
     then rebuild the table with the sorted numbering */
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscTableRemoveAll(gid1_lid1));
  for (i = 0; i < ec; i++) { PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES)); }
  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscTableFind(gid1_lid1, gid1, &lid));
      lid--;
      aj[B->i[i] + j] = lid; /* B->j now holds local (compacted) block column indices */
    }
  }
  B->nbs = ec;
  /* B's column space shrinks to the ec referenced block columns */
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
  PetscCall(PetscTableDestroy(&gid1_lid1));
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  ec = 0;
  for (i = 0; i < Nbs; i++) {
    if (indices[i]) { garray[ec++] = i; } /* garray is automatically sorted here */
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) { indices[garray[i]] = i; }

  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) { aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; }
  }
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
  PetscCall(PetscFree(indices));
#endif

  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec));

  /* create two temporary index sets for building scatter-gather */
  /* "from": the needed global block columns; "to": 0..ec-1 locally */
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));

  PetscCall(PetscMalloc1(ec, &stmp));
  for (i = 0; i < ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to));

  /* create temporary global vector to generate scatter context */
  /* NULL array: only the layout of gvec matters for building the scatter */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));

  PetscCall(VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx));
  PetscCall(VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)baij->Mvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)baij->lvec));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)from));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)to));

  baij->garray = garray; /* ownership of garray transfers to the matrix */

  PetscCall(PetscLogObjectMemory((PetscObject)mat, ec * sizeof(PetscInt)));
  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(0);
}
122 
/*
     Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.

   On exit the off-diagonal block B has been replaced by a copy whose column
   indices are global again (un-compacted via garray), and the scatter/colmap
   structures built by MatSetUpMultiply_MPIBAIJ() have been destroyed so they
   can be rebuilt at the next assembly.
*/
PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) {
  Mat_MPIBAIJ *baij  = (Mat_MPIBAIJ *)A->data;
  Mat          B     = baij->B, Bnew;
  Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt     bs2 = baij->bs2, *nz, ec, m = A->rmap->n;
  MatScalar   *a = Bbaij->a;
  MatScalar   *atmp;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecGetSize(baij->lvec, &ec)); /* needed for PetscLogObjectMemory below */
  PetscCall(VecDestroy(&baij->lvec));
  baij->lvec = NULL;
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  baij->Mvctx = NULL;
  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)A, -Bbaij->nbs * sizeof(PetscInt)));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  /* nz[i] = number of block nonzeros in row i of the old B; used to preallocate Bnew */
  PetscCall(PetscMalloc1(mbs, &nz));
  for (i = 0; i < mbs; i++) { nz[i] = Bbaij->i[i + 1] - Bbaij->i[i]; }
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Bnew));
  /* Bnew gets the full global column width n, since its column indices are global again */
  PetscCall(MatSetSizes(Bnew, m, n, m, n));
  PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
  PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
  if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
    ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
  }

  /* NOTE(review): PETSC_FALSE presumably because the raw block values in 'a' are
     in B's internal (column-major within each bs x bs block) layout — confirm */
  PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE));
  /*
   Ensure that B's nonzerostate is monotonically increasing.
   Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call?
   */
  Bnew->nonzerostate = B->nonzerostate;

  for (i = 0; i < mbs; i++) {
    for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
      col  = garray[Bbaij->j[j]]; /* translate compacted local block column back to global */
      atmp = a + j * bs2;
      PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode));
    }
  }
  PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_TRUE));

  PetscCall(PetscFree(nz));
  PetscCall(PetscFree(baij->garray));
  PetscCall(PetscLogObjectMemory((PetscObject)A, -ec * sizeof(PetscInt)));
  PetscCall(MatDestroy(&B));
  PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)Bnew));

  baij->B          = Bnew;
  /* mark A unassembled so the multiply structures get rebuilt at next assembly */
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
199 
/*      ugly stuff added for Glenn someday we should fix this up */
/* NOTE(review): these are file-scope statics, so they are shared by ALL MPIBAIJ
   matrices in the process and are not freed anywhere in this file — set up once
   by MatMPIBAIJDiagonalScaleLocalSetUp() on first use. */

static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec       uglydd = NULL, uglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */
204 
205 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale) {
206   Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /*access private part of matrix */
207   Mat_SeqBAIJ *B   = (Mat_SeqBAIJ *)ina->B->data;
208   PetscInt     bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices;
209   PetscInt    *r_rmapd, *r_rmapo;
210 
211   PetscFunctionBegin;
212   PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
213   PetscCall(MatGetSize(ina->A, NULL, &n));
214   PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd));
215   nt = 0;
216   for (i = 0; i < inA->rmap->mapping->n; i++) {
217     if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) {
218       nt++;
219       r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
220     }
221   }
222   PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n);
223   PetscCall(PetscMalloc1(n + 1, &uglyrmapd));
224   for (i = 0; i < inA->rmap->mapping->n; i++) {
225     if (r_rmapd[i]) {
226       for (j = 0; j < bs; j++) { uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j; }
227     }
228   }
229   PetscCall(PetscFree(r_rmapd));
230   PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &uglydd));
231 
232   PetscCall(PetscCalloc1(ina->Nbs + 1, &lindices));
233   for (i = 0; i < B->nbs; i++) { lindices[garray[i]] = i + 1; }
234   no = inA->rmap->mapping->n - nt;
235   PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo));
236   nt = 0;
237   for (i = 0; i < inA->rmap->mapping->n; i++) {
238     if (lindices[inA->rmap->mapping->indices[i]]) {
239       nt++;
240       r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
241     }
242   }
243   PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n);
244   PetscCall(PetscFree(lindices));
245   PetscCall(PetscMalloc1(nt * bs + 1, &uglyrmapo));
246   for (i = 0; i < inA->rmap->mapping->n; i++) {
247     if (r_rmapo[i]) {
248       for (j = 0; j < bs; j++) { uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j; }
249     }
250   }
251   PetscCall(PetscFree(r_rmapo));
252   PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo));
253   PetscFunctionReturn(0);
254 }
255 
/*
   MatMPIBAIJDiagonalScaleLocal - Public entry point that dispatches to the
   matrix's "MatDiagonalScaleLocal_C" method (MatDiagonalScaleLocal_MPIBAIJ for
   this type) if the matrix provides one; a no-op otherwise (PetscTryMethod).
*/
PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A, Vec scale) {
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */

  PetscFunctionBegin;
  PetscTryMethod(A, "MatDiagonalScaleLocal_C", (Mat, Vec), (A, scale));
  PetscFunctionReturn(0);
}
263 
264 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale) {
265   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data; /*access private part of matrix */
266   PetscInt           n, i;
267   PetscScalar       *d, *o;
268   const PetscScalar *s;
269 
270   PetscFunctionBegin;
271   if (!uglyrmapd) { PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A, scale)); }
272 
273   PetscCall(VecGetArrayRead(scale, &s));
274 
275   PetscCall(VecGetLocalSize(uglydd, &n));
276   PetscCall(VecGetArray(uglydd, &d));
277   for (i = 0; i < n; i++) { d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ }
278   PetscCall(VecRestoreArray(uglydd, &d));
279   /* column scale "diagonal" portion of local matrix */
280   PetscCall(MatDiagonalScale(a->A, NULL, uglydd));
281 
282   PetscCall(VecGetLocalSize(uglyoo, &n));
283   PetscCall(VecGetArray(uglyoo, &o));
284   for (i = 0; i < n; i++) { o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ }
285   PetscCall(VecRestoreArrayRead(scale, &s));
286   PetscCall(VecRestoreArray(uglyoo, &o));
287   /* column scale "off-diagonal" portion of local matrix */
288   PetscCall(MatDiagonalScale(a->B, NULL, uglyoo));
289   PetscFunctionReturn(0);
290 }
291