/* xref: /petsc/src/mat/impls/aij/mpi/mmaij.c (revision d2522c19e8fa9bca20aaca277941d9a63e71db6a) */

/*
   Support for the parallel AIJ matrix-vector multiply
*/
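
/*
   A rough sketch of how the objects built below are used (the real code is MatMult_MPIAIJ() in
   mpiaij.c; x, x_local and y_local here are only illustrative names):

     VecScatterBegin(aij->Mvctx, x, aij->lvec, INSERT_VALUES, SCATTER_FORWARD);
     // y_local  = aij->A * x_local        "diagonal" block times the locally owned part of x
     VecScatterEnd(aij->Mvctx, x, aij->lvec, INSERT_VALUES, SCATTER_FORWARD);
     // y_local += aij->B * aij->lvec      "off-diagonal" block times the gathered ghost values

   aij->garray[k] holds the global column represented by local column k of aij->B, so the scatter
   created in MatSetUpMultiply_MPIAIJ() pulls exactly the needed off-process entries of x into
   aij->lvec.
*/
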
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h> /* needed because this file accesses the data structure of ISLocalToGlobalMapping directly */

PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ *B   = (Mat_SeqAIJ *)(aij->B->data);
  PetscInt    i, j, *aj = B->j, *garray;
  PetscInt    ec = 0; /* Number of nonzero external columns */
  IS          from, to;
  Vec         gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid, lid;
#else
  PetscInt N = mat->cmap->N, *indices;
#endif

  PetscFunctionBegin;
  if (!aij->garray) {
    PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat");
#if defined(PETSC_USE_CTABLE)
    /* use a table */
    PetscCall(PetscTableCreate(aij->B->rmap->n, mat->cmap->N + 1, &gid1_lid1));
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) {
        PetscInt data, gid1 = aj[B->i[i] + j] + 1;
        PetscCall(PetscTableFind(gid1_lid1, gid1, &data));
        if (!data) {
          /* one-based table */
          PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES));
        }
      }
    }
    /* form array of columns we need */
    PetscCall(PetscMalloc1(ec, &garray));
    PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos));
    while (tpos) {
      PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid));
      gid--;
      lid--;
      garray[lid] = gid;
    }
    PetscCall(PetscSortInt(ec, garray)); /* sort, and rebuild */
    PetscCall(PetscTableRemoveAll(gid1_lid1));
    for (i = 0; i < ec; i++) { PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES)); }
    /* compact out the extra columns in B */
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) {
        PetscInt gid1 = aj[B->i[i] + j] + 1;
        PetscCall(PetscTableFind(gid1_lid1, gid1, &lid));
        lid--;
        aj[B->i[i] + j] = lid;
      }
    }
    PetscCall(PetscLayoutDestroy(&aij->B->cmap));
    PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap));
    PetscCall(PetscTableDestroy(&gid1_lid1));
#else
    /* Make an array as long as the number of columns */
    /* mark those columns that are in aij->B */
    PetscCall(PetscCalloc1(N, &indices));
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) {
        if (!indices[aj[B->i[i] + j]]) ec++;
        indices[aj[B->i[i] + j]] = 1;
      }
    }

    /* form array of columns we need */
    PetscCall(PetscMalloc1(ec, &garray));
    ec = 0;
    for (i = 0; i < N; i++) {
      if (indices[i]) garray[ec++] = i;
    }

    /* make indices now point into garray */
    for (i = 0; i < ec; i++) { indices[garray[i]] = i; }

    /* compact out the extra columns in B */
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) { aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; }
    }
    PetscCall(PetscLayoutDestroy(&aij->B->cmap));
    PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap));
    PetscCall(PetscFree(indices));
#endif
  } else {
    garray = aij->garray;
  }

  if (!aij->lvec) {
    PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat");
    PetscCall(MatCreateVecs(aij->B, &aij->lvec, NULL));
  }
  PetscCall(VecGetSize(aij->lvec, &ec));

  /* create two temporary index sets used to build the scatter */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from));
  PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to));

  /* create a temporary global vector to generate the scatter context */
  /* this does not allocate the vector's array memory, so it is cheap */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));

  /* generate the scatter context */
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(VecScatterCreate(gvec, from, aij->lvec, to, &aij->Mvctx));
  PetscCall(VecScatterViewFromOptions(aij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)aij->Mvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)aij->lvec));
  PetscCall(PetscLogObjectMemory((PetscObject)mat, ec * sizeof(PetscInt)));
  aij->garray = garray;

  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)from));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)to));

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(0);
}

/* Disassemble the off-diagonal portion B of the MPIAIJXxx matrix.
   In other words, change B from the reduced format that uses local column ids
   to the expanded format that uses global column ids, which makes it easier to
   insert new nonzeros (given with global column ids) into the matrix.
   The off-diagonal B determines the communication in the matrix-vector multiply.
*/
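/* For example (numbers purely illustrative): with garray = {3, 7, 12}, a row of B stored in the
   reduced format with column indices {0, 2} represents global columns 3 and 12.  After
   MatDisAssemble_MPIAIJ() the same row is stored with the global indices {3, 12} directly, so new
   entries given by global column id can be inserted without consulting garray. */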
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A) {
  Mat_MPIAIJ        *aij  = (Mat_MPIAIJ *)A->data;
  Mat                B    = aij->B, Bnew;
  Mat_SeqAIJ        *Baij = (Mat_SeqAIJ *)B->data;
  PetscInt           i, j, m = B->rmap->n, n = A->cmap->N, col, ct = 0, *garray = aij->garray, *nz, ec;
  PetscScalar        v;
  const PetscScalar *ba;

  PetscFunctionBegin;
  /* free stuff related to the matrix-vector multiply */
  PetscCall(VecGetSize(aij->lvec, &ec)); /* needed for PetscLogObjectMemory below */
  PetscCall(VecDestroy(&aij->lvec));
  if (aij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableDestroy(&aij->colmap));
#else
    PetscCall(PetscFree(aij->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)A, -aij->B->cmap->n * sizeof(PetscInt)));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* create a new B and copy the values over */
  PetscCall(PetscMalloc1(m + 1, &nz));
  for (i = 0; i < m; i++) { nz[i] = Baij->i[i + 1] - Baij->i[i]; }
  PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
  PetscCall(MatSetSizes(Bnew, m, n, m, n)); /* Bnew now uses A->cmap->N as its column size */
  PetscCall(MatSetBlockSizesFromMats(Bnew, A, A));
  PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
  PetscCall(MatSeqAIJSetPreallocation(Bnew, 0, nz));

  if (Baij->nonew >= 0) { /* Inherit insertion error options (if positive). */
    ((Mat_SeqAIJ *)Bnew->data)->nonew = Baij->nonew;
  }

  /*
   Ensure that B's nonzerostate is monotonically increasing.
   Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
   */
  Bnew->nonzerostate = B->nonzerostate;

  PetscCall(PetscFree(nz));
  PetscCall(MatSeqAIJGetArrayRead(B, &ba));
  for (i = 0; i < m; i++) {
    for (j = Baij->i[i]; j < Baij->i[i + 1]; j++) {
      col = garray[Baij->j[ct]];
      v   = ba[ct++];
      PetscCall(MatSetValues(Bnew, 1, &i, 1, &col, &v, B->insertmode));
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &ba));

  PetscCall(PetscFree(aij->garray));
  PetscCall(PetscLogObjectMemory((PetscObject)A, -ec * sizeof(PetscInt)));
  PetscCall(MatDestroy(&B));
  PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)Bnew));

  aij->B           = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec       auglydd = NULL, auglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */
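
/* A hedged usage sketch (the mapping and vector names below are illustrative, not from this file):
   the matrix must carry a local-to-global mapping, and `scale` is a sequential vector indexed in
   that local ordering, e.g.

     MatSetLocalToGlobalMapping(A, map, map);
     ... assemble A ...
     MatMPIAIJDiagonalScaleLocal(A, scale);   // right-scales both the diagonal and off-diagonal blocks

   auglyrmapd[] / auglyrmapo[] translate positions in the "diagonal" / "off-diagonal" column spaces
   back to indices of that local ordering, and auglydd / auglyoo receive the matching pieces of scale. */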

PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale) {
  Mat_MPIAIJ *ina = (Mat_MPIAIJ *)inA->data; /* access private part of matrix */
  PetscInt    i, n, nt, cstart, cend, no, *garray = ina->garray, *lindices;
  PetscInt   *r_rmapd, *r_rmapo;

  PetscFunctionBegin;
  PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
  PetscCall(MatGetSize(ina->A, NULL, &n));
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  PetscCheck(nt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT, nt, n);
  PetscCall(PetscMalloc1(n + 1, &auglyrmapd));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) { auglyrmapd[(r_rmapd[i] - 1) - cstart] = i; }
  }
  PetscCall(PetscFree(r_rmapd));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd));

  PetscCall(PetscCalloc1(inA->cmap->N + 1, &lindices));
  for (i = 0; i < ina->B->cmap->n; i++) { lindices[garray[i]] = i + 1; }
  no = inA->rmap->mapping->n - nt;
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, no);
  PetscCall(PetscFree(lindices));
  PetscCall(PetscMalloc1(nt + 1, &auglyrmapo));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) { auglyrmapo[(r_rmapo[i] - 1)] = i; }
  }
  PetscCall(PetscFree(r_rmapo));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt, &auglyoo));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A, Vec scale) {
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */

  PetscFunctionBegin;
  PetscTryMethod(A, "MatDiagonalScaleLocal_C", (Mat, Vec), (A, scale));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A, Vec scale) {
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data; /* access private part of matrix */
  PetscInt           n, i;
  PetscScalar       *d, *o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!auglyrmapd) { PetscCall(MatMPIAIJDiagonalScaleLocalSetUp(A, scale)); }

  PetscCall(VecGetArrayRead(scale, &s));

  PetscCall(VecGetLocalSize(auglydd, &n));
  PetscCall(VecGetArray(auglydd, &d));
  for (i = 0; i < n; i++) { d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ }
  PetscCall(VecRestoreArray(auglydd, &d));
  /* column scale "diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->A, NULL, auglydd));

  PetscCall(VecGetLocalSize(auglyoo, &n));
  PetscCall(VecGetArray(auglyoo, &o));
  for (i = 0; i < n; i++) { o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ }
  PetscCall(VecRestoreArrayRead(scale, &s));
  PetscCall(VecRestoreArray(auglyoo, &o));
  /* column scale "off-diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->B, NULL, auglyoo));
  PetscFunctionReturn(0);
}