
/*
  This file defines a preconditioner that redistributes the matrix rows for load balancing,
  dropping the rows (and the corresponding columns) that have only a diagonal entry, and
  solves the resulting smaller problem with an inner KSP
*/
#include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/
#include <petscksp.h>

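/*
   Private context for PCREDISTRIBUTE: rows of the matrix that contain only a diagonal entry are
   solved directly with the stored inverse diagonal, while the remaining rows are gathered into a
   load-balanced reduced system that is solved by the inner KSP
*/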
typedef struct {
  KSP          ksp;          /* inner solver for the reduced (redistributed) system */
  Vec          x, b;         /* work vectors in the layout of the reduced system */
  VecScatter   scatter;      /* moves entries between the original and reduced layouts */
  IS           is;           /* global rows of the original matrix kept in the reduced system */
  PetscInt     dcnt, *drows; /* these are the local rows that have only diagonal entry */
  PetscScalar *diag;         /* inverses of the diagonal entries of the eliminated rows (0 for zero diagonals) */
  Vec          work;         /* work vector in the layout of the original system */
  PetscBool    zerodiag;     /* at least one eliminated row has a zero diagonal entry */
} PC_Redistribute;

static PetscErrorCode PCView_Redistribute(PC pc, PetscViewer viewer)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;
  PetscBool        iascii, isstring;
  PetscInt         ncnt, N;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
  if (iascii) {
    PetscCall(MPIU_Allreduce(&red->dcnt, &ncnt, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
    PetscCall(MatGetSize(pc->pmat, &N, NULL));
    PetscCall(PetscViewerASCIIPrintf(viewer, "    Number of rows eliminated %" PetscInt_FMT ", percentage of rows eliminated %g\n", ncnt, (double)(100.0 * ((PetscReal)ncnt) / ((PetscReal)N))));
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Redistribute preconditioner: \n"));
    PetscCall(KSPView(red->ksp, viewer));
  } else if (isstring) {
    PetscCall(PetscViewerStringSPrintf(viewer, " Redistribute preconditioner"));
    PetscCall(KSPView(red->ksp, viewer));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode PCSetUp_Redistribute(PC pc)
{
  PC_Redistribute         *red = (PC_Redistribute *)pc->data;
  MPI_Comm                 comm;
  PetscInt                 rstart, rend, i, nz, cnt, *rows, ncnt, dcnt, *drows;
  PetscLayout              map, nmap;
  PetscMPIInt              size, tag, n;
  PETSC_UNUSED PetscMPIInt imdex;
  PetscInt                *source = NULL;
  PetscMPIInt             *sizes  = NULL, nrecvs;
  PetscInt                 j, nsends;
  PetscInt                *owner = NULL, *starts = NULL, count, slen;
  PetscInt                *rvalues, *svalues, recvtotal;
  PetscMPIInt             *onodes1, *olengths1;
  MPI_Request             *send_waits = NULL, *recv_waits = NULL;
  MPI_Status               recv_status, *send_status;
  Vec                      tvec, diag;
  Mat                      tmat;
  const PetscScalar       *d, *values;
  const PetscInt          *cols;

  PetscFunctionBegin;
  if (pc->setupcalled) {
    PetscCall(KSPGetOperators(red->ksp, NULL, &tmat));
    PetscCall(MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_REUSE_MATRIX, &tmat));
    PetscCall(KSPSetOperators(red->ksp, tmat, tmat));
  } else {
    PetscInt NN;

    PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscObjectGetNewTag((PetscObject)pc, &tag));

    /* count the rows on this process that have at least one nonzero off-diagonal entry */
    PetscCall(MatGetOwnershipRange(pc->mat, &rstart, &rend));
    cnt = 0;
    for (i = rstart; i < rend; i++) {
      PetscCall(MatGetRow(pc->mat, i, &nz, &cols, &values));
      for (PetscInt j = 0; j < nz; j++) {
        if (values[j] != 0 && cols[j] != i) {
          cnt++;
          break;
        }
      }
      PetscCall(MatRestoreRow(pc->mat, i, &nz, &cols, &values));
    }
    PetscCall(PetscMalloc1(cnt, &rows));
    PetscCall(PetscMalloc1(rend - rstart - cnt, &drows));

    /* list the non-diagonal rows (global numbering) and diagonal-only rows (local numbering) on this process */
    cnt  = 0;
    dcnt = 0;
    for (i = rstart; i < rend; i++) {
      PetscBool diagonly = PETSC_TRUE;
      PetscCall(MatGetRow(pc->mat, i, &nz, &cols, &values));
      for (PetscInt j = 0; j < nz; j++) {
        if (values[j] != 0 && cols[j] != i) {
          diagonly = PETSC_FALSE;
          break;
        }
      }
      if (!diagonly) rows[cnt++] = i;
      else drows[dcnt++] = i - rstart;
      PetscCall(MatRestoreRow(pc->mat, i, &nz, &cols, &values));
    }

    /* create PetscLayout for non-diagonal rows on each process */
    PetscCall(PetscLayoutCreate(comm, &map));
    PetscCall(PetscLayoutSetLocalSize(map, cnt));
    PetscCall(PetscLayoutSetBlockSize(map, 1));
    PetscCall(PetscLayoutSetUp(map));
    rstart = map->rstart;
    rend   = map->rend;

    /* create PetscLayout for load-balanced non-diagonal rows on each process */
    PetscCall(PetscLayoutCreate(comm, &nmap));
    PetscCall(MPIU_Allreduce(&cnt, &ncnt, 1, MPIU_INT, MPI_SUM, comm));
    PetscCall(PetscLayoutSetSize(nmap, ncnt));
    PetscCall(PetscLayoutSetBlockSize(nmap, 1));
    PetscCall(PetscLayoutSetUp(nmap));

    PetscCall(MatGetSize(pc->pmat, &NN, NULL));
    PetscCall(PetscInfo(pc, "Number of diagonal rows eliminated %" PetscInt_FMT ", percentage eliminated %g\n", NN - ncnt, (double)(100.0 * ((PetscReal)(NN - ncnt)) / ((PetscReal)(NN)))));
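
    /*
       What follows load balances the retained (non-diagonal) rows across the processes: the global
       indices in rows[] are shipped to the processes prescribed by the layout nmap, and the indices
       each process receives form the index set red->is that defines the reduced matrix and vectors
    */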
    if (size > 1) {
      /* the following block of code assumes MPI can send messages to self, which is not supported for MPI-uni hence we need to handle the size 1 case as a special case */
      /*
       this code is taken from VecScatterCreate_PtoS()
       Determines what rows need to be moved where to
       load balance the non-diagonal rows
       */
      /*  count number of contributors to each processor */
      PetscCall(PetscMalloc2(size, &sizes, cnt, &owner));
      PetscCall(PetscArrayzero(sizes, size));
      j      = 0;
      nsends = 0;
      for (i = rstart; i < rend; i++) {
        if (i < nmap->range[j]) j = 0;
        for (; j < size; j++) {
          if (i < nmap->range[j + 1]) {
            if (!sizes[j]++) nsends++;
            owner[i - rstart] = j;
            break;
          }
        }
      }
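      /* at this point sizes[p] is the number of rows this process will send to process p, and owner[] records the destination process of each local retained row */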
      /* inform other processors of number of messages and max length*/
      PetscCall(PetscGatherNumberOfMessages(comm, NULL, sizes, &nrecvs));
      PetscCall(PetscGatherMessageLengths(comm, nsends, nrecvs, sizes, &onodes1, &olengths1));
      PetscCall(PetscSortMPIIntWithArray(nrecvs, onodes1, olengths1));
      recvtotal = 0;
      for (i = 0; i < nrecvs; i++) recvtotal += olengths1[i];

      /* post receives:  rvalues - rows I will own; count - current offset into rvalues */
      PetscCall(PetscMalloc3(recvtotal, &rvalues, nrecvs, &source, nrecvs, &recv_waits));
      count = 0;
      for (i = 0; i < nrecvs; i++) {
        PetscCallMPI(MPI_Irecv((rvalues + count), olengths1[i], MPIU_INT, onodes1[i], tag, comm, recv_waits + i));
        count += olengths1[i];
      }

      /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
       the ith processor
       */
      PetscCall(PetscMalloc3(cnt, &svalues, nsends, &send_waits, size, &starts));
      starts[0] = 0;
      for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1];
      for (i = 0; i < cnt; i++) svalues[starts[owner[i]]++] = rows[i];
      for (i = 0; i < cnt; i++) rows[i] = rows[i] - rstart;
      red->drows = drows;
      red->dcnt  = dcnt;
      PetscCall(PetscFree(rows));

      starts[0] = 0;
      for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1];
      count = 0;
      for (i = 0; i < size; i++) {
        if (sizes[i]) PetscCallMPI(MPI_Isend(svalues + starts[i], sizes[i], MPIU_INT, i, tag, comm, send_waits + count++));
      }

      /*  wait on receives */
      count = nrecvs;
      slen  = 0;
      while (count) {
        PetscCallMPI(MPI_Waitany(nrecvs, recv_waits, &imdex, &recv_status));
        /* unpack receives into our local space */
        PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, &n));
        slen += n;
        count--;
      }
      PetscCheck(slen == recvtotal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Total message lengths %" PetscInt_FMT " not expected %" PetscInt_FMT, slen, recvtotal);
      PetscCall(ISCreateGeneral(comm, slen, rvalues, PETSC_COPY_VALUES, &red->is));

      /* free all work space */
      PetscCall(PetscFree(olengths1));
      PetscCall(PetscFree(onodes1));
      PetscCall(PetscFree3(rvalues, source, recv_waits));
      PetscCall(PetscFree2(sizes, owner));
      if (nsends) { /* wait on sends */
        PetscCall(PetscMalloc1(nsends, &send_status));
        PetscCallMPI(MPI_Waitall(nsends, send_waits, send_status));
        PetscCall(PetscFree(send_status));
      }
      PetscCall(PetscFree3(svalues, send_waits, starts));
    } else {
      PetscCall(ISCreateGeneral(comm, cnt, rows, PETSC_OWN_POINTER, &red->is));
      red->drows = drows;
      red->dcnt  = dcnt;
      slen       = cnt;
    }
    PetscCall(PetscLayoutDestroy(&map));
    PetscCall(PetscLayoutDestroy(&nmap));

    PetscCall(VecCreateMPI(comm, slen, PETSC_DETERMINE, &red->b));
    PetscCall(VecDuplicate(red->b, &red->x));
    PetscCall(MatCreateVecs(pc->pmat, &tvec, NULL));
    PetscCall(VecScatterCreate(tvec, red->is, red->b, NULL, &red->scatter));
    PetscCall(VecDestroy(&tvec));
    PetscCall(MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_INITIAL_MATRIX, &tmat));
    PetscCall(KSPSetOperators(red->ksp, tmat, tmat));
    PetscCall(MatDestroy(&tmat));
  }

  /* get the diagonal portion of the matrix and store the inverses of the diagonal entries of the eliminated rows */
  PetscCall(PetscFree(red->diag));
  PetscCall(PetscMalloc1(red->dcnt, &red->diag));
  PetscCall(MatCreateVecs(pc->pmat, &diag, NULL));
  PetscCall(MatGetDiagonal(pc->pmat, diag));
  PetscCall(VecGetArrayRead(diag, &d));
  for (i = 0; i < red->dcnt; i++) {
    if (d[red->drows[i]] != 0) red->diag[i] = 1.0 / d[red->drows[i]];
    else {
      red->zerodiag = PETSC_TRUE;
      red->diag[i]  = 0.0;
    }
  }
  PetscCall(VecRestoreArrayRead(diag, &d));
  PetscCall(VecDestroy(&diag));
  PetscCall(KSPSetUp(red->ksp));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCApply_Redistribute(PC pc, Vec b, Vec x)
{
  PC_Redistribute   *red   = (PC_Redistribute *)pc->data;
  PetscInt           dcnt  = red->dcnt, i;
  const PetscInt    *drows = red->drows;
  PetscScalar       *xwork;
  const PetscScalar *bwork, *diag = red->diag;

  PetscFunctionBegin;
  if (!red->work) PetscCall(VecDuplicate(b, &red->work));
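  /*
     After a symmetric permutation that orders the diagonal-only rows first, the system has the form
        [ D  0 ] [x_D]   [b_D]
        [ B  C ] [x_C] = [b_C]
     with D diagonal, so x_D = D^{-1} b_D and the reduced system solved by the inner KSP is
        C x_C = b_C - B x_D
  */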
  /* compute the rows of the solution that have only a diagonal entry */
  PetscCall(VecSet(x, 0.0));
  PetscCall(VecGetArray(x, &xwork));
  PetscCall(VecGetArrayRead(b, &bwork));
  if (red->zerodiag) {
    for (i = 0; i < dcnt; i++) {
      if (diag[i] == 0.0 && bwork[drows[i]] != 0.0) {
        PetscCheck(!pc->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Linear system is inconsistent, zero matrix row but nonzero right hand side");
        PetscCall(PetscInfo(pc, "Linear system is inconsistent, zero matrix row but nonzero right hand side\n"));
        PetscCall(VecSetInf(x));
        pc->failedreasonrank = PC_INCONSISTENT_RHS;
      }
    }
  }
  for (i = 0; i < dcnt; i++) xwork[drows[i]] = diag[i] * bwork[drows[i]];
  PetscCall(PetscLogFlops(dcnt));
  PetscCall(VecRestoreArray(x, &xwork));
  PetscCall(VecRestoreArrayRead(b, &bwork));
  /* update the right hand side for the reduced system with diagonal rows (and corresponding columns) removed */
  PetscCall(MatMult(pc->pmat, x, red->work));
  PetscCall(VecAYPX(red->work, -1.0, b)); /* red->work = b - A x */

  PetscCall(VecScatterBegin(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(KSPSolve(red->ksp, red->b, red->x));
  PetscCall(KSPCheckSolve(red->ksp, pc, red->x));
  PetscCall(VecScatterBegin(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCDestroy_Redistribute(PC pc)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  PetscFunctionBegin;
  PetscCall(VecScatterDestroy(&red->scatter));
  PetscCall(ISDestroy(&red->is));
  PetscCall(VecDestroy(&red->b));
  PetscCall(VecDestroy(&red->x));
  PetscCall(KSPDestroy(&red->ksp));
  PetscCall(VecDestroy(&red->work));
  PetscCall(PetscFree(red->drows));
  PetscCall(PetscFree(red->diag));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCSetFromOptions_Redistribute(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  PetscFunctionBegin;
  PetscCall(KSPSetFromOptions(red->ksp));
  PetscFunctionReturn(0);
}

/*@
   PCRedistributeGetKSP - Gets the inner `KSP` created by the `PCREDISTRIBUTE` preconditioner

   Not Collective

   Input Parameter:
.  pc - the preconditioner context

   Output Parameter:
.  innerksp - the inner `KSP`

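   Example Usage:
   A minimal sketch of getting and configuring the inner solver from code, assuming `pc` has already been set to `PCREDISTRIBUTE`; error checking with `PetscCall()` is omitted for brevity.
.vb
   KSP inner;

   PCRedistributeGetKSP(pc, &inner);
   KSPSetType(inner, KSPCG);
.ve
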
   Level: advanced

.seealso: `KSP`, `PCREDISTRIBUTE`
@*/
PetscErrorCode PCRedistributeGetKSP(PC pc, KSP *innerksp)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscValidPointer(innerksp, 2);
  *innerksp = red->ksp;
  PetscFunctionReturn(0);
}

/*MC
     PCREDISTRIBUTE - Redistributes a matrix for load balancing, removing the rows (and the corresponding columns) that only have a diagonal entry, and then
     applies a `KSP` to the resulting smaller matrix

     Notes:
     Options for the inner redistribute `KSP` and `PC` use the options database prefix -redistribute_

     Usually run this preconditioner with the outer solver set to -ksp_type preonly

     If you have used `MatZeroRows()` to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem then you can use, for example, -ksp_type preonly
     -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc to take advantage of the symmetry.

     This does NOT call a partitioner to reorder rows to lower communication; the ordering of the rows in the original matrix and redistributed matrix is the same.
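
     Example Usage:
     A minimal programmatic setup might look like the following sketch, where the matrix A and the vectors b and x are assumed to already exist and error checking has been omitted.
.vb
     KSP ksp;
     PC  pc;

     KSPCreate(PETSC_COMM_WORLD, &ksp);
     KSPSetOperators(ksp, A, A);
     KSPSetType(ksp, KSPPREONLY);
     KSPGetPC(ksp, &pc);
     PCSetType(pc, PCREDISTRIBUTE);
     KSPSetFromOptions(ksp); /* picks up -redistribute_ksp_type, -redistribute_pc_type, ... */
     KSPSolve(ksp, b, x);
.ve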

     Developer Note:
     Should add an option to this preconditioner to use a partitioner to redistribute the rows to lower communication.

   Level: intermediate

.seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PCRedistributeGetKSP()`, `MatZeroRows()`
M*/

PETSC_EXTERN PetscErrorCode PCCreate_Redistribute(PC pc)
{
  PC_Redistribute *red;
  const char      *prefix;

  PetscFunctionBegin;
  PetscCall(PetscNew(&red));
  pc->data = (void *)red;

  pc->ops->apply          = PCApply_Redistribute;
  pc->ops->applytranspose = NULL;
  pc->ops->setup          = PCSetUp_Redistribute;
  pc->ops->destroy        = PCDestroy_Redistribute;
  pc->ops->setfromoptions = PCSetFromOptions_Redistribute;
  pc->ops->view           = PCView_Redistribute;

  PetscCall(KSPCreate(PetscObjectComm((PetscObject)pc), &red->ksp));
  PetscCall(KSPSetErrorIfNotConverged(red->ksp, pc->erroriffailure));
  PetscCall(PetscObjectIncrementTabLevel((PetscObject)red->ksp, (PetscObject)pc, 1));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCall(KSPSetOptionsPrefix(red->ksp, prefix));
  PetscCall(KSPAppendOptionsPrefix(red->ksp, "redistribute_"));
  PetscFunctionReturn(0);
}
383