xref: /petsc/src/ksp/pc/impls/redistribute/redistribute.c (revision b8d709ab44c354dba7591c0191c0984ef3bb0cd5)

/*
  This file defines a preconditioner that drops the rows (and corresponding columns) that have only a diagonal entry,
  load balances the remaining rows across the MPI processes, and solves the reduced problem with an inner KSP.
*/
#include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/
#include <petscksp.h>

typedef struct {
  KSP          ksp;
  Vec          x, b;
  VecScatter   scatter;
  IS           is;
  PetscInt     dcnt, *drows; /* these are the local rows that have only a diagonal entry */
  PetscScalar *diag;
  Vec          work;
  PetscBool    zerodiag;
} PC_Redistribute;

static PetscErrorCode PCView_Redistribute(PC pc, PetscViewer viewer)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;
  PetscBool        iascii, isstring;
  PetscInt         ncnt, N;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
  if (iascii) {
    PetscCall(MPIU_Allreduce(&red->dcnt, &ncnt, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
    PetscCall(MatGetSize(pc->pmat, &N, NULL));
    PetscCall(PetscViewerASCIIPrintf(viewer, "    Number rows eliminated %" PetscInt_FMT " Percentage rows eliminated %g\n", ncnt, (double)(100.0 * ((PetscReal)ncnt) / ((PetscReal)N))));
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Redistribute preconditioner: \n"));
    PetscCall(KSPView(red->ksp, viewer));
  } else if (isstring) {
    PetscCall(PetscViewerStringSPrintf(viewer, " Redistribute preconditioner"));
    PetscCall(KSPView(red->ksp, viewer));
  }
  PetscFunctionReturn(0);
}

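/*
   Outline of PCSetUp_Redistribute(): each local row is classified as "diagonal only" or not; a load-balanced
   layout is built for the rows that have off-diagonal entries; the global indices of the rows each process will
   own are exchanged with MPI; and the index set, vector scatter, and submatrix used by the inner KSP are created.
   The inverses of the diagonal entries of the eliminated rows are also cached for use in PCApply_Redistribute().
*/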
static PetscErrorCode PCSetUp_Redistribute(PC pc)
{
  PC_Redistribute         *red = (PC_Redistribute *)pc->data;
  MPI_Comm                 comm;
  PetscInt                 rstart, rend, i, nz, cnt, *rows, ncnt, dcnt, *drows;
  PetscLayout              map, nmap;
  PetscMPIInt              size, tag, n;
  PETSC_UNUSED PetscMPIInt imdex;
  PetscInt                *source = NULL;
  PetscMPIInt             *sizes  = NULL, nrecvs;
  PetscInt                 j, nsends;
  PetscInt                *owner = NULL, *starts = NULL, count, slen;
  PetscInt                *rvalues, *svalues, recvtotal;
  PetscMPIInt             *onodes1, *olengths1;
  MPI_Request             *send_waits = NULL, *recv_waits = NULL;
  MPI_Status               recv_status, *send_status;
  Vec                      tvec, diag;
  Mat                      tmat;
  const PetscScalar       *d, *values;
  const PetscInt          *cols;

  PetscFunctionBegin;
  if (pc->setupcalled) {
    PetscCall(KSPGetOperators(red->ksp, NULL, &tmat));
    PetscCall(MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_REUSE_MATRIX, &tmat));
    PetscCall(KSPSetOperators(red->ksp, tmat, tmat));
  } else {
    PetscInt NN;

    PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscObjectGetNewTag((PetscObject)pc, &tag));

    /* count the local rows that have at least one nonzero off-diagonal entry */
    PetscCall(MatGetOwnershipRange(pc->mat, &rstart, &rend));
    cnt = 0;
    for (i = rstart; i < rend; i++) {
      PetscCall(MatGetRow(pc->mat, i, &nz, &cols, &values));
      for (PetscInt j = 0; j < nz; j++) {
        if (values[j] != 0 && cols[j] != i) {
          cnt++;
          break;
        }
      }
      PetscCall(MatRestoreRow(pc->mat, i, &nz, &cols, &values));
    }
    PetscCall(PetscMalloc1(cnt, &rows));
    PetscCall(PetscMalloc1(rend - rstart - cnt, &drows));

    /* list the rows with off-diagonal entries (rows[], global indices) and the diagonal-only rows (drows[], local indices) */
    cnt  = 0;
    dcnt = 0;
    for (i = rstart; i < rend; i++) {
      PetscBool diagonly = PETSC_TRUE;
      PetscCall(MatGetRow(pc->mat, i, &nz, &cols, &values));
      for (PetscInt j = 0; j < nz; j++) {
        if (values[j] != 0 && cols[j] != i) {
          diagonly = PETSC_FALSE;
          break;
        }
      }
      if (!diagonly) rows[cnt++] = i;
      else drows[dcnt++] = i - rstart;
      PetscCall(MatRestoreRow(pc->mat, i, &nz, &cols, &values));
    }

    /* create PetscLayout for non-diagonal rows on each process */
    PetscCall(PetscLayoutCreate(comm, &map));
    PetscCall(PetscLayoutSetLocalSize(map, cnt));
    PetscCall(PetscLayoutSetBlockSize(map, 1));
    PetscCall(PetscLayoutSetUp(map));
    rstart = map->rstart;
    rend   = map->rend;

    /* create PetscLayout for load-balanced non-diagonal rows on each process */
    PetscCall(PetscLayoutCreate(comm, &nmap));
    PetscCall(MPIU_Allreduce(&cnt, &ncnt, 1, MPIU_INT, MPI_SUM, comm));
    PetscCall(PetscLayoutSetSize(nmap, ncnt));
    PetscCall(PetscLayoutSetBlockSize(nmap, 1));
    PetscCall(PetscLayoutSetUp(nmap));

    PetscCall(MatGetSize(pc->pmat, &NN, NULL));
    PetscCall(PetscInfo(pc, "Number of diagonal rows eliminated %" PetscInt_FMT ", percentage eliminated %g\n", NN - ncnt, (double)(100.0 * ((PetscReal)(NN - ncnt)) / ((PetscReal)(NN)))));

    if (size > 1) {
      /* the following block of code assumes MPI can send messages to self, which is not supported by MPI-Uni, so the size 1 case is handled separately in the else branch below */
      /*
       this code is adapted from VecScatterCreate_PtoS();
       it determines which rows need to be moved to which process to
       load balance the non-diagonal rows
       */
      /* count the number of contributors to each process */
      PetscCall(PetscMalloc2(size, &sizes, cnt, &owner));
      PetscCall(PetscArrayzero(sizes, size));
      j      = 0;
      nsends = 0;
      for (i = rstart; i < rend; i++) {
        if (i < nmap->range[j]) j = 0;
        for (; j < size; j++) {
          if (i < nmap->range[j + 1]) {
            if (!sizes[j]++) nsends++;
            owner[i - rstart] = j;
            break;
          }
        }
      }
      /* inform other processors of number of messages and max length */
      PetscCall(PetscGatherNumberOfMessages(comm, NULL, sizes, &nrecvs));
      PetscCall(PetscGatherMessageLengths(comm, nsends, nrecvs, sizes, &onodes1, &olengths1));
      PetscCall(PetscSortMPIIntWithArray(nrecvs, onodes1, olengths1));
      recvtotal = 0;
      for (i = 0; i < nrecvs; i++) recvtotal += olengths1[i];

      /* post receives: rvalues - rows I will own; count - running offset into rvalues */
      PetscCall(PetscMalloc3(recvtotal, &rvalues, nrecvs, &source, nrecvs, &recv_waits));
      count = 0;
      for (i = 0; i < nrecvs; i++) {
        PetscCallMPI(MPI_Irecv((rvalues + count), olengths1[i], MPIU_INT, onodes1[i], tag, comm, recv_waits + i));
        count += olengths1[i];
      }

      /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
       the ith processor
       */
      PetscCall(PetscMalloc3(cnt, &svalues, nsends, &send_waits, size, &starts));
      starts[0] = 0;
      for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1];
      for (i = 0; i < cnt; i++) svalues[starts[owner[i]]++] = rows[i];
      for (i = 0; i < cnt; i++) rows[i] = rows[i] - rstart;
      red->drows = drows;
      red->dcnt  = dcnt;
      PetscCall(PetscFree(rows));

      starts[0] = 0;
      for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1];
      count = 0;
      for (i = 0; i < size; i++) {
        if (sizes[i]) PetscCallMPI(MPI_Isend(svalues + starts[i], sizes[i], MPIU_INT, i, tag, comm, send_waits + count++));
      }

      /* wait on receives */
      count = nrecvs;
      slen  = 0;
      while (count) {
        PetscCallMPI(MPI_Waitany(nrecvs, recv_waits, &imdex, &recv_status));
        /* unpack receives into our local space */
        PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, &n));
        slen += n;
        count--;
      }
      PetscCheck(slen == recvtotal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Total message lengths %" PetscInt_FMT " not expected %" PetscInt_FMT, slen, recvtotal);
      PetscCall(ISCreateGeneral(comm, slen, rvalues, PETSC_COPY_VALUES, &red->is));

      /* free all work space */
      PetscCall(PetscFree(olengths1));
      PetscCall(PetscFree(onodes1));
      PetscCall(PetscFree3(rvalues, source, recv_waits));
      PetscCall(PetscFree2(sizes, owner));
      if (nsends) { /* wait on sends */
        PetscCall(PetscMalloc1(nsends, &send_status));
        PetscCallMPI(MPI_Waitall(nsends, send_waits, send_status));
        PetscCall(PetscFree(send_status));
      }
      PetscCall(PetscFree3(svalues, send_waits, starts));
    } else {
      PetscCall(ISCreateGeneral(comm, cnt, rows, PETSC_OWN_POINTER, &red->is));
      red->drows = drows;
      red->dcnt  = dcnt;
      slen       = cnt;
    }
    PetscCall(PetscLayoutDestroy(&map));
    PetscCall(PetscLayoutDestroy(&nmap));

    PetscCall(VecCreateMPI(comm, slen, PETSC_DETERMINE, &red->b));
    PetscCall(VecDuplicate(red->b, &red->x));
    PetscCall(MatCreateVecs(pc->pmat, &tvec, NULL));
    PetscCall(VecScatterCreate(tvec, red->is, red->b, NULL, &red->scatter));
    PetscCall(VecDestroy(&tvec));
    PetscCall(MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_INITIAL_MATRIX, &tmat));
    PetscCall(KSPSetOperators(red->ksp, tmat, tmat));
    PetscCall(MatDestroy(&tmat));
  }

  /* get diagonal portion of matrix */
  PetscCall(PetscFree(red->diag));
  PetscCall(PetscMalloc1(red->dcnt, &red->diag));
  PetscCall(MatCreateVecs(pc->pmat, &diag, NULL));
  PetscCall(MatGetDiagonal(pc->pmat, diag));
  PetscCall(VecGetArrayRead(diag, &d));
  for (i = 0; i < red->dcnt; i++) {
    if (d[red->drows[i]] != 0) red->diag[i] = 1.0 / d[red->drows[i]];
    else {
      red->zerodiag = PETSC_TRUE;
      red->diag[i]  = 0.0;
    }
  }
  PetscCall(VecRestoreArrayRead(diag, &d));
  PetscCall(VecDestroy(&diag));
  PetscCall(KSPSetUp(red->ksp));
  PetscFunctionReturn(0);
}

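/*
   Outline of PCApply_Redistribute(): the entries of x corresponding to diagonal-only rows are set directly to
   x_i = b_i / d_i (with a consistency check when d_i == 0); the residual b - A x is formed and scattered into the
   load-balanced vector red->b; the reduced system is solved with the inner KSP; and the result is scattered back
   into the remaining entries of x.
*/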
static PetscErrorCode PCApply_Redistribute(PC pc, Vec b, Vec x)
{
  PC_Redistribute   *red   = (PC_Redistribute *)pc->data;
  PetscInt           dcnt  = red->dcnt, i;
  const PetscInt    *drows = red->drows;
  PetscScalar       *xwork;
  const PetscScalar *bwork, *diag = red->diag;

  PetscFunctionBegin;
  if (!red->work) PetscCall(VecDuplicate(b, &red->work));
  /* compute the rows of solution that have diagonal entries only */
  PetscCall(VecSet(x, 0.0)); /* x = diag(A)^{-1} b */
  PetscCall(VecGetArray(x, &xwork));
  PetscCall(VecGetArrayRead(b, &bwork));
  if (red->zerodiag) {
    for (i = 0; i < dcnt; i++) {
      if (diag[i] == 0.0 && bwork[drows[i]] != 0.0) {
        PetscCheck(!pc->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Linear system is inconsistent, zero matrix row but nonzero right hand side");
        PetscCall(PetscInfo(pc, "Linear system is inconsistent, zero matrix row but nonzero right hand side\n"));
        PetscCall(VecSetInf(x));
        pc->failedreasonrank = PC_INCONSISTENT_RHS;
      }
    }
  }
  for (i = 0; i < dcnt; i++) xwork[drows[i]] = diag[i] * bwork[drows[i]];
  PetscCall(PetscLogFlops(dcnt));
  PetscCall(VecRestoreArray(x, &xwork));
  PetscCall(VecRestoreArrayRead(b, &bwork));
  /* update the right hand side for the reduced system with diagonal rows (and corresponding columns) removed */
  PetscCall(MatMult(pc->pmat, x, red->work));
  PetscCall(VecAYPX(red->work, -1.0, b)); /* red->work = b - A x */

  PetscCall(VecScatterBegin(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(KSPSolve(red->ksp, red->b, red->x));
  PetscCall(KSPCheckSolve(red->ksp, pc, red->x));
  PetscCall(VecScatterBegin(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCDestroy_Redistribute(PC pc)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  PetscFunctionBegin;
  PetscCall(VecScatterDestroy(&red->scatter));
  PetscCall(ISDestroy(&red->is));
  PetscCall(VecDestroy(&red->b));
  PetscCall(VecDestroy(&red->x));
  PetscCall(KSPDestroy(&red->ksp));
  PetscCall(VecDestroy(&red->work));
  PetscCall(PetscFree(red->drows));
  PetscCall(PetscFree(red->diag));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCSetFromOptions_Redistribute(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  PetscFunctionBegin;
  PetscCall(KSPSetFromOptions(red->ksp));
  PetscFunctionReturn(0);
}

/*@
   PCRedistributeGetKSP - Gets the `KSP` created by the `PCREDISTRIBUTE`

   Not Collective

   Input Parameter:
.  pc - the preconditioner context

   Output Parameter:
.  innerksp - the inner `KSP`

   Level: advanced

.seealso: `KSP`, `PCREDISTRIBUTE`
@*/
PetscErrorCode PCRedistributeGetKSP(PC pc, KSP *innerksp)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscValidPointer(innerksp, 2);
  *innerksp = red->ksp;
  PetscFunctionReturn(0);
}
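
/*
   A minimal usage sketch (illustrative only, not part of this source file): once the PC type has been set to
   PCREDISTRIBUTE, the inner solver can be configured in code instead of through the -redistribute_ options; the
   solver type and tolerance below are arbitrary example values.

     KSP inner;
     PetscCall(PCSetType(pc, PCREDISTRIBUTE));
     PetscCall(PCRedistributeGetKSP(pc, &inner));
     PetscCall(KSPSetType(inner, KSPCG));
     PetscCall(KSPSetTolerances(inner, 1.e-8, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT));
*/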

/*MC
     PCREDISTRIBUTE - Redistributes a matrix for load balancing, removing the rows (and the corresponding columns) that only have a diagonal entry and then
     applies a `KSP` to that new smaller matrix

     Level: intermediate

     Notes:
     Options for the inner `KSP` and its `PC` are set using the options database prefix -redistribute_

     Usually run this with `-ksp_type preonly`

     If you have used `MatZeroRows()` to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem, then you can use, for example, `-ksp_type preonly
     -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc` to take advantage of the symmetry.

     This does NOT call a partitioner to reorder rows to lower communication; the ordering of the rows in the original matrix and redistributed matrix is the same. Rows are moved
     between MPI processes inside the preconditioner to balance the number of rows on each process.

     Developer Note:
     Should add an option to this preconditioner to use a partitioner to redistribute the rows to lower communication.

.seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PCRedistributeGetKSP()`, `MatZeroRows()`
M*/
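
/*
   A hedged end-to-end sketch of the usage described in the Notes above (assumes Mat A and Vec b, x already
   exist); combined with command line options such as -redistribute_ksp_type cg -redistribute_pc_type bjacobi it
   reproduces the -ksp_type preonly -pc_type redistribute setup:

     KSP ksp;
     PC  pc;
     PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
     PetscCall(KSPSetOperators(ksp, A, A));
     PetscCall(KSPSetType(ksp, KSPPREONLY));
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCREDISTRIBUTE));
     PetscCall(KSPSetFromOptions(ksp)); // picks up the -redistribute_ options from the database
     PetscCall(KSPSolve(ksp, b, x));
     PetscCall(KSPDestroy(&ksp));
*/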

PETSC_EXTERN PetscErrorCode PCCreate_Redistribute(PC pc)
{
  PC_Redistribute *red;
  const char      *prefix;

  PetscFunctionBegin;
  PetscCall(PetscNew(&red));
  pc->data = (void *)red;

  pc->ops->apply          = PCApply_Redistribute;
  pc->ops->applytranspose = NULL;
  pc->ops->setup          = PCSetUp_Redistribute;
  pc->ops->destroy        = PCDestroy_Redistribute;
  pc->ops->setfromoptions = PCSetFromOptions_Redistribute;
  pc->ops->view           = PCView_Redistribute;

  PetscCall(KSPCreate(PetscObjectComm((PetscObject)pc), &red->ksp));
  PetscCall(KSPSetErrorIfNotConverged(red->ksp, pc->erroriffailure));
  PetscCall(PetscObjectIncrementTabLevel((PetscObject)red->ksp, (PetscObject)pc, 1));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCall(KSPSetOptionsPrefix(red->ksp, prefix));
  PetscCall(KSPAppendOptionsPrefix(red->ksp, "redistribute_"));
  PetscFunctionReturn(0);
}