/*
  This file defines a preconditioner that load balances the matrix rows across the MPI processes,
  eliminating the rows (and corresponding columns) that contain only a diagonal entry, and then
  solves the reduced problem.
*/
#include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/
#include <petscksp.h>

typedef struct {
  KSP          ksp;
  Vec          x, b;
  VecScatter   scatter;
  IS           is;
  PetscInt     dcnt, *drows; /* these are the local rows that have only a diagonal entry */
  PetscScalar *diag;
  Vec          work;
  PetscBool    zerodiag;
} PC_Redistribute;

static PetscErrorCode PCView_Redistribute(PC pc, PetscViewer viewer)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;
  PetscBool        iascii, isstring;
  PetscInt         ncnt, N;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
  if (iascii) {
    PetscCall(MPIU_Allreduce(&red->dcnt, &ncnt, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
    PetscCall(MatGetSize(pc->pmat, &N, NULL));
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Number of rows eliminated %" PetscInt_FMT ", percentage of rows eliminated %g\n", ncnt, (double)(100.0 * ((PetscReal)ncnt) / ((PetscReal)N))));
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Redistribute preconditioner:\n"));
    PetscCall(KSPView(red->ksp, viewer));
  } else if (isstring) {
    PetscCall(PetscViewerStringSPrintf(viewer, " Redistribute preconditioner"));
    PetscCall(KSPView(red->ksp, viewer));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode PCSetUp_Redistribute(PC pc)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;
  MPI_Comm         comm;
  PetscInt         rstart, rend, i, nz, cnt, *rows, ncnt, dcnt, *drows;
  PetscLayout      map, nmap;
  PetscMPIInt      size, tag, n;
  PETSC_UNUSED PetscMPIInt imdex;
  PetscInt          *source = NULL;
  PetscMPIInt       *sizes = NULL, nrecvs;
  PetscInt           j, nsends;
  PetscInt          *owner = NULL, *starts = NULL, count, slen;
  PetscInt          *rvalues, *svalues, recvtotal;
  PetscMPIInt       *onodes1, *olengths1;
  MPI_Request       *send_waits = NULL, *recv_waits = NULL;
  MPI_Status         recv_status, *send_status;
  Vec                tvec, diag;
  Mat                tmat;
  const PetscScalar *d, *values;
  const PetscInt    *cols;

  PetscFunctionBegin;
  if (pc->setupcalled) {
    PetscCall(KSPGetOperators(red->ksp, NULL, &tmat));
    PetscCall(MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_REUSE_MATRIX, &tmat));
    PetscCall(KSPSetOperators(red->ksp, tmat, tmat));
  } else {
    PetscInt NN;

    PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscObjectGetNewTag((PetscObject)pc, &tag));

    /* count non-diagonal rows on this process */
    PetscCall(MatGetOwnershipRange(pc->mat, &rstart, &rend));
    cnt = 0;
    for (i = rstart; i < rend; i++) {
      PetscCall(MatGetRow(pc->mat, i, &nz, &cols, &values));
      for (PetscInt j = 0; j < nz; j++) {
        if (values[j] != 0 && cols[j] != i) {
          cnt++;
          break;
        }
      }
      PetscCall(MatRestoreRow(pc->mat, i, &nz, &cols, &values));
    }
    PetscCall(PetscMalloc1(cnt, &rows));
    PetscCall(PetscMalloc1(rend - rstart - cnt, &drows));

    /* list non-diagonal rows on this process */
    cnt  = 0;
    dcnt = 0;
    for (i = rstart; i < rend; i++) {
      PetscBool diagonly = PETSC_TRUE;
      PetscCall(MatGetRow(pc->mat, i, &nz, &cols, &values));
      for (PetscInt j = 0; j < nz; j++) {
        if (values[j] != 0 && cols[j] != i) {
          diagonly = PETSC_FALSE;
          break;
        }
      }
      if (!diagonly) rows[cnt++] = i;
      else drows[dcnt++] = i - rstart;
      PetscCall(MatRestoreRow(pc->mat, i, &nz, &cols, &values));
    }
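
    /*
       A small illustrative example (the matrix here is hypothetical, not taken from the code):
       for the local rows

           [ 2  0  0 ]
           [ 0  3  1 ]
           [ 0  1  3 ]

       row 0 has a nonzero only on its diagonal, so its local index lands in drows (it is later
       solved directly by diagonal scaling in PCApply_Redistribute()), while rows 1 and 2 have
       off-diagonal nonzeros and land in rows (they are kept in the redistributed system).
    */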

    /* create PetscLayout for non-diagonal rows on each process */
    PetscCall(PetscLayoutCreate(comm, &map));
    PetscCall(PetscLayoutSetLocalSize(map, cnt));
    PetscCall(PetscLayoutSetBlockSize(map, 1));
    PetscCall(PetscLayoutSetUp(map));
    rstart = map->rstart;
    rend   = map->rend;

    /* create PetscLayout for load-balanced non-diagonal rows on each process */
    PetscCall(PetscLayoutCreate(comm, &nmap));
    PetscCall(MPIU_Allreduce(&cnt, &ncnt, 1, MPIU_INT, MPI_SUM, comm));
    PetscCall(PetscLayoutSetSize(nmap, ncnt));
    PetscCall(PetscLayoutSetBlockSize(nmap, 1));
    PetscCall(PetscLayoutSetUp(nmap));

    PetscCall(MatGetSize(pc->pmat, &NN, NULL));
    PetscCall(PetscInfo(pc, "Number of diagonal rows eliminated %" PetscInt_FMT ", percentage eliminated %g\n", NN - ncnt, (double)(100.0 * ((PetscReal)(NN - ncnt)) / ((PetscReal)NN))));

    if (size > 1) {
      /* the following block of code assumes MPI can send messages to self, which is not supported by MPI-uni, hence the size 1 case is handled separately below */
      /*
         this code is taken from VecScatterCreate_PtoS()
         Determines what rows need to be moved where to
         load balance the non-diagonal rows
      */
      /* count number of contributors to each processor */
      PetscCall(PetscMalloc2(size, &sizes, cnt, &owner));
      PetscCall(PetscArrayzero(sizes, size));
      j      = 0;
      nsends = 0;
      for (i = rstart; i < rend; i++) {
        if (i < nmap->range[j]) j = 0;
        for (; j < size; j++) {
          if (i < nmap->range[j + 1]) {
            if (!sizes[j]++) nsends++;
            owner[i - rstart] = j;
            break;
          }
        }
      }
      /* inform other processors of number of messages and max length */
      PetscCall(PetscGatherNumberOfMessages(comm, NULL, sizes, &nrecvs));
      PetscCall(PetscGatherMessageLengths(comm, nsends, nrecvs, sizes, &onodes1, &olengths1));
      PetscCall(PetscSortMPIIntWithArray(nrecvs, onodes1, olengths1));
      recvtotal = 0;
      for (i = 0; i < nrecvs; i++) recvtotal += olengths1[i];

      /* post receives: rvalues will hold the rows I will own; count is the running offset into rvalues */
      PetscCall(PetscMalloc3(recvtotal, &rvalues, nrecvs, &source, nrecvs, &recv_waits));
      count = 0;
      for (i = 0; i < nrecvs; i++) {
        PetscCallMPI(MPI_Irecv((rvalues + count), olengths1[i], MPIU_INT, onodes1[i], tag, comm, recv_waits + i));
        count += olengths1[i];
      }

      /* do sends:
         1) starts[i] gives the starting index in svalues for messages going to
            the ith processor
      */
      PetscCall(PetscMalloc3(cnt, &svalues, nsends, &send_waits, size, &starts));
      starts[0] = 0;
      for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1];
      for (i = 0; i < cnt; i++) svalues[starts[owner[i]]++] = rows[i];
      for (i = 0; i < cnt; i++) rows[i] = rows[i] - rstart;
      red->drows = drows;
      red->dcnt  = dcnt;
      PetscCall(PetscFree(rows));

      starts[0] = 0;
      for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1];
      count = 0;
      for (i = 0; i < size; i++) {
        if (sizes[i]) PetscCallMPI(MPI_Isend(svalues + starts[i], sizes[i], MPIU_INT, i, tag, comm, send_waits + count++));
      }

      /* wait on receives */
      count = nrecvs;
      slen  = 0;
      while (count) {
        PetscCallMPI(MPI_Waitany(nrecvs, recv_waits, &imdex, &recv_status));
        /* unpack receives into our local space */
        PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, &n));
        slen += n;
        count--;
      }
      PetscCheck(slen == recvtotal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Total message lengths %" PetscInt_FMT " not expected %" PetscInt_FMT, slen, recvtotal);
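      /* at this point rvalues[0..slen) holds the global indices of the non-diagonal rows this
         process owns under the load-balanced layout; they become the index set red->is below */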
      PetscCall(ISCreateGeneral(comm, slen, rvalues, PETSC_COPY_VALUES, &red->is));

      /* free all work space */
      PetscCall(PetscFree(olengths1));
      PetscCall(PetscFree(onodes1));
      PetscCall(PetscFree3(rvalues, source, recv_waits));
      PetscCall(PetscFree2(sizes, owner));
      if (nsends) { /* wait on sends */
        PetscCall(PetscMalloc1(nsends, &send_status));
        PetscCallMPI(MPI_Waitall(nsends, send_waits, send_status));
        PetscCall(PetscFree(send_status));
      }
      PetscCall(PetscFree3(svalues, send_waits, starts));
    } else {
      PetscCall(ISCreateGeneral(comm, cnt, rows, PETSC_OWN_POINTER, &red->is));
      red->drows = drows;
      red->dcnt  = dcnt;
      slen       = cnt;
    }
    PetscCall(PetscLayoutDestroy(&map));
    PetscCall(PetscLayoutDestroy(&nmap));

    PetscCall(VecCreateMPI(comm, slen, PETSC_DETERMINE, &red->b));
    PetscCall(VecDuplicate(red->b, &red->x));
    PetscCall(MatCreateVecs(pc->pmat, &tvec, NULL));
    PetscCall(VecScatterCreate(tvec, red->is, red->b, NULL, &red->scatter));
    PetscCall(VecDestroy(&tvec));
    PetscCall(MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_INITIAL_MATRIX, &tmat));
    PetscCall(KSPSetOperators(red->ksp, tmat, tmat));
    PetscCall(MatDestroy(&tmat));
  }

  /* get diagonal portion of matrix */
  PetscCall(PetscFree(red->diag));
  PetscCall(PetscMalloc1(red->dcnt, &red->diag));
  PetscCall(MatCreateVecs(pc->pmat, &diag, NULL));
  PetscCall(MatGetDiagonal(pc->pmat, diag));
  PetscCall(VecGetArrayRead(diag, &d));
  for (i = 0; i < red->dcnt; i++) {
    if (d[red->drows[i]] != 0) red->diag[i] = 1.0 / d[red->drows[i]];
    else {
      red->zerodiag = PETSC_TRUE;
      red->diag[i]  = 0.0;
    }
  }
  PetscCall(VecRestoreArrayRead(diag, &d));
  PetscCall(VecDestroy(&diag));
  PetscCall(KSPSetUp(red->ksp));
  PetscFunctionReturn(0);
}
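
/*
   A sketch of the algebra behind PCApply_Redistribute() below; the block notation
   (A_SD, A_SS) is illustrative and does not correspond to names in the code. Ordering
   the diagonal-only rows D first, the system is

       [ D      0    ] [ x_D ]   [ b_D ]
       [ A_SD   A_SS ] [ x_S ] = [ b_S ]

   where the upper-right block is zero because the D rows contain only their diagonal
   entry. Hence x_D = D^{-1} b_D is computed directly, and the reduced system

       A_SS x_S = b_S - A_SD x_D

   is solved with the inner KSP; its right-hand side is formed as b - A x (with x holding
   only x_D) restricted to the kept rows via the scatter.
*/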

static PetscErrorCode PCApply_Redistribute(PC pc, Vec b, Vec x)
{
  PC_Redistribute   *red   = (PC_Redistribute *)pc->data;
  PetscInt           dcnt  = red->dcnt, i;
  const PetscInt    *drows = red->drows;
  PetscScalar       *xwork;
  const PetscScalar *bwork, *diag = red->diag;

  PetscFunctionBegin;
  if (!red->work) PetscCall(VecDuplicate(b, &red->work));
  /* compute the rows of the solution that have diagonal entries only */
  PetscCall(VecSet(x, 0.0)); /* x = diag(A)^{-1} b */
  PetscCall(VecGetArray(x, &xwork));
  PetscCall(VecGetArrayRead(b, &bwork));
  if (red->zerodiag) {
    for (i = 0; i < dcnt; i++) {
      if (diag[i] == 0.0 && bwork[drows[i]] != 0.0) {
        PetscCheck(!pc->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Linear system is inconsistent, zero matrix row but nonzero right hand side");
        PetscCall(PetscInfo(pc, "Linear system is inconsistent, zero matrix row but nonzero right hand side\n"));
        PetscCall(VecSetInf(x));
        pc->failedreasonrank = PC_INCONSISTENT_RHS;
      }
    }
  }
  for (i = 0; i < dcnt; i++) xwork[drows[i]] = diag[i] * bwork[drows[i]];
  PetscCall(PetscLogFlops(dcnt));
  PetscCall(VecRestoreArray(x, &xwork)); /* the array was obtained from x, so it must be restored to x */
  PetscCall(VecRestoreArrayRead(b, &bwork));
  /* update the right-hand side for the reduced system with the diagonal rows (and corresponding columns) removed */
  PetscCall(MatMult(pc->pmat, x, red->work));
  PetscCall(VecAYPX(red->work, -1.0, b)); /* red->work = b - A x */

  PetscCall(VecScatterBegin(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(KSPSolve(red->ksp, red->b, red->x));
  PetscCall(KSPCheckSolve(red->ksp, pc, red->x));
  PetscCall(VecScatterBegin(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCDestroy_Redistribute(PC pc)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  PetscFunctionBegin;
  PetscCall(VecScatterDestroy(&red->scatter));
  PetscCall(ISDestroy(&red->is));
  PetscCall(VecDestroy(&red->b));
  PetscCall(VecDestroy(&red->x));
  PetscCall(KSPDestroy(&red->ksp));
  PetscCall(VecDestroy(&red->work));
  PetscCall(PetscFree(red->drows));
  PetscCall(PetscFree(red->diag));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCSetFromOptions_Redistribute(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  PetscFunctionBegin;
  PetscCall(KSPSetFromOptions(red->ksp));
  PetscFunctionReturn(0);
}

/*@
  PCRedistributeGetKSP - Gets the inner `KSP` created by the `PCREDISTRIBUTE` preconditioner

  Not Collective

  Input Parameter:
. pc - the preconditioner context

  Output Parameter:
. innerksp - the inner `KSP`

  Level: advanced

.seealso: `KSP`, `PCREDISTRIBUTE`
@*/
PetscErrorCode PCRedistributeGetKSP(PC pc, KSP *innerksp)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscValidPointer(innerksp, 2);
  *innerksp = red->ksp;
  PetscFunctionReturn(0);
}

/*MC
     PCREDISTRIBUTE - Redistributes a matrix for load balancing, removing the rows (and the corresponding columns) that only have a diagonal entry, and then
     applies a `KSP` to the resulting smaller matrix

     Level: intermediate

     Notes:
     Options for the inner `KSP` and `PC` use the options database prefix -redistribute_

     Usually run this with `-ksp_type preonly`

     If you have used `MatZeroRows()` to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem then you can use, for example, `-ksp_type preonly
     -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc` to take advantage of the symmetry.

     This does NOT call a partitioner to reorder rows to lower communication; the ordering of the rows in the original matrix and the redistributed matrix is the same. Rows are moved
     between MPI processes inside the preconditioner to balance the number of rows on each process.

     Developer Note:
     Should add an option to this preconditioner to use a partitioner to redistribute the rows to lower communication.

.seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PCRedistributeGetKSP()`, `MatZeroRows()`
M*/
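
/*
   A minimal usage sketch (not part of the library; error checking abbreviated, and A, b, x
   are assumed to be an assembled Mat and Vecs supplied by the caller):

     KSP ksp;
     PC  pc;
     PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
     PetscCall(KSPSetOperators(ksp, A, A));
     PetscCall(KSPSetType(ksp, KSPPREONLY));
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCREDISTRIBUTE));
     PetscCall(KSPSetFromOptions(ksp));
     PetscCall(KSPSolve(ksp, b, x));

   The inner solver is then selected on the command line, for example
   -redistribute_ksp_type cg -redistribute_pc_type bjacobi.
*/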

PETSC_EXTERN PetscErrorCode PCCreate_Redistribute(PC pc)
{
  PC_Redistribute *red;
  const char      *prefix;

  PetscFunctionBegin;
  PetscCall(PetscNew(&red));
  pc->data = (void *)red;

  pc->ops->apply          = PCApply_Redistribute;
  pc->ops->applytranspose = NULL;
  pc->ops->setup          = PCSetUp_Redistribute;
  pc->ops->destroy        = PCDestroy_Redistribute;
  pc->ops->setfromoptions = PCSetFromOptions_Redistribute;
  pc->ops->view           = PCView_Redistribute;

  PetscCall(KSPCreate(PetscObjectComm((PetscObject)pc), &red->ksp));
  PetscCall(KSPSetErrorIfNotConverged(red->ksp, pc->erroriffailure));
  PetscCall(PetscObjectIncrementTabLevel((PetscObject)red->ksp, (PetscObject)pc, 1));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCall(KSPSetOptionsPrefix(red->ksp, prefix));
  PetscCall(KSPAppendOptionsPrefix(red->ksp, "redistribute_"));
  PetscFunctionReturn(0);
}
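
/*
   A sketch of customizing the inner solver in code rather than via the options database,
   using the prefix machinery set up above (variable names are illustrative):

     KSP inner;
     PetscCall(PCRedistributeGetKSP(pc, &inner));
     PetscCall(KSPSetType(inner, KSPGMRES));
     PetscCall(KSPSetTolerances(inner, 1e-8, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT));
*/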