/*
  This file defines a preconditioner that drops the rows (and corresponding columns) of the matrix that
  contain only a diagonal entry, redistributes the remaining rows across the processes for load balance,
  and solves that smaller problem with an inner KSP.
*/
#include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/
#include <petscksp.h>

typedef struct {
  KSP          ksp;
  Vec          x, b;
  VecScatter   scatter;
  IS           is;
  PetscInt     dcnt, *drows; /* these are the local rows that have only a diagonal entry */
  PetscScalar *diag;
  Vec          work;
  PetscBool    zerodiag;
} PC_Redistribute;

static PetscErrorCode PCView_Redistribute(PC pc, PetscViewer viewer)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;
  PetscBool        iascii, isstring;
  PetscInt         ncnt, N;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
  if (iascii) {
    PetscCall(MPIU_Allreduce(&red->dcnt, &ncnt, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
    PetscCall(MatGetSize(pc->pmat, &N, NULL));
    PetscCall(PetscViewerASCIIPrintf(viewer, "    Number rows eliminated %" PetscInt_FMT " Percentage rows eliminated %g\n", ncnt, (double)(100.0 * ((PetscReal)ncnt) / ((PetscReal)N))));
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Redistribute preconditioner: \n"));
    PetscCall(KSPView(red->ksp, viewer));
  } else if (isstring) {
    PetscCall(PetscViewerStringSPrintf(viewer, " Redistribute preconditioner"));
    PetscCall(KSPView(red->ksp, viewer));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode PCSetUp_Redistribute(PC pc)
{
  PC_Redistribute   *red = (PC_Redistribute *)pc->data;
  MPI_Comm           comm;
  PetscInt           rstart, rend, i, nz, cnt, *rows, ncnt, dcnt, *drows;
  PetscLayout        map, nmap;
  PetscMPIInt        size, tag, n;
  PETSC_UNUSED PetscMPIInt imdex;
  PetscInt          *source = NULL;
  PetscMPIInt       *sizes = NULL, nrecvs;
  PetscInt           j, nsends;
  PetscInt          *owner = NULL, *starts = NULL, count, slen;
  PetscInt          *rvalues, *svalues, recvtotal;
  PetscMPIInt       *onodes1, *olengths1;
  MPI_Request       *send_waits = NULL, *recv_waits = NULL;
  MPI_Status         recv_status, *send_status;
  Vec                tvec, diag;
  Mat                tmat;
  const PetscScalar *d, *values;
  const PetscInt    *cols;

  PetscFunctionBegin;
  if (pc->setupcalled) {
    PetscCall(KSPGetOperators(red->ksp, NULL, &tmat));
    PetscCall(MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_REUSE_MATRIX, &tmat));
    PetscCall(KSPSetOperators(red->ksp, tmat, tmat));
  } else {
    PetscInt NN;

    PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscObjectGetNewTag((PetscObject)pc, &tag));

    /* count non-diagonal rows on this process */
    PetscCall(MatGetOwnershipRange(pc->mat, &rstart, &rend));
    cnt = 0;
    for (i = rstart; i < rend; i++) {
      PetscCall(MatGetRow(pc->mat, i, &nz, &cols, &values));
      for (PetscInt j = 0; j < nz; j++) {
        if (values[j] != 0 && cols[j] != i) {
          cnt++;
          break;
        }
      }
      PetscCall(MatRestoreRow(pc->mat, i, &nz, &cols, &values));
    }
    PetscCall(PetscMalloc1(cnt, &rows));
    PetscCall(PetscMalloc1(rend - rstart - cnt, &drows));

    /* list non-diagonal rows on this process */
    cnt  = 0;
    dcnt = 0;
    for (i = rstart; i < rend; i++) {
      PetscBool diagonly = PETSC_TRUE;
      PetscCall(MatGetRow(pc->mat, i, &nz, &cols, &values));
      for (PetscInt j = 0; j < nz; j++) {
        if (values[j] != 0 && cols[j] != i) {
          diagonly = PETSC_FALSE;
          break;
        }
      }
      if (!diagonly) rows[cnt++] = i;
      else drows[dcnt++] = i - rstart;
      PetscCall(MatRestoreRow(pc->mat, i, &nz, &cols, &values));
    }

    /* create PetscLayout for the non-diagonal rows on each process */
    PetscCall(PetscLayoutCreate(comm, &map));
    PetscCall(PetscLayoutSetLocalSize(map, cnt));
    PetscCall(PetscLayoutSetBlockSize(map, 1));
    PetscCall(PetscLayoutSetUp(map));
    rstart = map->rstart;
    rend   = map->rend;

    /* create PetscLayout for the load-balanced non-diagonal rows on each process */
    PetscCall(PetscLayoutCreate(comm, &nmap));
    PetscCall(MPIU_Allreduce(&cnt, &ncnt, 1, MPIU_INT, MPI_SUM, comm));
    PetscCall(PetscLayoutSetSize(nmap, ncnt));
    PetscCall(PetscLayoutSetBlockSize(nmap, 1));
    PetscCall(PetscLayoutSetUp(nmap));

    PetscCall(MatGetSize(pc->pmat, &NN, NULL));
    PetscCall(PetscInfo(pc, "Number of diagonal rows eliminated %" PetscInt_FMT ", percentage eliminated %g\n", NN - ncnt, (double)(100.0 * ((PetscReal)(NN - ncnt)) / ((PetscReal)NN))));

    if (size > 1) {
      /* the following block of code assumes MPI can send messages to self, which is not supported by MPI-uni, hence the size 1 case is handled separately below */
      /*
         This code is taken from VecScatterCreate_PtoS()
         Determines what rows need to be moved where to
         load balance the non-diagonal rows
      */
      /* count number of contributors to each processor */
      PetscCall(PetscMalloc2(size, &sizes, cnt, &owner));
      PetscCall(PetscArrayzero(sizes, size));
      j      = 0;
      nsends = 0;
      for (i = rstart; i < rend; i++) {
        if (i < nmap->range[j]) j = 0;
        for (; j < size; j++) {
          if (i < nmap->range[j + 1]) {
            if (!sizes[j]++) nsends++;
            owner[i - rstart] = j;
            break;
          }
        }
      }
      /* inform other processors of number of messages and max length */
      PetscCall(PetscGatherNumberOfMessages(comm, NULL, sizes, &nrecvs));
      PetscCall(PetscGatherMessageLengths(comm, nsends, nrecvs, sizes, &onodes1, &olengths1));
      PetscCall(PetscSortMPIIntWithArray(nrecvs, onodes1, olengths1));
      recvtotal = 0;
      for (i = 0; i < nrecvs; i++) recvtotal += olengths1[i];

      /* post receives: rvalues - rows I will own; count - nu */
      PetscCall(PetscMalloc3(recvtotal, &rvalues, nrecvs, &source, nrecvs, &recv_waits));
      count = 0;
      for (i = 0; i < nrecvs; i++) {
        PetscCallMPI(MPI_Irecv((rvalues + count), olengths1[i], MPIU_INT, onodes1[i], tag, comm, recv_waits + i));
        count += olengths1[i];
      }

      /* do sends:
         1) starts[i] gives the starting index in svalues for stuff going to
            the ith processor
      */
      PetscCall(PetscMalloc3(cnt, &svalues, nsends, &send_waits, size, &starts));
      starts[0] = 0;
      for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1];
      for (i = 0; i < cnt; i++) svalues[starts[owner[i]]++] = rows[i];
      for (i = 0; i < cnt; i++) rows[i] = rows[i] - rstart;
      red->drows = drows;
      red->dcnt  = dcnt;
      PetscCall(PetscFree(rows));

      starts[0] = 0;
      for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1];
      count = 0;
      for (i = 0; i < size; i++) {
        if (sizes[i]) PetscCallMPI(MPI_Isend(svalues + starts[i], sizes[i], MPIU_INT, i, tag, comm, send_waits + count++));
      }

      /* wait on receives */
      count = nrecvs;
      slen  = 0;
      while (count) {
        PetscCallMPI(MPI_Waitany(nrecvs, recv_waits, &imdex, &recv_status));
        /* unpack receives into our local space */
        PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, &n));
        slen += n;
        count--;
      }
      PetscCheck(slen == recvtotal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Total message lengths %" PetscInt_FMT " not expected %" PetscInt_FMT, slen, recvtotal);
      PetscCall(ISCreateGeneral(comm, slen, rvalues, PETSC_COPY_VALUES, &red->is));

      /* free all work space */
      PetscCall(PetscFree(olengths1));
      PetscCall(PetscFree(onodes1));
      PetscCall(PetscFree3(rvalues, source, recv_waits));
      PetscCall(PetscFree2(sizes, owner));
      if (nsends) { /* wait on sends */
        PetscCall(PetscMalloc1(nsends, &send_status));
        PetscCallMPI(MPI_Waitall(nsends, send_waits, send_status));
        PetscCall(PetscFree(send_status));
      }
      PetscCall(PetscFree3(svalues, send_waits, starts));
    } else {
      PetscCall(ISCreateGeneral(comm, cnt, rows, PETSC_OWN_POINTER, &red->is));
      red->drows = drows;
      red->dcnt  = dcnt;
      slen       = cnt;
    }
    PetscCall(PetscLayoutDestroy(&map));
    PetscCall(PetscLayoutDestroy(&nmap));

    PetscCall(VecCreateMPI(comm, slen, PETSC_DETERMINE, &red->b));
    PetscCall(VecDuplicate(red->b, &red->x));
    PetscCall(MatCreateVecs(pc->pmat, &tvec, NULL));
    PetscCall(VecScatterCreate(tvec, red->is, red->b, NULL, &red->scatter));
    PetscCall(VecDestroy(&tvec));
    PetscCall(MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_INITIAL_MATRIX, &tmat));
    PetscCall(KSPSetOperators(red->ksp, tmat, tmat));
    PetscCall(MatDestroy(&tmat));
  }

  /* get diagonal portion of matrix */
  PetscCall(PetscFree(red->diag));
  PetscCall(PetscMalloc1(red->dcnt, &red->diag));
  PetscCall(MatCreateVecs(pc->pmat, &diag, NULL));
  PetscCall(MatGetDiagonal(pc->pmat, diag));
  PetscCall(VecGetArrayRead(diag, &d));
  for (i = 0; i < red->dcnt; i++) {
    if (d[red->drows[i]] != 0) red->diag[i] = 1.0 / d[red->drows[i]];
    else {
      red->zerodiag = PETSC_TRUE;
      red->diag[i]  = 0.0;
    }
  }
  PetscCall(VecRestoreArrayRead(diag, &d));
  PetscCall(VecDestroy(&diag));
  PetscCall(KSPSetUp(red->ksp));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCApply_Redistribute(PC pc, Vec b, Vec x)
{
  PC_Redistribute   *red   = (PC_Redistribute *)pc->data;
  PetscInt           dcnt  = red->dcnt, i;
  const PetscInt    *drows = red->drows;
  PetscScalar       *xwork;
  const PetscScalar *bwork, *diag = red->diag;

  PetscFunctionBegin;
  if (!red->work) PetscCall(VecDuplicate(b, &red->work));
  /* compute the rows of the solution that have diagonal entries only */
  PetscCall(VecSet(x, 0.0)); /* x = diag(A)^{-1} b */
  PetscCall(VecGetArray(x, &xwork));
  PetscCall(VecGetArrayRead(b, &bwork));
  if (red->zerodiag) {
    for (i = 0; i < dcnt; i++) {
      if (diag[i] == 0.0 && bwork[drows[i]] != 0.0) {
        PetscCheck(!pc->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Linear system is inconsistent, zero matrix row but nonzero right hand side");
        PetscCall(PetscInfo(pc, "Linear system is inconsistent, zero matrix row but nonzero right hand side\n"));
        PetscCall(VecSetInf(x));
        pc->failedreasonrank = PC_INCONSISTENT_RHS;
      }
    }
  }
  for (i = 0; i < dcnt; i++) xwork[drows[i]] = diag[i] * bwork[drows[i]];
  PetscCall(PetscLogFlops(dcnt));
  PetscCall(VecRestoreArray(x, &xwork));
  PetscCall(VecRestoreArrayRead(b, &bwork));
  /* update the right-hand side for the reduced system with diagonal rows (and corresponding columns) removed */
  PetscCall(MatMult(pc->pmat, x, red->work));
  PetscCall(VecAYPX(red->work, -1.0, b)); /* red->work = b - A x */

  PetscCall(VecScatterBegin(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD));
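  /*
     On the kept (non-diagonal-only) rows red->work now holds the right-hand side of the reduced system;
     the forward scatter compresses those entries into red->b, the inner KSP solves the reduced system,
     and the reverse scatter places the reduced solution back into the corresponding entries of x.
  */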
  PetscCall(VecScatterEnd(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(KSPSolve(red->ksp, red->b, red->x));
  PetscCall(KSPCheckSolve(red->ksp, pc, red->x));
  PetscCall(VecScatterBegin(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCDestroy_Redistribute(PC pc)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  PetscFunctionBegin;
  PetscCall(VecScatterDestroy(&red->scatter));
  PetscCall(ISDestroy(&red->is));
  PetscCall(VecDestroy(&red->b));
  PetscCall(VecDestroy(&red->x));
  PetscCall(KSPDestroy(&red->ksp));
  PetscCall(VecDestroy(&red->work));
  PetscCall(PetscFree(red->drows));
  PetscCall(PetscFree(red->diag));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCSetFromOptions_Redistribute(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  PetscFunctionBegin;
  PetscCall(KSPSetFromOptions(red->ksp));
  PetscFunctionReturn(0);
}

/*@
  PCRedistributeGetKSP - Gets the inner `KSP` created by the `PCREDISTRIBUTE` preconditioner

  Not Collective

  Input Parameter:
. pc - the preconditioner context

  Output Parameter:
. innerksp - the inner `KSP`

  Level: advanced

.seealso: `KSP`, `PCREDISTRIBUTE`
@*/
PetscErrorCode PCRedistributeGetKSP(PC pc, KSP *innerksp)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscValidPointer(innerksp, 2);
  *innerksp = red->ksp;
  PetscFunctionReturn(0);
}

/*MC
     PCREDISTRIBUTE - Redistributes a matrix for load balancing, removing the rows (and the corresponding columns) that have only a diagonal entry, and then
     applies a `KSP` to that new smaller matrix

     Notes:
     Options for the inner `KSP` and `PC` use the options database prefix -redistribute_

     Usually this is run with the outer `KSP` using -ksp_type preonly

     If you have used `MatZeroRows()` to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem then you can use, for example, -ksp_type preonly
     -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc to take advantage of the symmetry.

     This does NOT call a partitioner to reorder rows to lower communication; the ordering of the rows in the original matrix and the redistributed matrix is the same.

     Developer Note:
     Should add an option to this preconditioner to use a partitioner to redistribute the rows to lower communication.
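     Example Usage:
     The sketch below (not part of this file) shows one way to select the preconditioner from code and access the inner `KSP`; `A`, `b`, and `x` are assumed to be an already assembled matrix and vectors.
.vb
     KSP ksp, innerksp;
     PC  pc;
     KSPCreate(PETSC_COMM_WORLD, &ksp);
     KSPSetOperators(ksp, A, A);
     KSPSetType(ksp, KSPPREONLY);         /* the outer solve only applies the preconditioner */
     KSPGetPC(ksp, &pc);
     PCSetType(pc, PCREDISTRIBUTE);
     PCRedistributeGetKSP(pc, &innerksp); /* configure the inner solve here or via -redistribute_ options */
     KSPSolve(ksp, b, x);
.ve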
     Level: intermediate

.seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PCRedistributeGetKSP()`, `MatZeroRows()`
M*/

PETSC_EXTERN PetscErrorCode PCCreate_Redistribute(PC pc)
{
  PC_Redistribute *red;
  const char      *prefix;

  PetscFunctionBegin;
  PetscCall(PetscNew(&red));
  pc->data = (void *)red;

  pc->ops->apply          = PCApply_Redistribute;
  pc->ops->applytranspose = NULL;
  pc->ops->setup          = PCSetUp_Redistribute;
  pc->ops->destroy        = PCDestroy_Redistribute;
  pc->ops->setfromoptions = PCSetFromOptions_Redistribute;
  pc->ops->view           = PCView_Redistribute;

  PetscCall(KSPCreate(PetscObjectComm((PetscObject)pc), &red->ksp));
  PetscCall(KSPSetErrorIfNotConverged(red->ksp, pc->erroriffailure));
  PetscCall(PetscObjectIncrementTabLevel((PetscObject)red->ksp, (PetscObject)pc, 1));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCall(KSPSetOptionsPrefix(red->ksp, prefix));
  PetscCall(KSPAppendOptionsPrefix(red->ksp, "redistribute_"));
  PetscFunctionReturn(0);
}