1 2 /* 3 This file defines a "solve the problem redistributely on each subgroup of processor" preconditioner. 4 */ 5 #include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/ 6 #include <petscksp.h> 7 8 typedef struct { 9 KSP ksp; 10 Vec x, b; 11 VecScatter scatter; 12 IS is; 13 PetscInt dcnt, *drows; /* these are the local rows that have only diagonal entry */ 14 PetscScalar *diag; 15 Vec work; 16 } PC_Redistribute; 17 18 static PetscErrorCode PCView_Redistribute(PC pc, PetscViewer viewer) 19 { 20 PC_Redistribute *red = (PC_Redistribute *)pc->data; 21 PetscBool iascii, isstring; 22 PetscInt ncnt, N; 23 24 PetscFunctionBegin; 25 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 26 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring)); 27 if (iascii) { 28 PetscCall(MPIU_Allreduce(&red->dcnt, &ncnt, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 29 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 30 PetscCall(PetscViewerASCIIPrintf(viewer, " Number rows eliminated %" PetscInt_FMT " Percentage rows eliminated %g\n", ncnt, (double)(100.0 * ((PetscReal)ncnt) / ((PetscReal)N)))); 31 PetscCall(PetscViewerASCIIPrintf(viewer, " Redistribute preconditioner: \n")); 32 PetscCall(KSPView(red->ksp, viewer)); 33 } else if (isstring) { 34 PetscCall(PetscViewerStringSPrintf(viewer, " Redistribute preconditioner")); 35 PetscCall(KSPView(red->ksp, viewer)); 36 } 37 PetscFunctionReturn(0); 38 } 39 40 static PetscErrorCode PCSetUp_Redistribute(PC pc) 41 { 42 PC_Redistribute *red = (PC_Redistribute *)pc->data; 43 MPI_Comm comm; 44 PetscInt rstart, rend, i, nz, cnt, *rows, ncnt, dcnt, *drows; 45 PetscLayout map, nmap; 46 PetscMPIInt size, tag, n; 47 PETSC_UNUSED PetscMPIInt imdex; 48 PetscInt *source = NULL; 49 PetscMPIInt *sizes = NULL, nrecvs; 50 PetscInt j, nsends; 51 PetscInt *owner = NULL, *starts = NULL, count, slen; 52 PetscInt *rvalues, *svalues, recvtotal; 53 PetscMPIInt *onodes1, *olengths1; 54 MPI_Request *send_waits = NULL, *recv_waits = NULL; 55 MPI_Status recv_status, *send_status; 56 Vec tvec, diag; 57 Mat tmat; 58 const PetscScalar *d, *values; 59 const PetscInt *cols; 60 61 PetscFunctionBegin; 62 if (pc->setupcalled) { 63 PetscCall(KSPGetOperators(red->ksp, NULL, &tmat)); 64 PetscCall(MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_REUSE_MATRIX, &tmat)); 65 PetscCall(KSPSetOperators(red->ksp, tmat, tmat)); 66 } else { 67 PetscInt NN; 68 69 PetscCall(PetscObjectGetComm((PetscObject)pc, &comm)); 70 PetscCallMPI(MPI_Comm_size(comm, &size)); 71 PetscCall(PetscObjectGetNewTag((PetscObject)pc, &tag)); 72 73 /* count non-diagonal rows on process */ 74 PetscCall(MatGetOwnershipRange(pc->mat, &rstart, &rend)); 75 cnt = 0; 76 for (i = rstart; i < rend; i++) { 77 PetscCall(MatGetRow(pc->mat, i, &nz, &cols, &values)); 78 for (PetscInt j = 0; j < nz; j++) { 79 if (values[j] != 0 && cols[j] != i) { 80 cnt++; 81 break; 82 } 83 } 84 PetscCall(MatRestoreRow(pc->mat, i, &nz, &cols, &values)); 85 } 86 PetscCall(PetscMalloc1(cnt, &rows)); 87 PetscCall(PetscMalloc1(rend - rstart - cnt, &drows)); 88 89 /* list non-diagonal rows on process */ 90 cnt = 0; 91 dcnt = 0; 92 for (i = rstart; i < rend; i++) { 93 PetscBool diagonly = PETSC_TRUE; 94 PetscCall(MatGetRow(pc->mat, i, &nz, &cols, &values)); 95 for (PetscInt j = 0; j < nz; j++) { 96 if (values[j] != 0 && cols[j] != i) { 97 diagonly = PETSC_FALSE; 98 break; 99 } 100 } 101 if (!diagonly) rows[cnt++] = i; 102 else drows[dcnt++] = i - rstart; 103 PetscCall(MatRestoreRow(pc->mat, i, &nz, &cols, &values)); 104 } 105 106 /* create PetscLayout for non-diagonal rows on each process */ 107 PetscCall(PetscLayoutCreate(comm, &map)); 108 PetscCall(PetscLayoutSetLocalSize(map, cnt)); 109 PetscCall(PetscLayoutSetBlockSize(map, 1)); 110 PetscCall(PetscLayoutSetUp(map)); 111 rstart = map->rstart; 112 rend = map->rend; 113 114 /* create PetscLayout for load-balanced non-diagonal rows on each process */ 115 PetscCall(PetscLayoutCreate(comm, &nmap)); 116 PetscCall(MPIU_Allreduce(&cnt, &ncnt, 1, MPIU_INT, MPI_SUM, comm)); 117 PetscCall(PetscLayoutSetSize(nmap, ncnt)); 118 PetscCall(PetscLayoutSetBlockSize(nmap, 1)); 119 PetscCall(PetscLayoutSetUp(nmap)); 120 121 PetscCall(MatGetSize(pc->pmat, &NN, NULL)); 122 PetscCall(PetscInfo(pc, "Number of diagonal rows eliminated %" PetscInt_FMT ", percentage eliminated %g\n", NN - ncnt, (double)(((PetscReal)(NN - ncnt)) / ((PetscReal)(NN))))); 123 124 if (size > 1) { 125 /* the following block of code assumes MPI can send messages to self, which is not supported for MPI-uni hence we need to handle the size 1 case as a special case */ 126 /* 127 this code is taken from VecScatterCreate_PtoS() 128 Determines what rows need to be moved where to 129 load balance the non-diagonal rows 130 */ 131 /* count number of contributors to each processor */ 132 PetscCall(PetscMalloc2(size, &sizes, cnt, &owner)); 133 PetscCall(PetscArrayzero(sizes, size)); 134 j = 0; 135 nsends = 0; 136 for (i = rstart; i < rend; i++) { 137 if (i < nmap->range[j]) j = 0; 138 for (; j < size; j++) { 139 if (i < nmap->range[j + 1]) { 140 if (!sizes[j]++) nsends++; 141 owner[i - rstart] = j; 142 break; 143 } 144 } 145 } 146 /* inform other processors of number of messages and max length*/ 147 PetscCall(PetscGatherNumberOfMessages(comm, NULL, sizes, &nrecvs)); 148 PetscCall(PetscGatherMessageLengths(comm, nsends, nrecvs, sizes, &onodes1, &olengths1)); 149 PetscCall(PetscSortMPIIntWithArray(nrecvs, onodes1, olengths1)); 150 recvtotal = 0; 151 for (i = 0; i < nrecvs; i++) recvtotal += olengths1[i]; 152 153 /* post receives: rvalues - rows I will own; count - nu */ 154 PetscCall(PetscMalloc3(recvtotal, &rvalues, nrecvs, &source, nrecvs, &recv_waits)); 155 count = 0; 156 for (i = 0; i < nrecvs; i++) { 157 PetscCallMPI(MPI_Irecv((rvalues + count), olengths1[i], MPIU_INT, onodes1[i], tag, comm, recv_waits + i)); 158 count += olengths1[i]; 159 } 160 161 /* do sends: 162 1) starts[i] gives the starting index in svalues for stuff going to 163 the ith processor 164 */ 165 PetscCall(PetscMalloc3(cnt, &svalues, nsends, &send_waits, size, &starts)); 166 starts[0] = 0; 167 for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1]; 168 for (i = 0; i < cnt; i++) svalues[starts[owner[i]]++] = rows[i]; 169 for (i = 0; i < cnt; i++) rows[i] = rows[i] - rstart; 170 red->drows = drows; 171 red->dcnt = dcnt; 172 PetscCall(PetscFree(rows)); 173 174 starts[0] = 0; 175 for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1]; 176 count = 0; 177 for (i = 0; i < size; i++) { 178 if (sizes[i]) PetscCallMPI(MPI_Isend(svalues + starts[i], sizes[i], MPIU_INT, i, tag, comm, send_waits + count++)); 179 } 180 181 /* wait on receives */ 182 count = nrecvs; 183 slen = 0; 184 while (count) { 185 PetscCallMPI(MPI_Waitany(nrecvs, recv_waits, &imdex, &recv_status)); 186 /* unpack receives into our local space */ 187 PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, &n)); 188 slen += n; 189 count--; 190 } 191 PetscCheck(slen == recvtotal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Total message lengths %" PetscInt_FMT " not expected %" PetscInt_FMT, slen, recvtotal); 192 PetscCall(ISCreateGeneral(comm, slen, rvalues, PETSC_COPY_VALUES, &red->is)); 193 194 /* free all work space */ 195 PetscCall(PetscFree(olengths1)); 196 PetscCall(PetscFree(onodes1)); 197 PetscCall(PetscFree3(rvalues, source, recv_waits)); 198 PetscCall(PetscFree2(sizes, owner)); 199 if (nsends) { /* wait on sends */ 200 PetscCall(PetscMalloc1(nsends, &send_status)); 201 PetscCallMPI(MPI_Waitall(nsends, send_waits, send_status)); 202 PetscCall(PetscFree(send_status)); 203 } 204 PetscCall(PetscFree3(svalues, send_waits, starts)); 205 } else { 206 PetscCall(ISCreateGeneral(comm, cnt, rows, PETSC_OWN_POINTER, &red->is)); 207 red->drows = drows; 208 red->dcnt = dcnt; 209 slen = cnt; 210 } 211 PetscCall(PetscLayoutDestroy(&map)); 212 PetscCall(PetscLayoutDestroy(&nmap)); 213 214 PetscCall(VecCreateMPI(comm, slen, PETSC_DETERMINE, &red->b)); 215 PetscCall(VecDuplicate(red->b, &red->x)); 216 PetscCall(MatCreateVecs(pc->pmat, &tvec, NULL)); 217 PetscCall(VecScatterCreate(tvec, red->is, red->b, NULL, &red->scatter)); 218 PetscCall(VecDestroy(&tvec)); 219 PetscCall(MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_INITIAL_MATRIX, &tmat)); 220 PetscCall(KSPSetOperators(red->ksp, tmat, tmat)); 221 PetscCall(MatDestroy(&tmat)); 222 } 223 224 /* get diagonal portion of matrix */ 225 PetscCall(PetscFree(red->diag)); 226 PetscCall(PetscMalloc1(red->dcnt, &red->diag)); 227 PetscCall(MatCreateVecs(pc->pmat, &diag, NULL)); 228 PetscCall(MatGetDiagonal(pc->pmat, diag)); 229 PetscCall(VecGetArrayRead(diag, &d)); 230 for (i = 0; i < red->dcnt; i++) red->diag[i] = 1.0 / d[red->drows[i]]; 231 PetscCall(VecRestoreArrayRead(diag, &d)); 232 PetscCall(VecDestroy(&diag)); 233 PetscCall(KSPSetUp(red->ksp)); 234 PetscFunctionReturn(0); 235 } 236 237 static PetscErrorCode PCApply_Redistribute(PC pc, Vec b, Vec x) 238 { 239 PC_Redistribute *red = (PC_Redistribute *)pc->data; 240 PetscInt dcnt = red->dcnt, i; 241 const PetscInt *drows = red->drows; 242 PetscScalar *xwork; 243 const PetscScalar *bwork, *diag = red->diag; 244 245 PetscFunctionBegin; 246 if (!red->work) PetscCall(VecDuplicate(b, &red->work)); 247 /* compute the rows of solution that have diagonal entries only */ 248 PetscCall(VecSet(x, 0.0)); /* x = diag(A)^{-1} b */ 249 PetscCall(VecGetArray(x, &xwork)); 250 PetscCall(VecGetArrayRead(b, &bwork)); 251 for (i = 0; i < dcnt; i++) xwork[drows[i]] = diag[i] * bwork[drows[i]]; 252 PetscCall(PetscLogFlops(dcnt)); 253 PetscCall(VecRestoreArray(red->work, &xwork)); 254 PetscCall(VecRestoreArrayRead(b, &bwork)); 255 /* update the right hand side for the reduced system with diagonal rows (and corresponding columns) removed */ 256 PetscCall(MatMult(pc->pmat, x, red->work)); 257 PetscCall(VecAYPX(red->work, -1.0, b)); /* red->work = b - A x */ 258 259 PetscCall(VecScatterBegin(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD)); 260 PetscCall(VecScatterEnd(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD)); 261 PetscCall(KSPSolve(red->ksp, red->b, red->x)); 262 PetscCall(KSPCheckSolve(red->ksp, pc, red->x)); 263 PetscCall(VecScatterBegin(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE)); 264 PetscCall(VecScatterEnd(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE)); 265 PetscFunctionReturn(0); 266 } 267 268 static PetscErrorCode PCDestroy_Redistribute(PC pc) 269 { 270 PC_Redistribute *red = (PC_Redistribute *)pc->data; 271 272 PetscFunctionBegin; 273 PetscCall(VecScatterDestroy(&red->scatter)); 274 PetscCall(ISDestroy(&red->is)); 275 PetscCall(VecDestroy(&red->b)); 276 PetscCall(VecDestroy(&red->x)); 277 PetscCall(KSPDestroy(&red->ksp)); 278 PetscCall(VecDestroy(&red->work)); 279 PetscCall(PetscFree(red->drows)); 280 PetscCall(PetscFree(red->diag)); 281 PetscCall(PetscFree(pc->data)); 282 PetscFunctionReturn(0); 283 } 284 285 static PetscErrorCode PCSetFromOptions_Redistribute(PC pc, PetscOptionItems *PetscOptionsObject) 286 { 287 PC_Redistribute *red = (PC_Redistribute *)pc->data; 288 289 PetscFunctionBegin; 290 PetscCall(KSPSetFromOptions(red->ksp)); 291 PetscFunctionReturn(0); 292 } 293 294 /*@ 295 PCRedistributeGetKSP - Gets the `KSP` created by the `PCREDISTRIBUTE` 296 297 Not Collective 298 299 Input Parameter: 300 . pc - the preconditioner context 301 302 Output Parameter: 303 . innerksp - the inner `KSP` 304 305 Level: advanced 306 307 .seealso: `KSP`, `PCREDISTRIBUTE` 308 @*/ 309 PetscErrorCode PCRedistributeGetKSP(PC pc, KSP *innerksp) 310 { 311 PC_Redistribute *red = (PC_Redistribute *)pc->data; 312 313 PetscFunctionBegin; 314 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 315 PetscValidPointer(innerksp, 2); 316 *innerksp = red->ksp; 317 PetscFunctionReturn(0); 318 } 319 320 /*MC 321 PCREDISTRIBUTE - Redistributes a matrix for load balancing, removing the rows (and the corresponding columns) that only have a diagonal entry and then 322 applies a `KSP` to that new smaller matrix 323 324 Notes: 325 Options for the redistribute `KSP` and `PC` with the options database prefix -redistribute_ 326 327 Usually run this with -ksp_type preonly 328 329 If you have used `MatZeroRows()` to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem then you can use, for example, -ksp_type preonly 330 -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc to take advantage of the symmetry. 331 332 This does NOT call a partitioner to reorder rows to lower communication; the ordering of the rows in the original matrix and redistributed matrix is the same. 333 334 Developer Note: 335 Should add an option to this preconditioner to use a partitioner to redistribute the rows to lower communication. 336 337 Level: intermediate 338 339 .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PCRedistributeGetKSP()`, `MatZeroRows()` 340 M*/ 341 342 PETSC_EXTERN PetscErrorCode PCCreate_Redistribute(PC pc) 343 { 344 PC_Redistribute *red; 345 const char *prefix; 346 347 PetscFunctionBegin; 348 PetscCall(PetscNew(&red)); 349 pc->data = (void *)red; 350 351 pc->ops->apply = PCApply_Redistribute; 352 pc->ops->applytranspose = NULL; 353 pc->ops->setup = PCSetUp_Redistribute; 354 pc->ops->destroy = PCDestroy_Redistribute; 355 pc->ops->setfromoptions = PCSetFromOptions_Redistribute; 356 pc->ops->view = PCView_Redistribute; 357 358 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pc), &red->ksp)); 359 PetscCall(KSPSetErrorIfNotConverged(red->ksp, pc->erroriffailure)); 360 PetscCall(PetscObjectIncrementTabLevel((PetscObject)red->ksp, (PetscObject)pc, 1)); 361 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 362 PetscCall(KSPSetOptionsPrefix(red->ksp, prefix)); 363 PetscCall(KSPAppendOptionsPrefix(red->ksp, "redistribute_")); 364 PetscFunctionReturn(0); 365 } 366