1 2 /* 3 This file defines a "solve the problem redistributely on each subgroup of processor" preconditioner. 4 */ 5 #include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/ 6 #include <petscksp.h> 7 8 typedef struct { 9 KSP ksp; 10 Vec x, b; 11 VecScatter scatter; 12 IS is; 13 PetscInt dcnt, *drows; /* these are the local rows that have only diagonal entry */ 14 PetscScalar *diag; 15 Vec work; 16 } PC_Redistribute; 17 18 static PetscErrorCode PCView_Redistribute(PC pc, PetscViewer viewer) { 19 PC_Redistribute *red = (PC_Redistribute *)pc->data; 20 PetscBool iascii, isstring; 21 PetscInt ncnt, N; 22 23 PetscFunctionBegin; 24 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 25 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring)); 26 if (iascii) { 27 PetscCall(MPIU_Allreduce(&red->dcnt, &ncnt, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 28 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 29 PetscCall(PetscViewerASCIIPrintf(viewer, " Number rows eliminated %" PetscInt_FMT " Percentage rows eliminated %g\n", ncnt, (double)(100.0 * ((PetscReal)ncnt) / ((PetscReal)N)))); 30 PetscCall(PetscViewerASCIIPrintf(viewer, " Redistribute preconditioner: \n")); 31 PetscCall(KSPView(red->ksp, viewer)); 32 } else if (isstring) { 33 PetscCall(PetscViewerStringSPrintf(viewer, " Redistribute preconditioner")); 34 PetscCall(KSPView(red->ksp, viewer)); 35 } 36 PetscFunctionReturn(0); 37 } 38 39 static PetscErrorCode PCSetUp_Redistribute(PC pc) { 40 PC_Redistribute *red = (PC_Redistribute *)pc->data; 41 MPI_Comm comm; 42 PetscInt rstart, rend, i, nz, cnt, *rows, ncnt, dcnt, *drows; 43 PetscLayout map, nmap; 44 PetscMPIInt size, tag, n; 45 PETSC_UNUSED PetscMPIInt imdex; 46 PetscInt *source = NULL; 47 PetscMPIInt *sizes = NULL, nrecvs; 48 PetscInt j, nsends; 49 PetscInt *owner = NULL, *starts = NULL, count, slen; 50 PetscInt *rvalues, *svalues, recvtotal; 51 PetscMPIInt *onodes1, *olengths1; 52 MPI_Request *send_waits = NULL, *recv_waits = NULL; 53 MPI_Status recv_status, *send_status; 54 Vec tvec, diag; 55 Mat tmat; 56 const PetscScalar *d, *values; 57 const PetscInt *cols; 58 59 PetscFunctionBegin; 60 if (pc->setupcalled) { 61 PetscCall(KSPGetOperators(red->ksp, NULL, &tmat)); 62 PetscCall(MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_REUSE_MATRIX, &tmat)); 63 PetscCall(KSPSetOperators(red->ksp, tmat, tmat)); 64 } else { 65 PetscInt NN; 66 67 PetscCall(PetscObjectGetComm((PetscObject)pc, &comm)); 68 PetscCallMPI(MPI_Comm_size(comm, &size)); 69 PetscCall(PetscObjectGetNewTag((PetscObject)pc, &tag)); 70 71 /* count non-diagonal rows on process */ 72 PetscCall(MatGetOwnershipRange(pc->mat, &rstart, &rend)); 73 cnt = 0; 74 for (i = rstart; i < rend; i++) { 75 PetscCall(MatGetRow(pc->mat, i, &nz, &cols, &values)); 76 for (PetscInt j = 0; j < nz; j++) { 77 if (values[j] != 0 && cols[j] != i) { 78 cnt++; 79 break; 80 } 81 } 82 PetscCall(MatRestoreRow(pc->mat, i, &nz, &cols, &values)); 83 } 84 PetscCall(PetscMalloc1(cnt, &rows)); 85 PetscCall(PetscMalloc1(rend - rstart - cnt, &drows)); 86 87 /* list non-diagonal rows on process */ 88 cnt = 0; 89 dcnt = 0; 90 for (i = rstart; i < rend; i++) { 91 PetscBool diagonly = PETSC_TRUE; 92 PetscCall(MatGetRow(pc->mat, i, &nz, &cols, &values)); 93 for (PetscInt j = 0; j < nz; j++) { 94 if (values[j] != 0 && cols[j] != i) { 95 diagonly = PETSC_FALSE; 96 break; 97 } 98 } 99 if (!diagonly) rows[cnt++] = i; 100 else drows[dcnt++] = i - rstart; 101 PetscCall(MatRestoreRow(pc->mat, i, &nz, &cols, &values)); 102 } 103 104 /* create PetscLayout for non-diagonal rows on each process */ 105 PetscCall(PetscLayoutCreate(comm, &map)); 106 PetscCall(PetscLayoutSetLocalSize(map, cnt)); 107 PetscCall(PetscLayoutSetBlockSize(map, 1)); 108 PetscCall(PetscLayoutSetUp(map)); 109 rstart = map->rstart; 110 rend = map->rend; 111 112 /* create PetscLayout for load-balanced non-diagonal rows on each process */ 113 PetscCall(PetscLayoutCreate(comm, &nmap)); 114 PetscCall(MPIU_Allreduce(&cnt, &ncnt, 1, MPIU_INT, MPI_SUM, comm)); 115 PetscCall(PetscLayoutSetSize(nmap, ncnt)); 116 PetscCall(PetscLayoutSetBlockSize(nmap, 1)); 117 PetscCall(PetscLayoutSetUp(nmap)); 118 119 PetscCall(MatGetSize(pc->pmat, &NN, NULL)); 120 PetscCall(PetscInfo(pc, "Number of diagonal rows eliminated %" PetscInt_FMT ", percentage eliminated %g\n", NN - ncnt, (double)(((PetscReal)(NN - ncnt)) / ((PetscReal)(NN))))); 121 122 if (size > 1) { 123 /* the following block of code assumes MPI can send messages to self, which is not supported for MPI-uni hence we need to handle the size 1 case as a special case */ 124 /* 125 this code is taken from VecScatterCreate_PtoS() 126 Determines what rows need to be moved where to 127 load balance the non-diagonal rows 128 */ 129 /* count number of contributors to each processor */ 130 PetscCall(PetscMalloc2(size, &sizes, cnt, &owner)); 131 PetscCall(PetscArrayzero(sizes, size)); 132 j = 0; 133 nsends = 0; 134 for (i = rstart; i < rend; i++) { 135 if (i < nmap->range[j]) j = 0; 136 for (; j < size; j++) { 137 if (i < nmap->range[j + 1]) { 138 if (!sizes[j]++) nsends++; 139 owner[i - rstart] = j; 140 break; 141 } 142 } 143 } 144 /* inform other processors of number of messages and max length*/ 145 PetscCall(PetscGatherNumberOfMessages(comm, NULL, sizes, &nrecvs)); 146 PetscCall(PetscGatherMessageLengths(comm, nsends, nrecvs, sizes, &onodes1, &olengths1)); 147 PetscCall(PetscSortMPIIntWithArray(nrecvs, onodes1, olengths1)); 148 recvtotal = 0; 149 for (i = 0; i < nrecvs; i++) recvtotal += olengths1[i]; 150 151 /* post receives: rvalues - rows I will own; count - nu */ 152 PetscCall(PetscMalloc3(recvtotal, &rvalues, nrecvs, &source, nrecvs, &recv_waits)); 153 count = 0; 154 for (i = 0; i < nrecvs; i++) { 155 PetscCallMPI(MPI_Irecv((rvalues + count), olengths1[i], MPIU_INT, onodes1[i], tag, comm, recv_waits + i)); 156 count += olengths1[i]; 157 } 158 159 /* do sends: 160 1) starts[i] gives the starting index in svalues for stuff going to 161 the ith processor 162 */ 163 PetscCall(PetscMalloc3(cnt, &svalues, nsends, &send_waits, size, &starts)); 164 starts[0] = 0; 165 for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1]; 166 for (i = 0; i < cnt; i++) svalues[starts[owner[i]]++] = rows[i]; 167 for (i = 0; i < cnt; i++) rows[i] = rows[i] - rstart; 168 red->drows = drows; 169 red->dcnt = dcnt; 170 PetscCall(PetscFree(rows)); 171 172 starts[0] = 0; 173 for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1]; 174 count = 0; 175 for (i = 0; i < size; i++) { 176 if (sizes[i]) { PetscCallMPI(MPI_Isend(svalues + starts[i], sizes[i], MPIU_INT, i, tag, comm, send_waits + count++)); } 177 } 178 179 /* wait on receives */ 180 count = nrecvs; 181 slen = 0; 182 while (count) { 183 PetscCallMPI(MPI_Waitany(nrecvs, recv_waits, &imdex, &recv_status)); 184 /* unpack receives into our local space */ 185 PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, &n)); 186 slen += n; 187 count--; 188 } 189 PetscCheck(slen == recvtotal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Total message lengths %" PetscInt_FMT " not expected %" PetscInt_FMT, slen, recvtotal); 190 PetscCall(ISCreateGeneral(comm, slen, rvalues, PETSC_COPY_VALUES, &red->is)); 191 192 /* free all work space */ 193 PetscCall(PetscFree(olengths1)); 194 PetscCall(PetscFree(onodes1)); 195 PetscCall(PetscFree3(rvalues, source, recv_waits)); 196 PetscCall(PetscFree2(sizes, owner)); 197 if (nsends) { /* wait on sends */ 198 PetscCall(PetscMalloc1(nsends, &send_status)); 199 PetscCallMPI(MPI_Waitall(nsends, send_waits, send_status)); 200 PetscCall(PetscFree(send_status)); 201 } 202 PetscCall(PetscFree3(svalues, send_waits, starts)); 203 } else { 204 PetscCall(ISCreateGeneral(comm, cnt, rows, PETSC_OWN_POINTER, &red->is)); 205 red->drows = drows; 206 red->dcnt = dcnt; 207 slen = cnt; 208 } 209 PetscCall(PetscLayoutDestroy(&map)); 210 PetscCall(PetscLayoutDestroy(&nmap)); 211 212 PetscCall(VecCreateMPI(comm, slen, PETSC_DETERMINE, &red->b)); 213 PetscCall(VecDuplicate(red->b, &red->x)); 214 PetscCall(MatCreateVecs(pc->pmat, &tvec, NULL)); 215 PetscCall(VecScatterCreate(tvec, red->is, red->b, NULL, &red->scatter)); 216 PetscCall(VecDestroy(&tvec)); 217 PetscCall(MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_INITIAL_MATRIX, &tmat)); 218 PetscCall(KSPSetOperators(red->ksp, tmat, tmat)); 219 PetscCall(MatDestroy(&tmat)); 220 } 221 222 /* get diagonal portion of matrix */ 223 PetscCall(PetscFree(red->diag)); 224 PetscCall(PetscMalloc1(red->dcnt, &red->diag)); 225 PetscCall(MatCreateVecs(pc->pmat, &diag, NULL)); 226 PetscCall(MatGetDiagonal(pc->pmat, diag)); 227 PetscCall(VecGetArrayRead(diag, &d)); 228 for (i = 0; i < red->dcnt; i++) red->diag[i] = 1.0 / d[red->drows[i]]; 229 PetscCall(VecRestoreArrayRead(diag, &d)); 230 PetscCall(VecDestroy(&diag)); 231 PetscCall(KSPSetUp(red->ksp)); 232 PetscFunctionReturn(0); 233 } 234 235 static PetscErrorCode PCApply_Redistribute(PC pc, Vec b, Vec x) { 236 PC_Redistribute *red = (PC_Redistribute *)pc->data; 237 PetscInt dcnt = red->dcnt, i; 238 const PetscInt *drows = red->drows; 239 PetscScalar *xwork; 240 const PetscScalar *bwork, *diag = red->diag; 241 242 PetscFunctionBegin; 243 if (!red->work) { PetscCall(VecDuplicate(b, &red->work)); } 244 /* compute the rows of solution that have diagonal entries only */ 245 PetscCall(VecSet(x, 0.0)); /* x = diag(A)^{-1} b */ 246 PetscCall(VecGetArray(x, &xwork)); 247 PetscCall(VecGetArrayRead(b, &bwork)); 248 for (i = 0; i < dcnt; i++) xwork[drows[i]] = diag[i] * bwork[drows[i]]; 249 PetscCall(PetscLogFlops(dcnt)); 250 PetscCall(VecRestoreArray(red->work, &xwork)); 251 PetscCall(VecRestoreArrayRead(b, &bwork)); 252 /* update the right hand side for the reduced system with diagonal rows (and corresponding columns) removed */ 253 PetscCall(MatMult(pc->pmat, x, red->work)); 254 PetscCall(VecAYPX(red->work, -1.0, b)); /* red->work = b - A x */ 255 256 PetscCall(VecScatterBegin(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD)); 257 PetscCall(VecScatterEnd(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD)); 258 PetscCall(KSPSolve(red->ksp, red->b, red->x)); 259 PetscCall(KSPCheckSolve(red->ksp, pc, red->x)); 260 PetscCall(VecScatterBegin(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE)); 261 PetscCall(VecScatterEnd(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE)); 262 PetscFunctionReturn(0); 263 } 264 265 static PetscErrorCode PCDestroy_Redistribute(PC pc) { 266 PC_Redistribute *red = (PC_Redistribute *)pc->data; 267 268 PetscFunctionBegin; 269 PetscCall(VecScatterDestroy(&red->scatter)); 270 PetscCall(ISDestroy(&red->is)); 271 PetscCall(VecDestroy(&red->b)); 272 PetscCall(VecDestroy(&red->x)); 273 PetscCall(KSPDestroy(&red->ksp)); 274 PetscCall(VecDestroy(&red->work)); 275 PetscCall(PetscFree(red->drows)); 276 PetscCall(PetscFree(red->diag)); 277 PetscCall(PetscFree(pc->data)); 278 PetscFunctionReturn(0); 279 } 280 281 static PetscErrorCode PCSetFromOptions_Redistribute(PC pc, PetscOptionItems *PetscOptionsObject) { 282 PC_Redistribute *red = (PC_Redistribute *)pc->data; 283 284 PetscFunctionBegin; 285 PetscCall(KSPSetFromOptions(red->ksp)); 286 PetscFunctionReturn(0); 287 } 288 289 /*@ 290 PCRedistributeGetKSP - Gets the KSP created by the PCREDISTRIBUTE 291 292 Not Collective 293 294 Input Parameter: 295 . pc - the preconditioner context 296 297 Output Parameter: 298 . innerksp - the inner KSP 299 300 Level: advanced 301 302 @*/ 303 PetscErrorCode PCRedistributeGetKSP(PC pc, KSP *innerksp) { 304 PC_Redistribute *red = (PC_Redistribute *)pc->data; 305 306 PetscFunctionBegin; 307 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 308 PetscValidPointer(innerksp, 2); 309 *innerksp = red->ksp; 310 PetscFunctionReturn(0); 311 } 312 313 /* -------------------------------------------------------------------------------------*/ 314 /*MC 315 PCREDISTRIBUTE - Redistributes a matrix for load balancing, removing the rows (and the corresponding columns) that only have a diagonal entry and then 316 applys a KSP to that new smaller matrix 317 318 Options for the redistribute preconditioners can be set with -redistribute_ksp_xxx <values> and -redistribute_pc_xxx <values> 319 320 Notes: 321 Usually run this with -ksp_type preonly 322 323 If you have used `MatZeroRows()` to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem then you can use, for example, -ksp_type preonly 324 -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc to take advantage of the symmetry. 325 326 This does NOT call a partitioner to reorder rows to lower communication; the ordering of the rows in the original matrix and redistributed matrix is the same. 327 328 Developer Notes: 329 Should add an option to this preconditioner to use a partitioner to redistribute the rows to lower communication. 330 331 Level: intermediate 332 333 .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PCRedistributeGetKSP()`, `MatZeroRows()` 334 M*/ 335 336 PETSC_EXTERN PetscErrorCode PCCreate_Redistribute(PC pc) { 337 PC_Redistribute *red; 338 const char *prefix; 339 340 PetscFunctionBegin; 341 PetscCall(PetscNewLog(pc, &red)); 342 pc->data = (void *)red; 343 344 pc->ops->apply = PCApply_Redistribute; 345 pc->ops->applytranspose = NULL; 346 pc->ops->setup = PCSetUp_Redistribute; 347 pc->ops->destroy = PCDestroy_Redistribute; 348 pc->ops->setfromoptions = PCSetFromOptions_Redistribute; 349 pc->ops->view = PCView_Redistribute; 350 351 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pc), &red->ksp)); 352 PetscCall(KSPSetErrorIfNotConverged(red->ksp, pc->erroriffailure)); 353 PetscCall(PetscObjectIncrementTabLevel((PetscObject)red->ksp, (PetscObject)pc, 1)); 354 PetscCall(PetscLogObjectParent((PetscObject)pc, (PetscObject)red->ksp)); 355 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 356 PetscCall(KSPSetOptionsPrefix(red->ksp, prefix)); 357 PetscCall(KSPAppendOptionsPrefix(red->ksp, "redistribute_")); 358 PetscFunctionReturn(0); 359 } 360