/*
  This file defines a preconditioner that redistributes the rows of the matrix for load balancing,
  removes the rows (and corresponding columns) that have only a diagonal entry, and solves the
  resulting smaller system with an inner KSP.
*/
#include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/
#include <petscksp.h>

typedef struct {
  KSP         ksp;
  Vec         x,b;
  VecScatter  scatter;
  IS          is;
  PetscInt    dcnt,*drows;   /* these are the local rows that have only a diagonal entry */
  PetscScalar *diag;
  Vec         work;
} PC_Redistribute;

static PetscErrorCode PCView_Redistribute(PC pc,PetscViewer viewer)
{
  PC_Redistribute *red = (PC_Redistribute*)pc->data;
  PetscBool       iascii,isstring;
  PetscInt        ncnt,N;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring));
  if (iascii) {
    PetscCall(MPIU_Allreduce(&red->dcnt,&ncnt,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc)));
    PetscCall(MatGetSize(pc->pmat,&N,NULL));
    PetscCall(PetscViewerASCIIPrintf(viewer," Number rows eliminated %" PetscInt_FMT " Percentage rows eliminated %g\n",ncnt,(double)(100.0*((PetscReal)ncnt)/((PetscReal)N))));
    PetscCall(PetscViewerASCIIPrintf(viewer," Redistribute preconditioner: \n"));
    PetscCall(KSPView(red->ksp,viewer));
  } else if (isstring) {
    PetscCall(PetscViewerStringSPrintf(viewer," Redistribute preconditioner"));
    PetscCall(KSPView(red->ksp,viewer));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode PCSetUp_Redistribute(PC pc)
{
  PC_Redistribute          *red = (PC_Redistribute*)pc->data;
  MPI_Comm                 comm;
  PetscInt                 rstart,rend,i,nz,cnt,*rows,ncnt,dcnt,*drows;
  PetscLayout              map,nmap;
  PetscMPIInt              size,tag,n;
  PETSC_UNUSED PetscMPIInt imdex;
  PetscInt                 *source = NULL;
  PetscMPIInt              *sizes = NULL,nrecvs;
  PetscInt                 j,nsends;
  PetscInt                 *owner = NULL,*starts = NULL,count,slen;
  PetscInt                 *rvalues,*svalues,recvtotal;
  PetscMPIInt              *onodes1,*olengths1;
  MPI_Request              *send_waits = NULL,*recv_waits = NULL;
  MPI_Status               recv_status,*send_status;
  Vec                      tvec,diag;
  Mat                      tmat;
  const PetscScalar        *d,*values;
  const PetscInt           *cols;

  PetscFunctionBegin;
  if (pc->setupcalled) {
    PetscCall(KSPGetOperators(red->ksp,NULL,&tmat));
    PetscCall(MatCreateSubMatrix(pc->pmat,red->is,red->is,MAT_REUSE_MATRIX,&tmat));
    PetscCall(KSPSetOperators(red->ksp,tmat,tmat));
  } else {
    PetscInt NN;

    PetscCall(PetscObjectGetComm((PetscObject)pc,&comm));
    PetscCallMPI(MPI_Comm_size(comm,&size));
    PetscCall(PetscObjectGetNewTag((PetscObject)pc,&tag));

    /* count the local rows that have a nonzero off the diagonal */
    PetscCall(MatGetOwnershipRange(pc->mat,&rstart,&rend));
    cnt = 0;
    for (i=rstart; i<rend; i++) {
      PetscCall(MatGetRow(pc->mat,i,&nz,&cols,&values));
      for (PetscInt j=0; j<nz; j++) {
        if (values[j] != 0 && cols[j] != i) {
          cnt++;
          break;
        }
      }
      PetscCall(MatRestoreRow(pc->mat,i,&nz,&cols,&values));
    }
    PetscCall(PetscMalloc1(cnt,&rows));
    PetscCall(PetscMalloc1(rend - rstart - cnt,&drows));

    /* list the non-diagonal-only rows (global indices) and the diagonal-only rows (local indices) */
    cnt = 0; dcnt = 0;
    for (i=rstart; i<rend; i++) {
      PetscBool diagonly = PETSC_TRUE;
      PetscCall(MatGetRow(pc->mat,i,&nz,&cols,&values));
      for (PetscInt j=0; j<nz; j++) {
        if (values[j] != 0 && cols[j] != i) {
          diagonly = PETSC_FALSE;
          break;
        }
      }
      if (!diagonly) rows[cnt++] = i;
      else drows[dcnt++] = i - rstart;
      PetscCall(MatRestoreRow(pc->mat,i,&nz,&cols,&values));
    }

    /* create PetscLayout for non-diagonal rows on each process */
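    /* map describes how the kept (non-diagonal-only) rows are currently distributed across
       processes, while nmap has the same global size but PETSc's default balanced layout;
       the ranges of nmap are used below to decide which process each kept row is sent to */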
    PetscCall(PetscLayoutCreate(comm,&map));
    PetscCall(PetscLayoutSetLocalSize(map,cnt));
    PetscCall(PetscLayoutSetBlockSize(map,1));
    PetscCall(PetscLayoutSetUp(map));
    rstart = map->rstart;
    rend   = map->rend;

    /* create PetscLayout for load-balanced non-diagonal rows on each process */
    PetscCall(PetscLayoutCreate(comm,&nmap));
    PetscCall(MPIU_Allreduce(&cnt,&ncnt,1,MPIU_INT,MPI_SUM,comm));
    PetscCall(PetscLayoutSetSize(nmap,ncnt));
    PetscCall(PetscLayoutSetBlockSize(nmap,1));
    PetscCall(PetscLayoutSetUp(nmap));

    PetscCall(MatGetSize(pc->pmat,&NN,NULL));
    PetscCall(PetscInfo(pc,"Number of diagonal rows eliminated %" PetscInt_FMT ", percentage eliminated %g\n",NN-ncnt,(double)(((PetscReal)(NN-ncnt))/((PetscReal)(NN)))));

    if (size > 1) {
      /* this block assumes MPI can send messages to itself, which MPI-uni does not support,
         hence the size == 1 case is handled separately below */
      /*
         This code is taken from VecScatterCreate_PtoS()
         Determines what rows need to be moved where to
         load balance the non-diagonal rows
      */
      /* count number of contributors to each processor */
      PetscCall(PetscMalloc2(size,&sizes,cnt,&owner));
      PetscCall(PetscArrayzero(sizes,size));
      j      = 0;
      nsends = 0;
      for (i=rstart; i<rend; i++) {
        if (i < nmap->range[j]) j = 0;
        for (; j<size; j++) {
          if (i < nmap->range[j+1]) {
            if (!sizes[j]++) nsends++;
            owner[i-rstart] = j;
            break;
          }
        }
      }
      /* inform other processors of number of messages and max length */
      PetscCall(PetscGatherNumberOfMessages(comm,NULL,sizes,&nrecvs));
      PetscCall(PetscGatherMessageLengths(comm,nsends,nrecvs,sizes,&onodes1,&olengths1));
      PetscCall(PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1));
      recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

      /* post receives: rvalues - global indices of the rows this process will own; count - running offset into rvalues */
      PetscCall(PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits));
      count = 0;
      for (i=0; i<nrecvs; i++) {
        PetscCallMPI(MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i));
        count += olengths1[i];
      }

      /* do sends:
         1) starts[i] gives the starting index in svalues for stuff going to
            the ith processor
      */
      PetscCall(PetscMalloc3(cnt,&svalues,nsends,&send_waits,size,&starts));
      starts[0] = 0;
      for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[i-1];
      for (i=0; i<cnt; i++) svalues[starts[owner[i]]++] = rows[i];
      for (i=0; i<cnt; i++) rows[i] = rows[i] - rstart;
      red->drows = drows;
      red->dcnt  = dcnt;
      PetscCall(PetscFree(rows));

      starts[0] = 0;
      for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[i-1];
      count = 0;
      for (i=0; i<size; i++) {
        if (sizes[i]) {
          PetscCallMPI(MPI_Isend(svalues+starts[i],sizes[i],MPIU_INT,i,tag,comm,send_waits+count++));
        }
      }

      /* wait on receives */
      count = nrecvs;
      slen  = 0;
      while (count) {
        PetscCallMPI(MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status));
        /* unpack receives into our local space */
        PetscCallMPI(MPI_Get_count(&recv_status,MPIU_INT,&n));
        slen += n;
        count--;
      }
      PetscCheck(slen == recvtotal,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %" PetscInt_FMT " not expected %" PetscInt_FMT,slen,recvtotal);
      PetscCall(ISCreateGeneral(comm,slen,rvalues,PETSC_COPY_VALUES,&red->is));

      /* free all work space */
      PetscCall(PetscFree(olengths1));
      PetscCall(PetscFree(onodes1));
      PetscCall(PetscFree3(rvalues,source,recv_waits));
      PetscCall(PetscFree2(sizes,owner));
      if (nsends) {   /* wait on sends */
        PetscCall(PetscMalloc1(nsends,&send_status));
        PetscCallMPI(MPI_Waitall(nsends,send_waits,send_status));
        PetscCall(PetscFree(send_status));
      }
      PetscCall(PetscFree3(svalues,send_waits,starts));
    } else {
      PetscCall(ISCreateGeneral(comm,cnt,rows,PETSC_OWN_POINTER,&red->is));
      red->drows = drows;
      red->dcnt  = dcnt;
      slen = cnt;
    }
    PetscCall(PetscLayoutDestroy(&map));
    PetscCall(PetscLayoutDestroy(&nmap));

    PetscCall(VecCreateMPI(comm,slen,PETSC_DETERMINE,&red->b));
    PetscCall(VecDuplicate(red->b,&red->x));
    PetscCall(MatCreateVecs(pc->pmat,&tvec,NULL));
    PetscCall(VecScatterCreate(tvec,red->is,red->b,NULL,&red->scatter));
    PetscCall(VecDestroy(&tvec));
    PetscCall(MatCreateSubMatrix(pc->pmat,red->is,red->is,MAT_INITIAL_MATRIX,&tmat));
    PetscCall(KSPSetOperators(red->ksp,tmat,tmat));
    PetscCall(MatDestroy(&tmat));
  }

  /* get diagonal portion of matrix */
  PetscCall(PetscFree(red->diag));
  PetscCall(PetscMalloc1(red->dcnt,&red->diag));
  PetscCall(MatCreateVecs(pc->pmat,&diag,NULL));
  PetscCall(MatGetDiagonal(pc->pmat,diag));
  PetscCall(VecGetArrayRead(diag,&d));
  for (i=0; i<red->dcnt; i++) red->diag[i] = 1.0/d[red->drows[i]];
  PetscCall(VecRestoreArrayRead(diag,&d));
  PetscCall(VecDestroy(&diag));
  PetscCall(KSPSetUp(red->ksp));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCApply_Redistribute(PC pc,Vec b,Vec x)
{
  PC_Redistribute   *red = (PC_Redistribute*)pc->data;
  PetscInt          dcnt = red->dcnt,i;
  const PetscInt    *drows = red->drows;
  PetscScalar       *xwork;
  const PetscScalar *bwork,*diag = red->diag;

  PetscFunctionBegin;
  if (!red->work) {
    PetscCall(VecDuplicate(b,&red->work));
  }
  /* compute the rows of the solution that have diagonal entries only */
  PetscCall(VecSet(x,0.0));         /* x = diag(A)^{-1} b */
  PetscCall(VecGetArray(x,&xwork));
  PetscCall(VecGetArrayRead(b,&bwork));
  for (i=0; i<dcnt; i++) xwork[drows[i]] = diag[i]*bwork[drows[i]];
  PetscCall(PetscLogFlops(dcnt));
  PetscCall(VecRestoreArray(x,&xwork));
  PetscCall(VecRestoreArrayRead(b,&bwork));
  /* update the right-hand side for the reduced system with the diagonal rows (and corresponding columns) removed */
  PetscCall(MatMult(pc->pmat,x,red->work));
  PetscCall(VecAYPX(red->work,-1.0,b));   /* red->work = b - A x */

  PetscCall(VecScatterBegin(red->scatter,red->work,red->b,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(red->scatter,red->work,red->b,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(KSPSolve(red->ksp,red->b,red->x));
  PetscCall(KSPCheckSolve(red->ksp,pc,red->x));
  PetscCall(VecScatterBegin(red->scatter,red->x,x,INSERT_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(red->scatter,red->x,x,INSERT_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCDestroy_Redistribute(PC pc)
{
  PC_Redistribute *red = (PC_Redistribute*)pc->data;

  PetscFunctionBegin;
  PetscCall(VecScatterDestroy(&red->scatter));
  PetscCall(ISDestroy(&red->is));
  PetscCall(VecDestroy(&red->b));
  PetscCall(VecDestroy(&red->x));
  PetscCall(KSPDestroy(&red->ksp));
  PetscCall(VecDestroy(&red->work));
  PetscCall(PetscFree(red->drows));
  PetscCall(PetscFree(red->diag));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(0);
}

static PetscErrorCode PCSetFromOptions_Redistribute(PetscOptionItems *PetscOptionsObject,PC pc)
{
  PC_Redistribute *red = (PC_Redistribute*)pc->data;

  PetscFunctionBegin;
  PetscCall(KSPSetFromOptions(red->ksp));
  PetscFunctionReturn(0);
}

/*@
   PCRedistributeGetKSP - Gets the KSP created by the PCREDISTRIBUTE preconditioner

   Not Collective

   Input Parameter:
.  pc - the preconditioner context

   Output Parameter:
.  innerksp - the inner KSP

   Level: advanced

@*/
PetscErrorCode PCRedistributeGetKSP(PC pc,KSP *innerksp)
{
  PC_Redistribute *red = (PC_Redistribute*)pc->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidPointer(innerksp,2);
  *innerksp = red->ksp;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------*/
/*MC
     PCREDISTRIBUTE - Redistributes a matrix for load balancing, removing the rows (and the corresponding columns) that only have a diagonal entry, and then
     applies a KSP to the smaller matrix

     Options for the inner solver can be set with -redistribute_ksp_xxx <values> and -redistribute_pc_xxx <values>

     Notes:
     Usually run this with -ksp_type preonly

     If you have used `MatZeroRows()` to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem then you can use, for example, -ksp_type preonly
     -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc to take advantage of the symmetry.

     This does NOT call a partitioner to reorder rows to lower communication; the ordering of the rows in the original matrix and the redistributed matrix is the same.

     Developer Notes:
     Should add an option to this preconditioner to use a partitioner to redistribute the rows to lower communication.

     Level: intermediate

.seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PCRedistributeGetKSP()`, `MatZeroRows()`
M*/

PETSC_EXTERN PetscErrorCode PCCreate_Redistribute(PC pc)
{
  PC_Redistribute *red;
  const char      *prefix;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(pc,&red));
  pc->data = (void*)red;

  pc->ops->apply          = PCApply_Redistribute;
  pc->ops->applytranspose = NULL;
  pc->ops->setup          = PCSetUp_Redistribute;
  pc->ops->destroy        = PCDestroy_Redistribute;
  pc->ops->setfromoptions = PCSetFromOptions_Redistribute;
  pc->ops->view           = PCView_Redistribute;

  PetscCall(KSPCreate(PetscObjectComm((PetscObject)pc),&red->ksp));
  PetscCall(KSPSetErrorIfNotConverged(red->ksp,pc->erroriffailure));
  PetscCall(PetscObjectIncrementTabLevel((PetscObject)red->ksp,(PetscObject)pc,1));
  PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)red->ksp));
  PetscCall(PCGetOptionsPrefix(pc,&prefix));
  PetscCall(KSPSetOptionsPrefix(red->ksp,prefix));
  PetscCall(KSPAppendOptionsPrefix(red->ksp,"redistribute_"));
  PetscFunctionReturn(0);
}
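
/*
   A minimal usage sketch, assuming A, b, and x are an already assembled Mat and Vecs: the outer
   KSP is typically KSPPREONLY so that the entire solve happens inside the redistributed inner KSP,
   which can be configured through the "redistribute_" options prefix or accessed with
   PCRedistributeGetKSP().

     KSP ksp,innerksp;
     PC  pc;

     PetscCall(KSPCreate(PETSC_COMM_WORLD,&ksp));
     PetscCall(KSPSetOperators(ksp,A,A));
     PetscCall(KSPSetType(ksp,KSPPREONLY));          // outer solve is a single application of the preconditioner
     PetscCall(KSPGetPC(ksp,&pc));
     PetscCall(PCSetType(pc,PCREDISTRIBUTE));
     PetscCall(KSPSetFromOptions(ksp));              // picks up -redistribute_ksp_xxx and -redistribute_pc_xxx
     PetscCall(KSPSolve(ksp,b,x));
     PetscCall(PCRedistributeGetKSP(pc,&innerksp));  // inspect or further configure the inner solver
     PetscCall(KSPDestroy(&ksp));
*/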