1 2 /* 3 This file defines a "solve the problem redistributely on each subgroup of processor" preconditioner. 4 */ 5 #include <petsc/private/pcimpl.h> /*I "petscksp.h" I*/ 6 #include <petscksp.h> 7 8 typedef struct { 9 KSP ksp; 10 Vec x,b; 11 VecScatter scatter; 12 IS is; 13 PetscInt dcnt,*drows; /* these are the local rows that have only diagonal entry */ 14 PetscScalar *diag; 15 Vec work; 16 } PC_Redistribute; 17 18 static PetscErrorCode PCView_Redistribute(PC pc,PetscViewer viewer) 19 { 20 PC_Redistribute *red = (PC_Redistribute*)pc->data; 21 PetscBool iascii,isstring; 22 PetscInt ncnt,N; 23 24 PetscFunctionBegin; 25 CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 26 CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring)); 27 if (iascii) { 28 CHKERRMPI(MPIU_Allreduce(&red->dcnt,&ncnt,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 29 CHKERRQ(MatGetSize(pc->pmat,&N,NULL)); 30 CHKERRQ(PetscViewerASCIIPrintf(viewer," Number rows eliminated %D Percentage rows eliminated %g\n",ncnt,100.0*((PetscReal)ncnt)/((PetscReal)N))); 31 CHKERRQ(PetscViewerASCIIPrintf(viewer," Redistribute preconditioner: \n")); 32 CHKERRQ(KSPView(red->ksp,viewer)); 33 } else if (isstring) { 34 CHKERRQ(PetscViewerStringSPrintf(viewer," Redistribute preconditioner")); 35 CHKERRQ(KSPView(red->ksp,viewer)); 36 } 37 PetscFunctionReturn(0); 38 } 39 40 static PetscErrorCode PCSetUp_Redistribute(PC pc) 41 { 42 PC_Redistribute *red = (PC_Redistribute*)pc->data; 43 MPI_Comm comm; 44 PetscInt rstart,rend,i,nz,cnt,*rows,ncnt,dcnt,*drows; 45 PetscLayout map,nmap; 46 PetscMPIInt size,tag,n; 47 PETSC_UNUSED PetscMPIInt imdex; 48 PetscInt *source = NULL; 49 PetscMPIInt *sizes = NULL,nrecvs; 50 PetscInt j,nsends; 51 PetscInt *owner = NULL,*starts = NULL,count,slen; 52 PetscInt *rvalues,*svalues,recvtotal; 53 PetscMPIInt *onodes1,*olengths1; 54 MPI_Request *send_waits = NULL,*recv_waits = NULL; 55 MPI_Status recv_status,*send_status; 56 Vec tvec,diag; 57 Mat tmat; 58 const PetscScalar *d,*values; 59 const PetscInt *cols; 60 61 PetscFunctionBegin; 62 if (pc->setupcalled) { 63 CHKERRQ(KSPGetOperators(red->ksp,NULL,&tmat)); 64 CHKERRQ(MatCreateSubMatrix(pc->pmat,red->is,red->is,MAT_REUSE_MATRIX,&tmat)); 65 CHKERRQ(KSPSetOperators(red->ksp,tmat,tmat)); 66 } else { 67 PetscInt NN; 68 69 CHKERRQ(PetscObjectGetComm((PetscObject)pc,&comm)); 70 CHKERRMPI(MPI_Comm_size(comm,&size)); 71 CHKERRQ(PetscObjectGetNewTag((PetscObject)pc,&tag)); 72 73 /* count non-diagonal rows on process */ 74 CHKERRQ(MatGetOwnershipRange(pc->mat,&rstart,&rend)); 75 cnt = 0; 76 for (i=rstart; i<rend; i++) { 77 CHKERRQ(MatGetRow(pc->mat,i,&nz,&cols,&values)); 78 for (PetscInt j=0; j<nz; j++) { 79 if (values[j] != 0 && cols[j] != i) { 80 cnt++; 81 break; 82 } 83 } 84 CHKERRQ(MatRestoreRow(pc->mat,i,&nz,&cols,&values)); 85 } 86 CHKERRQ(PetscMalloc1(cnt,&rows)); 87 CHKERRQ(PetscMalloc1(rend - rstart - cnt,&drows)); 88 89 /* list non-diagonal rows on process */ 90 cnt = 0; dcnt = 0; 91 for (i=rstart; i<rend; i++) { 92 PetscBool diagonly = PETSC_TRUE; 93 CHKERRQ(MatGetRow(pc->mat,i,&nz,&cols,&values)); 94 for (PetscInt j=0; j<nz; j++) { 95 if (values[j] != 0 && cols[j] != i) { 96 diagonly = PETSC_FALSE; 97 break; 98 } 99 } 100 if (!diagonly) rows[cnt++] = i; 101 else drows[dcnt++] = i - rstart; 102 CHKERRQ(MatRestoreRow(pc->mat,i,&nz,&cols,&values)); 103 } 104 105 /* create PetscLayout for non-diagonal rows on each process */ 106 CHKERRQ(PetscLayoutCreate(comm,&map)); 107 CHKERRQ(PetscLayoutSetLocalSize(map,cnt)); 108 CHKERRQ(PetscLayoutSetBlockSize(map,1)); 109 CHKERRQ(PetscLayoutSetUp(map)); 110 rstart = map->rstart; 111 rend = map->rend; 112 113 /* create PetscLayout for load-balanced non-diagonal rows on each process */ 114 CHKERRQ(PetscLayoutCreate(comm,&nmap)); 115 CHKERRMPI(MPIU_Allreduce(&cnt,&ncnt,1,MPIU_INT,MPI_SUM,comm)); 116 CHKERRQ(PetscLayoutSetSize(nmap,ncnt)); 117 CHKERRQ(PetscLayoutSetBlockSize(nmap,1)); 118 CHKERRQ(PetscLayoutSetUp(nmap)); 119 120 CHKERRQ(MatGetSize(pc->pmat,&NN,NULL)); 121 CHKERRQ(PetscInfo(pc,"Number of diagonal rows eliminated %d, percentage eliminated %g\n",NN-ncnt,((PetscReal)(NN-ncnt))/((PetscReal)(NN)))); 122 123 if (size > 1) { 124 /* the following block of code assumes MPI can send messages to self, which is not supported for MPI-uni hence we need to handle the size 1 case as a special case */ 125 /* 126 this code is taken from VecScatterCreate_PtoS() 127 Determines what rows need to be moved where to 128 load balance the non-diagonal rows 129 */ 130 /* count number of contributors to each processor */ 131 CHKERRQ(PetscMalloc2(size,&sizes,cnt,&owner)); 132 CHKERRQ(PetscArrayzero(sizes,size)); 133 j = 0; 134 nsends = 0; 135 for (i=rstart; i<rend; i++) { 136 if (i < nmap->range[j]) j = 0; 137 for (; j<size; j++) { 138 if (i < nmap->range[j+1]) { 139 if (!sizes[j]++) nsends++; 140 owner[i-rstart] = j; 141 break; 142 } 143 } 144 } 145 /* inform other processors of number of messages and max length*/ 146 CHKERRQ(PetscGatherNumberOfMessages(comm,NULL,sizes,&nrecvs)); 147 CHKERRQ(PetscGatherMessageLengths(comm,nsends,nrecvs,sizes,&onodes1,&olengths1)); 148 CHKERRQ(PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1)); 149 recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i]; 150 151 /* post receives: rvalues - rows I will own; count - nu */ 152 CHKERRQ(PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits)); 153 count = 0; 154 for (i=0; i<nrecvs; i++) { 155 CHKERRMPI(MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i)); 156 count += olengths1[i]; 157 } 158 159 /* do sends: 160 1) starts[i] gives the starting index in svalues for stuff going to 161 the ith processor 162 */ 163 CHKERRQ(PetscMalloc3(cnt,&svalues,nsends,&send_waits,size,&starts)); 164 starts[0] = 0; 165 for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[i-1]; 166 for (i=0; i<cnt; i++) svalues[starts[owner[i]]++] = rows[i]; 167 for (i=0; i<cnt; i++) rows[i] = rows[i] - rstart; 168 red->drows = drows; 169 red->dcnt = dcnt; 170 CHKERRQ(PetscFree(rows)); 171 172 starts[0] = 0; 173 for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[i-1]; 174 count = 0; 175 for (i=0; i<size; i++) { 176 if (sizes[i]) { 177 CHKERRMPI(MPI_Isend(svalues+starts[i],sizes[i],MPIU_INT,i,tag,comm,send_waits+count++)); 178 } 179 } 180 181 /* wait on receives */ 182 count = nrecvs; 183 slen = 0; 184 while (count) { 185 CHKERRMPI(MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status)); 186 /* unpack receives into our local space */ 187 CHKERRMPI(MPI_Get_count(&recv_status,MPIU_INT,&n)); 188 slen += n; 189 count--; 190 } 191 PetscCheckFalse(slen != recvtotal,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal); 192 CHKERRQ(ISCreateGeneral(comm,slen,rvalues,PETSC_COPY_VALUES,&red->is)); 193 194 /* free all work space */ 195 CHKERRQ(PetscFree(olengths1)); 196 CHKERRQ(PetscFree(onodes1)); 197 CHKERRQ(PetscFree3(rvalues,source,recv_waits)); 198 CHKERRQ(PetscFree2(sizes,owner)); 199 if (nsends) { /* wait on sends */ 200 CHKERRQ(PetscMalloc1(nsends,&send_status)); 201 CHKERRMPI(MPI_Waitall(nsends,send_waits,send_status)); 202 CHKERRQ(PetscFree(send_status)); 203 } 204 CHKERRQ(PetscFree3(svalues,send_waits,starts)); 205 } else { 206 CHKERRQ(ISCreateGeneral(comm,cnt,rows,PETSC_OWN_POINTER,&red->is)); 207 red->drows = drows; 208 red->dcnt = dcnt; 209 slen = cnt; 210 } 211 CHKERRQ(PetscLayoutDestroy(&map)); 212 CHKERRQ(PetscLayoutDestroy(&nmap)); 213 214 CHKERRQ(VecCreateMPI(comm,slen,PETSC_DETERMINE,&red->b)); 215 CHKERRQ(VecDuplicate(red->b,&red->x)); 216 CHKERRQ(MatCreateVecs(pc->pmat,&tvec,NULL)); 217 CHKERRQ(VecScatterCreate(tvec,red->is,red->b,NULL,&red->scatter)); 218 CHKERRQ(VecDestroy(&tvec)); 219 CHKERRQ(MatCreateSubMatrix(pc->pmat,red->is,red->is,MAT_INITIAL_MATRIX,&tmat)); 220 CHKERRQ(KSPSetOperators(red->ksp,tmat,tmat)); 221 CHKERRQ(MatDestroy(&tmat)); 222 } 223 224 /* get diagonal portion of matrix */ 225 CHKERRQ(PetscFree(red->diag)); 226 CHKERRQ(PetscMalloc1(red->dcnt,&red->diag)); 227 CHKERRQ(MatCreateVecs(pc->pmat,&diag,NULL)); 228 CHKERRQ(MatGetDiagonal(pc->pmat,diag)); 229 CHKERRQ(VecGetArrayRead(diag,&d)); 230 for (i=0; i<red->dcnt; i++) red->diag[i] = 1.0/d[red->drows[i]]; 231 CHKERRQ(VecRestoreArrayRead(diag,&d)); 232 CHKERRQ(VecDestroy(&diag)); 233 CHKERRQ(KSPSetUp(red->ksp)); 234 PetscFunctionReturn(0); 235 } 236 237 static PetscErrorCode PCApply_Redistribute(PC pc,Vec b,Vec x) 238 { 239 PC_Redistribute *red = (PC_Redistribute*)pc->data; 240 PetscInt dcnt = red->dcnt,i; 241 const PetscInt *drows = red->drows; 242 PetscScalar *xwork; 243 const PetscScalar *bwork,*diag = red->diag; 244 245 PetscFunctionBegin; 246 if (!red->work) { 247 CHKERRQ(VecDuplicate(b,&red->work)); 248 } 249 /* compute the rows of solution that have diagonal entries only */ 250 CHKERRQ(VecSet(x,0.0)); /* x = diag(A)^{-1} b */ 251 CHKERRQ(VecGetArray(x,&xwork)); 252 CHKERRQ(VecGetArrayRead(b,&bwork)); 253 for (i=0; i<dcnt; i++) xwork[drows[i]] = diag[i]*bwork[drows[i]]; 254 CHKERRQ(PetscLogFlops(dcnt)); 255 CHKERRQ(VecRestoreArray(red->work,&xwork)); 256 CHKERRQ(VecRestoreArrayRead(b,&bwork)); 257 /* update the right hand side for the reduced system with diagonal rows (and corresponding columns) removed */ 258 CHKERRQ(MatMult(pc->pmat,x,red->work)); 259 CHKERRQ(VecAYPX(red->work,-1.0,b)); /* red->work = b - A x */ 260 261 CHKERRQ(VecScatterBegin(red->scatter,red->work,red->b,INSERT_VALUES,SCATTER_FORWARD)); 262 CHKERRQ(VecScatterEnd(red->scatter,red->work,red->b,INSERT_VALUES,SCATTER_FORWARD)); 263 CHKERRQ(KSPSolve(red->ksp,red->b,red->x)); 264 CHKERRQ(KSPCheckSolve(red->ksp,pc,red->x)); 265 CHKERRQ(VecScatterBegin(red->scatter,red->x,x,INSERT_VALUES,SCATTER_REVERSE)); 266 CHKERRQ(VecScatterEnd(red->scatter,red->x,x,INSERT_VALUES,SCATTER_REVERSE)); 267 PetscFunctionReturn(0); 268 } 269 270 static PetscErrorCode PCDestroy_Redistribute(PC pc) 271 { 272 PC_Redistribute *red = (PC_Redistribute*)pc->data; 273 274 PetscFunctionBegin; 275 CHKERRQ(VecScatterDestroy(&red->scatter)); 276 CHKERRQ(ISDestroy(&red->is)); 277 CHKERRQ(VecDestroy(&red->b)); 278 CHKERRQ(VecDestroy(&red->x)); 279 CHKERRQ(KSPDestroy(&red->ksp)); 280 CHKERRQ(VecDestroy(&red->work)); 281 CHKERRQ(PetscFree(red->drows)); 282 CHKERRQ(PetscFree(red->diag)); 283 CHKERRQ(PetscFree(pc->data)); 284 PetscFunctionReturn(0); 285 } 286 287 static PetscErrorCode PCSetFromOptions_Redistribute(PetscOptionItems *PetscOptionsObject,PC pc) 288 { 289 PC_Redistribute *red = (PC_Redistribute*)pc->data; 290 291 PetscFunctionBegin; 292 CHKERRQ(KSPSetFromOptions(red->ksp)); 293 PetscFunctionReturn(0); 294 } 295 296 /*@ 297 PCRedistributeGetKSP - Gets the KSP created by the PCREDISTRIBUTE 298 299 Not Collective 300 301 Input Parameter: 302 . pc - the preconditioner context 303 304 Output Parameter: 305 . innerksp - the inner KSP 306 307 Level: advanced 308 309 @*/ 310 PetscErrorCode PCRedistributeGetKSP(PC pc,KSP *innerksp) 311 { 312 PC_Redistribute *red = (PC_Redistribute*)pc->data; 313 314 PetscFunctionBegin; 315 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 316 PetscValidPointer(innerksp,2); 317 *innerksp = red->ksp; 318 PetscFunctionReturn(0); 319 } 320 321 /* -------------------------------------------------------------------------------------*/ 322 /*MC 323 PCREDISTRIBUTE - Redistributes a matrix for load balancing, removing the rows that only have a diagonal entry and then applys a KSP to that new matrix 324 325 Options for the redistribute preconditioners can be set with -redistribute_ksp_xxx <values> and -redistribute_pc_xxx <values> 326 327 Notes: 328 Usually run this with -ksp_type preonly 329 330 If you have used MatZeroRows() to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem then you can use, for example, -ksp_type preonly 331 -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc to take advantage of the symmetry. 332 333 This does NOT call a partitioner to reorder rows to lower communication; the ordering of the rows in the original matrix and redistributed matrix is the same. 334 335 Developer Notes: 336 Should add an option to this preconditioner to use a partitioner to redistribute the rows to lower communication. 337 338 Level: intermediate 339 340 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PCRedistributeGetKSP() 341 M*/ 342 343 PETSC_EXTERN PetscErrorCode PCCreate_Redistribute(PC pc) 344 { 345 PC_Redistribute *red; 346 const char *prefix; 347 348 PetscFunctionBegin; 349 CHKERRQ(PetscNewLog(pc,&red)); 350 pc->data = (void*)red; 351 352 pc->ops->apply = PCApply_Redistribute; 353 pc->ops->applytranspose = NULL; 354 pc->ops->setup = PCSetUp_Redistribute; 355 pc->ops->destroy = PCDestroy_Redistribute; 356 pc->ops->setfromoptions = PCSetFromOptions_Redistribute; 357 pc->ops->view = PCView_Redistribute; 358 359 CHKERRQ(KSPCreate(PetscObjectComm((PetscObject)pc),&red->ksp)); 360 CHKERRQ(KSPSetErrorIfNotConverged(red->ksp,pc->erroriffailure)); 361 CHKERRQ(PetscObjectIncrementTabLevel((PetscObject)red->ksp,(PetscObject)pc,1)); 362 CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)red->ksp)); 363 CHKERRQ(PCGetOptionsPrefix(pc,&prefix)); 364 CHKERRQ(KSPSetOptionsPrefix(red->ksp,prefix)); 365 CHKERRQ(KSPAppendOptionsPrefix(red->ksp,"redistribute_")); 366 PetscFunctionReturn(0); 367 } 368