1 2 /* 3 Defines a block Jacobi preconditioner. 4 */ 5 6 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /*I "petscpc.h" I*/ 7 8 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC,Mat,Mat); 9 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC,Mat,Mat); 10 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC); 11 12 static PetscErrorCode PCSetUp_BJacobi(PC pc) 13 { 14 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 15 Mat mat = pc->mat,pmat = pc->pmat; 16 PetscBool hasop; 17 PetscInt N,M,start,i,sum,end; 18 PetscInt bs,i_start=-1,i_end=-1; 19 PetscMPIInt rank,size; 20 21 PetscFunctionBegin; 22 CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank)); 23 CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size)); 24 CHKERRQ(MatGetLocalSize(pc->pmat,&M,&N)); 25 CHKERRQ(MatGetBlockSize(pc->pmat,&bs)); 26 27 if (jac->n > 0 && jac->n < size) { 28 CHKERRQ(PCSetUp_BJacobi_Multiproc(pc)); 29 PetscFunctionReturn(0); 30 } 31 32 /* -------------------------------------------------------------------------- 33 Determines the number of blocks assigned to each processor 34 -----------------------------------------------------------------------------*/ 35 36 /* local block count given */ 37 if (jac->n_local > 0 && jac->n < 0) { 38 CHKERRMPI(MPIU_Allreduce(&jac->n_local,&jac->n,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 39 if (jac->l_lens) { /* check that user set these correctly */ 40 sum = 0; 41 for (i=0; i<jac->n_local; i++) { 42 PetscCheckFalse(jac->l_lens[i]/bs*bs !=jac->l_lens[i],PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Mat blocksize doesn't match block Jacobi layout"); 43 sum += jac->l_lens[i]; 44 } 45 PetscCheckFalse(sum != M,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local lens set incorrectly"); 46 } else { 47 CHKERRQ(PetscMalloc1(jac->n_local,&jac->l_lens)); 48 for (i=0; i<jac->n_local; i++) jac->l_lens[i] = bs*((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i)); 49 } 50 } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */ 51 /* global blocks given: determine which ones are local */ 52 if (jac->g_lens) { 53 /* check if the g_lens is has valid entries */ 54 for (i=0; i<jac->n; i++) { 55 PetscCheckFalse(!jac->g_lens[i],PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Zero block not allowed"); 56 PetscCheckFalse(jac->g_lens[i]/bs*bs != jac->g_lens[i],PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Mat blocksize doesn't match block Jacobi layout"); 57 } 58 if (size == 1) { 59 jac->n_local = jac->n; 60 CHKERRQ(PetscMalloc1(jac->n_local,&jac->l_lens)); 61 CHKERRQ(PetscArraycpy(jac->l_lens,jac->g_lens,jac->n_local)); 62 /* check that user set these correctly */ 63 sum = 0; 64 for (i=0; i<jac->n_local; i++) sum += jac->l_lens[i]; 65 PetscCheckFalse(sum != M,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Global lens set incorrectly"); 66 } else { 67 CHKERRQ(MatGetOwnershipRange(pc->pmat,&start,&end)); 68 /* loop over blocks determing first one owned by me */ 69 sum = 0; 70 for (i=0; i<jac->n+1; i++) { 71 if (sum == start) { i_start = i; goto start_1;} 72 if (i < jac->n) sum += jac->g_lens[i]; 73 } 74 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout"); 75 start_1: 76 for (i=i_start; i<jac->n+1; i++) { 77 if (sum == end) { i_end = i; goto end_1; } 78 if (i < jac->n) sum += jac->g_lens[i]; 79 } 80 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout"); 81 end_1: 82 jac->n_local = i_end - i_start; 83 CHKERRQ(PetscMalloc1(jac->n_local,&jac->l_lens)); 84 CHKERRQ(PetscArraycpy(jac->l_lens,jac->g_lens+i_start,jac->n_local)); 85 } 86 } else { /* no global blocks given, determine then using default layout */ 87 jac->n_local = jac->n/size + ((jac->n % size) > rank); 88 CHKERRQ(PetscMalloc1(jac->n_local,&jac->l_lens)); 89 for (i=0; i<jac->n_local; i++) { 90 jac->l_lens[i] = ((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i))*bs; 91 PetscCheckFalse(!jac->l_lens[i],PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Too many blocks given"); 92 } 93 } 94 } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */ 95 jac->n = size; 96 jac->n_local = 1; 97 CHKERRQ(PetscMalloc1(1,&jac->l_lens)); 98 jac->l_lens[0] = M; 99 } else { /* jac->n > 0 && jac->n_local > 0 */ 100 if (!jac->l_lens) { 101 CHKERRQ(PetscMalloc1(jac->n_local,&jac->l_lens)); 102 for (i=0; i<jac->n_local; i++) jac->l_lens[i] = bs*((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i)); 103 } 104 } 105 PetscCheckFalse(jac->n_local < 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of blocks is less than number of processors"); 106 107 /* ------------------------- 108 Determines mat and pmat 109 ---------------------------*/ 110 CHKERRQ(MatHasOperation(pc->mat,MATOP_GET_DIAGONAL_BLOCK,&hasop)); 111 if (!hasop && size == 1) { 112 mat = pc->mat; 113 pmat = pc->pmat; 114 } else { 115 if (pc->useAmat) { 116 /* use block from Amat matrix, not Pmat for local MatMult() */ 117 CHKERRQ(MatGetDiagonalBlock(pc->mat,&mat)); 118 } 119 if (pc->pmat != pc->mat || !pc->useAmat) { 120 CHKERRQ(MatGetDiagonalBlock(pc->pmat,&pmat)); 121 } else pmat = mat; 122 } 123 124 /* ------ 125 Setup code depends on the number of blocks 126 */ 127 if (jac->n_local == 1) { 128 CHKERRQ(PCSetUp_BJacobi_Singleblock(pc,mat,pmat)); 129 } else { 130 CHKERRQ(PCSetUp_BJacobi_Multiblock(pc,mat,pmat)); 131 } 132 PetscFunctionReturn(0); 133 } 134 135 /* Default destroy, if it has never been setup */ 136 static PetscErrorCode PCDestroy_BJacobi(PC pc) 137 { 138 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 139 140 PetscFunctionBegin; 141 CHKERRQ(PetscFree(jac->g_lens)); 142 CHKERRQ(PetscFree(jac->l_lens)); 143 CHKERRQ(PetscFree(pc->data)); 144 PetscFunctionReturn(0); 145 } 146 147 static PetscErrorCode PCSetFromOptions_BJacobi(PetscOptionItems *PetscOptionsObject,PC pc) 148 { 149 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 150 PetscInt blocks,i; 151 PetscBool flg; 152 153 PetscFunctionBegin; 154 CHKERRQ(PetscOptionsHead(PetscOptionsObject,"Block Jacobi options")); 155 CHKERRQ(PetscOptionsInt("-pc_bjacobi_blocks","Total number of blocks","PCBJacobiSetTotalBlocks",jac->n,&blocks,&flg)); 156 if (flg) CHKERRQ(PCBJacobiSetTotalBlocks(pc,blocks,NULL)); 157 CHKERRQ(PetscOptionsInt("-pc_bjacobi_local_blocks","Local number of blocks","PCBJacobiSetLocalBlocks",jac->n_local,&blocks,&flg)); 158 if (flg) CHKERRQ(PCBJacobiSetLocalBlocks(pc,blocks,NULL)); 159 if (jac->ksp) { 160 /* The sub-KSP has already been set up (e.g., PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions was not called 161 * unless we had already been called. */ 162 for (i=0; i<jac->n_local; i++) { 163 CHKERRQ(KSPSetFromOptions(jac->ksp[i])); 164 } 165 } 166 CHKERRQ(PetscOptionsTail()); 167 PetscFunctionReturn(0); 168 } 169 170 #include <petscdraw.h> 171 static PetscErrorCode PCView_BJacobi(PC pc,PetscViewer viewer) 172 { 173 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 174 PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data; 175 PetscErrorCode ierr; 176 PetscMPIInt rank; 177 PetscInt i; 178 PetscBool iascii,isstring,isdraw; 179 PetscViewer sviewer; 180 PetscViewerFormat format; 181 const char *prefix; 182 183 PetscFunctionBegin; 184 CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 185 CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring)); 186 CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 187 if (iascii) { 188 if (pc->useAmat) { 189 CHKERRQ(PetscViewerASCIIPrintf(viewer," using Amat local matrix, number of blocks = %D\n",jac->n)); 190 } 191 CHKERRQ(PetscViewerASCIIPrintf(viewer," number of blocks = %D\n",jac->n)); 192 CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank)); 193 CHKERRQ(PetscViewerGetFormat(viewer,&format)); 194 if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) { 195 CHKERRQ(PetscViewerASCIIPrintf(viewer," Local solver information for first block is in the following KSP and PC objects on rank 0:\n")); 196 CHKERRQ(PCGetOptionsPrefix(pc,&prefix)); 197 CHKERRQ(PetscViewerASCIIPrintf(viewer," Use -%sksp_view ::ascii_info_detail to display information for all blocks\n",prefix?prefix:"")); 198 if (jac->ksp && !jac->psubcomm) { 199 CHKERRQ(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 200 if (rank == 0) { 201 CHKERRQ(PetscViewerASCIIPushTab(viewer)); 202 CHKERRQ(KSPView(jac->ksp[0],sviewer)); 203 CHKERRQ(PetscViewerASCIIPopTab(viewer)); 204 } 205 CHKERRQ(PetscViewerFlush(sviewer)); 206 CHKERRQ(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 207 CHKERRQ(PetscViewerFlush(viewer)); 208 /* extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */ 209 CHKERRQ(PetscViewerASCIIPopSynchronized(viewer)); 210 } else if (mpjac && jac->ksp && mpjac->psubcomm) { 211 CHKERRQ(PetscViewerGetSubViewer(viewer,mpjac->psubcomm->child,&sviewer)); 212 if (!mpjac->psubcomm->color) { 213 CHKERRQ(PetscViewerASCIIPushTab(viewer)); 214 CHKERRQ(KSPView(*(jac->ksp),sviewer)); 215 CHKERRQ(PetscViewerASCIIPopTab(viewer)); 216 } 217 CHKERRQ(PetscViewerFlush(sviewer)); 218 CHKERRQ(PetscViewerRestoreSubViewer(viewer,mpjac->psubcomm->child,&sviewer)); 219 CHKERRQ(PetscViewerFlush(viewer)); 220 /* extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */ 221 CHKERRQ(PetscViewerASCIIPopSynchronized(viewer)); 222 } else { 223 CHKERRQ(PetscViewerFlush(viewer)); 224 } 225 } else { 226 PetscInt n_global; 227 CHKERRMPI(MPIU_Allreduce(&jac->n_local,&n_global,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc))); 228 CHKERRQ(PetscViewerASCIIPushSynchronized(viewer)); 229 CHKERRQ(PetscViewerASCIIPrintf(viewer," Local solver information for each block is in the following KSP and PC objects:\n")); 230 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] number of local blocks = %D, first local block number = %D\n", 231 rank,jac->n_local,jac->first_local);CHKERRQ(ierr); 232 CHKERRQ(PetscViewerASCIIPushTab(viewer)); 233 CHKERRQ(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 234 for (i=0; i<jac->n_local; i++) { 235 CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local block number %D\n",rank,i)); 236 CHKERRQ(KSPView(jac->ksp[i],sviewer)); 237 CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n")); 238 } 239 CHKERRQ(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 240 CHKERRQ(PetscViewerASCIIPopTab(viewer)); 241 CHKERRQ(PetscViewerFlush(viewer)); 242 CHKERRQ(PetscViewerASCIIPopSynchronized(viewer)); 243 } 244 } else if (isstring) { 245 CHKERRQ(PetscViewerStringSPrintf(viewer," blks=%D",jac->n)); 246 CHKERRQ(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 247 if (jac->ksp) CHKERRQ(KSPView(jac->ksp[0],sviewer)); 248 CHKERRQ(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 249 } else if (isdraw) { 250 PetscDraw draw; 251 char str[25]; 252 PetscReal x,y,bottom,h; 253 254 CHKERRQ(PetscViewerDrawGetDraw(viewer,0,&draw)); 255 CHKERRQ(PetscDrawGetCurrentPoint(draw,&x,&y)); 256 CHKERRQ(PetscSNPrintf(str,25,"Number blocks %D",jac->n)); 257 CHKERRQ(PetscDrawStringBoxed(draw,x,y,PETSC_DRAW_RED,PETSC_DRAW_BLACK,str,NULL,&h)); 258 bottom = y - h; 259 CHKERRQ(PetscDrawPushCurrentPoint(draw,x,bottom)); 260 /* warning the communicator on viewer is different then on ksp in parallel */ 261 if (jac->ksp) CHKERRQ(KSPView(jac->ksp[0],viewer)); 262 CHKERRQ(PetscDrawPopCurrentPoint(draw)); 263 } 264 PetscFunctionReturn(0); 265 } 266 267 /* -------------------------------------------------------------------------------------*/ 268 269 static PetscErrorCode PCBJacobiGetSubKSP_BJacobi(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp) 270 { 271 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 272 273 PetscFunctionBegin; 274 PetscCheckFalse(!pc->setupcalled,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call KSPSetUp() or PCSetUp() first"); 275 276 if (n_local) *n_local = jac->n_local; 277 if (first_local) *first_local = jac->first_local; 278 if (ksp) *ksp = jac->ksp; 279 PetscFunctionReturn(0); 280 } 281 282 static PetscErrorCode PCBJacobiSetTotalBlocks_BJacobi(PC pc,PetscInt blocks,PetscInt *lens) 283 { 284 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 285 286 PetscFunctionBegin; 287 PetscCheckFalse(pc->setupcalled > 0 && jac->n!=blocks,PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called"); 288 jac->n = blocks; 289 if (!lens) jac->g_lens = NULL; 290 else { 291 CHKERRQ(PetscMalloc1(blocks,&jac->g_lens)); 292 CHKERRQ(PetscLogObjectMemory((PetscObject)pc,blocks*sizeof(PetscInt))); 293 CHKERRQ(PetscArraycpy(jac->g_lens,lens,blocks)); 294 } 295 PetscFunctionReturn(0); 296 } 297 298 static PetscErrorCode PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[]) 299 { 300 PC_BJacobi *jac = (PC_BJacobi*) pc->data; 301 302 PetscFunctionBegin; 303 *blocks = jac->n; 304 if (lens) *lens = jac->g_lens; 305 PetscFunctionReturn(0); 306 } 307 308 static PetscErrorCode PCBJacobiSetLocalBlocks_BJacobi(PC pc,PetscInt blocks,const PetscInt lens[]) 309 { 310 PC_BJacobi *jac; 311 312 PetscFunctionBegin; 313 jac = (PC_BJacobi*)pc->data; 314 315 jac->n_local = blocks; 316 if (!lens) jac->l_lens = NULL; 317 else { 318 CHKERRQ(PetscMalloc1(blocks,&jac->l_lens)); 319 CHKERRQ(PetscLogObjectMemory((PetscObject)pc,blocks*sizeof(PetscInt))); 320 CHKERRQ(PetscArraycpy(jac->l_lens,lens,blocks)); 321 } 322 PetscFunctionReturn(0); 323 } 324 325 static PetscErrorCode PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[]) 326 { 327 PC_BJacobi *jac = (PC_BJacobi*) pc->data; 328 329 PetscFunctionBegin; 330 *blocks = jac->n_local; 331 if (lens) *lens = jac->l_lens; 332 PetscFunctionReturn(0); 333 } 334 335 /* -------------------------------------------------------------------------------------*/ 336 337 /*@C 338 PCBJacobiGetSubKSP - Gets the local KSP contexts for all blocks on 339 this processor. 340 341 Not Collective 342 343 Input Parameter: 344 . pc - the preconditioner context 345 346 Output Parameters: 347 + n_local - the number of blocks on this processor, or NULL 348 . first_local - the global number of the first block on this processor, or NULL 349 - ksp - the array of KSP contexts 350 351 Notes: 352 After PCBJacobiGetSubKSP() the array of KSP contexts is not to be freed. 353 354 Currently for some matrix implementations only 1 block per processor 355 is supported. 356 357 You must call KSPSetUp() or PCSetUp() before calling PCBJacobiGetSubKSP(). 358 359 Fortran Usage: You must pass in a KSP array that is large enough to contain all the local KSPs. 360 You can call PCBJacobiGetSubKSP(pc,nlocal,firstlocal,PETSC_NULL_KSP,ierr) to determine how large the 361 KSP array must be. 362 363 Level: advanced 364 365 .seealso: PCASMGetSubKSP() 366 @*/ 367 PetscErrorCode PCBJacobiGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[]) 368 { 369 PetscFunctionBegin; 370 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 371 CHKERRQ(PetscUseMethod(pc,"PCBJacobiGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp))); 372 PetscFunctionReturn(0); 373 } 374 375 /*@ 376 PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block 377 Jacobi preconditioner. 378 379 Collective on PC 380 381 Input Parameters: 382 + pc - the preconditioner context 383 . blocks - the number of blocks 384 - lens - [optional] integer array containing the size of each block 385 386 Options Database Key: 387 . -pc_bjacobi_blocks <blocks> - Sets the number of global blocks 388 389 Notes: 390 Currently only a limited number of blocking configurations are supported. 391 All processors sharing the PC must call this routine with the same data. 392 393 Level: intermediate 394 395 .seealso: PCSetUseAmat(), PCBJacobiSetLocalBlocks() 396 @*/ 397 PetscErrorCode PCBJacobiSetTotalBlocks(PC pc,PetscInt blocks,const PetscInt lens[]) 398 { 399 PetscFunctionBegin; 400 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 401 PetscCheckFalse(blocks <= 0,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Must have positive blocks"); 402 CHKERRQ(PetscTryMethod(pc,"PCBJacobiSetTotalBlocks_C",(PC,PetscInt,const PetscInt[]),(pc,blocks,lens))); 403 PetscFunctionReturn(0); 404 } 405 406 /*@C 407 PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block 408 Jacobi preconditioner. 409 410 Not Collective 411 412 Input Parameter: 413 . pc - the preconditioner context 414 415 Output parameters: 416 + blocks - the number of blocks 417 - lens - integer array containing the size of each block 418 419 Level: intermediate 420 421 .seealso: PCSetUseAmat(), PCBJacobiGetLocalBlocks() 422 @*/ 423 PetscErrorCode PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[]) 424 { 425 PetscFunctionBegin; 426 PetscValidHeaderSpecific(pc, PC_CLASSID,1); 427 PetscValidIntPointer(blocks,2); 428 CHKERRQ(PetscUseMethod(pc,"PCBJacobiGetTotalBlocks_C",(PC,PetscInt*, const PetscInt *[]),(pc,blocks,lens))); 429 PetscFunctionReturn(0); 430 } 431 432 /*@ 433 PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block 434 Jacobi preconditioner. 435 436 Not Collective 437 438 Input Parameters: 439 + pc - the preconditioner context 440 . blocks - the number of blocks 441 - lens - [optional] integer array containing size of each block 442 443 Options Database Key: 444 . -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks 445 446 Note: 447 Currently only a limited number of blocking configurations are supported. 448 449 Level: intermediate 450 451 .seealso: PCSetUseAmat(), PCBJacobiSetTotalBlocks() 452 @*/ 453 PetscErrorCode PCBJacobiSetLocalBlocks(PC pc,PetscInt blocks,const PetscInt lens[]) 454 { 455 PetscFunctionBegin; 456 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 457 PetscCheckFalse(blocks < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Must have nonegative blocks"); 458 CHKERRQ(PetscTryMethod(pc,"PCBJacobiSetLocalBlocks_C",(PC,PetscInt,const PetscInt []),(pc,blocks,lens))); 459 PetscFunctionReturn(0); 460 } 461 462 /*@C 463 PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block 464 Jacobi preconditioner. 465 466 Not Collective 467 468 Input Parameters: 469 + pc - the preconditioner context 470 . blocks - the number of blocks 471 - lens - [optional] integer array containing size of each block 472 473 Note: 474 Currently only a limited number of blocking configurations are supported. 475 476 Level: intermediate 477 478 .seealso: PCSetUseAmat(), PCBJacobiGetTotalBlocks() 479 @*/ 480 PetscErrorCode PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[]) 481 { 482 PetscFunctionBegin; 483 PetscValidHeaderSpecific(pc, PC_CLASSID,1); 484 PetscValidIntPointer(blocks,2); 485 CHKERRQ(PetscUseMethod(pc,"PCBJacobiGetLocalBlocks_C",(PC,PetscInt*, const PetscInt *[]),(pc,blocks,lens))); 486 PetscFunctionReturn(0); 487 } 488 489 /* -----------------------------------------------------------------------------------*/ 490 491 /*MC 492 PCBJACOBI - Use block Jacobi preconditioning, each block is (approximately) solved with 493 its own KSP object. 494 495 Options Database Keys: 496 + -pc_use_amat - use Amat to apply block of operator in inner Krylov method 497 - -pc_bjacobi_blocks <n> - use n total blocks 498 499 Notes: 500 Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor. 501 502 To set options on the solvers for each block append -sub_ to all the KSP, KSP, and PC 503 options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly 504 505 To set the options on the solvers separate for each block call PCBJacobiGetSubKSP() 506 and set the options directly on the resulting KSP object (you can access its PC 507 KSPGetPC()) 508 509 For GPU-based vectors (CUDA, ViennaCL) it is recommended to use exactly one block per MPI process for best 510 performance. Different block partitioning may lead to additional data transfers 511 between host and GPU that lead to degraded performance. 512 513 The options prefix for each block is sub_, for example -sub_pc_type lu. 514 515 When multiple processes share a single block, each block encompasses exactly all the unknowns owned its set of processes. 516 517 See PCJACOBI for point Jacobi preconditioning, PCVPBJACOBI for variable size point block Jacobi and PCPBJACOBI for large blocks 518 519 Level: beginner 520 521 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 522 PCASM, PCSetUseAmat(), PCGetUseAmat(), PCBJacobiGetSubKSP(), PCBJacobiSetTotalBlocks(), 523 PCBJacobiSetLocalBlocks(), PCSetModifySubMatrices(), PCJACOBI, PCVPBJACOBI, PCPBJACOBI 524 M*/ 525 526 PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc) 527 { 528 PetscMPIInt rank; 529 PC_BJacobi *jac; 530 531 PetscFunctionBegin; 532 CHKERRQ(PetscNewLog(pc,&jac)); 533 CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank)); 534 535 pc->ops->apply = NULL; 536 pc->ops->matapply = NULL; 537 pc->ops->applytranspose = NULL; 538 pc->ops->setup = PCSetUp_BJacobi; 539 pc->ops->destroy = PCDestroy_BJacobi; 540 pc->ops->setfromoptions = PCSetFromOptions_BJacobi; 541 pc->ops->view = PCView_BJacobi; 542 pc->ops->applyrichardson = NULL; 543 544 pc->data = (void*)jac; 545 jac->n = -1; 546 jac->n_local = -1; 547 jac->first_local = rank; 548 jac->ksp = NULL; 549 jac->g_lens = NULL; 550 jac->l_lens = NULL; 551 jac->psubcomm = NULL; 552 553 CHKERRQ(PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetSubKSP_C",PCBJacobiGetSubKSP_BJacobi)); 554 CHKERRQ(PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiSetTotalBlocks_C",PCBJacobiSetTotalBlocks_BJacobi)); 555 CHKERRQ(PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetTotalBlocks_C",PCBJacobiGetTotalBlocks_BJacobi)); 556 CHKERRQ(PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiSetLocalBlocks_C",PCBJacobiSetLocalBlocks_BJacobi)); 557 CHKERRQ(PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetLocalBlocks_C",PCBJacobiGetLocalBlocks_BJacobi)); 558 PetscFunctionReturn(0); 559 } 560 561 /* --------------------------------------------------------------------------------------------*/ 562 /* 563 These are for a single block per processor; works for AIJ, BAIJ; Seq and MPI 564 */ 565 static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc) 566 { 567 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 568 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data; 569 570 PetscFunctionBegin; 571 CHKERRQ(KSPReset(jac->ksp[0])); 572 CHKERRQ(VecDestroy(&bjac->x)); 573 CHKERRQ(VecDestroy(&bjac->y)); 574 PetscFunctionReturn(0); 575 } 576 577 static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc) 578 { 579 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 580 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data; 581 582 PetscFunctionBegin; 583 CHKERRQ(PCReset_BJacobi_Singleblock(pc)); 584 CHKERRQ(KSPDestroy(&jac->ksp[0])); 585 CHKERRQ(PetscFree(jac->ksp)); 586 CHKERRQ(PetscFree(jac->l_lens)); 587 CHKERRQ(PetscFree(jac->g_lens)); 588 CHKERRQ(PetscFree(bjac)); 589 CHKERRQ(PetscFree(pc->data)); 590 PetscFunctionReturn(0); 591 } 592 593 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc) 594 { 595 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 596 KSP subksp = jac->ksp[0]; 597 KSPConvergedReason reason; 598 599 PetscFunctionBegin; 600 CHKERRQ(KSPSetUp(subksp)); 601 CHKERRQ(KSPGetConvergedReason(subksp,&reason)); 602 if (reason == KSP_DIVERGED_PC_FAILED) { 603 pc->failedreason = PC_SUBPC_ERROR; 604 } 605 PetscFunctionReturn(0); 606 } 607 608 static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc,Vec x,Vec y) 609 { 610 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 611 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data; 612 613 PetscFunctionBegin; 614 CHKERRQ(VecGetLocalVectorRead(x, bjac->x)); 615 CHKERRQ(VecGetLocalVector(y, bjac->y)); 616 /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner 617 matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild 618 of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/ 619 CHKERRQ(KSPSetReusePreconditioner(jac->ksp[0],pc->reusepreconditioner)); 620 CHKERRQ(KSPSolve(jac->ksp[0],bjac->x,bjac->y)); 621 CHKERRQ(KSPCheckSolve(jac->ksp[0],pc,bjac->y)); 622 CHKERRQ(VecRestoreLocalVectorRead(x, bjac->x)); 623 CHKERRQ(VecRestoreLocalVector(y, bjac->y)); 624 PetscFunctionReturn(0); 625 } 626 627 static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc,Mat X,Mat Y) 628 { 629 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 630 Mat sX,sY; 631 632 PetscFunctionBegin; 633 /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner 634 matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild 635 of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/ 636 CHKERRQ(KSPSetReusePreconditioner(jac->ksp[0],pc->reusepreconditioner)); 637 CHKERRQ(MatDenseGetLocalMatrix(X,&sX)); 638 CHKERRQ(MatDenseGetLocalMatrix(Y,&sY)); 639 CHKERRQ(KSPMatSolve(jac->ksp[0],sX,sY)); 640 PetscFunctionReturn(0); 641 } 642 643 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc,Vec x,Vec y) 644 { 645 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 646 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data; 647 PetscScalar *y_array; 648 const PetscScalar *x_array; 649 PC subpc; 650 651 PetscFunctionBegin; 652 /* 653 The VecPlaceArray() is to avoid having to copy the 654 y vector into the bjac->x vector. The reason for 655 the bjac->x vector is that we need a sequential vector 656 for the sequential solve. 657 */ 658 CHKERRQ(VecGetArrayRead(x,&x_array)); 659 CHKERRQ(VecGetArray(y,&y_array)); 660 CHKERRQ(VecPlaceArray(bjac->x,x_array)); 661 CHKERRQ(VecPlaceArray(bjac->y,y_array)); 662 /* apply the symmetric left portion of the inner PC operator */ 663 /* note this by-passes the inner KSP and its options completely */ 664 CHKERRQ(KSPGetPC(jac->ksp[0],&subpc)); 665 CHKERRQ(PCApplySymmetricLeft(subpc,bjac->x,bjac->y)); 666 CHKERRQ(VecResetArray(bjac->x)); 667 CHKERRQ(VecResetArray(bjac->y)); 668 CHKERRQ(VecRestoreArrayRead(x,&x_array)); 669 CHKERRQ(VecRestoreArray(y,&y_array)); 670 PetscFunctionReturn(0); 671 } 672 673 static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc,Vec x,Vec y) 674 { 675 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 676 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data; 677 PetscScalar *y_array; 678 const PetscScalar *x_array; 679 PC subpc; 680 681 PetscFunctionBegin; 682 /* 683 The VecPlaceArray() is to avoid having to copy the 684 y vector into the bjac->x vector. The reason for 685 the bjac->x vector is that we need a sequential vector 686 for the sequential solve. 687 */ 688 CHKERRQ(VecGetArrayRead(x,&x_array)); 689 CHKERRQ(VecGetArray(y,&y_array)); 690 CHKERRQ(VecPlaceArray(bjac->x,x_array)); 691 CHKERRQ(VecPlaceArray(bjac->y,y_array)); 692 693 /* apply the symmetric right portion of the inner PC operator */ 694 /* note this by-passes the inner KSP and its options completely */ 695 696 CHKERRQ(KSPGetPC(jac->ksp[0],&subpc)); 697 CHKERRQ(PCApplySymmetricRight(subpc,bjac->x,bjac->y)); 698 699 CHKERRQ(VecRestoreArrayRead(x,&x_array)); 700 CHKERRQ(VecRestoreArray(y,&y_array)); 701 PetscFunctionReturn(0); 702 } 703 704 static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc,Vec x,Vec y) 705 { 706 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 707 PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data; 708 PetscScalar *y_array; 709 const PetscScalar *x_array; 710 711 PetscFunctionBegin; 712 /* 713 The VecPlaceArray() is to avoid having to copy the 714 y vector into the bjac->x vector. The reason for 715 the bjac->x vector is that we need a sequential vector 716 for the sequential solve. 717 */ 718 CHKERRQ(VecGetArrayRead(x,&x_array)); 719 CHKERRQ(VecGetArray(y,&y_array)); 720 CHKERRQ(VecPlaceArray(bjac->x,x_array)); 721 CHKERRQ(VecPlaceArray(bjac->y,y_array)); 722 CHKERRQ(KSPSolveTranspose(jac->ksp[0],bjac->x,bjac->y)); 723 CHKERRQ(KSPCheckSolve(jac->ksp[0],pc,bjac->y)); 724 CHKERRQ(VecResetArray(bjac->x)); 725 CHKERRQ(VecResetArray(bjac->y)); 726 CHKERRQ(VecRestoreArrayRead(x,&x_array)); 727 CHKERRQ(VecRestoreArray(y,&y_array)); 728 PetscFunctionReturn(0); 729 } 730 731 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc,Mat mat,Mat pmat) 732 { 733 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 734 PetscInt m; 735 KSP ksp; 736 PC_BJacobi_Singleblock *bjac; 737 PetscBool wasSetup = PETSC_TRUE; 738 VecType vectype; 739 const char *prefix; 740 741 PetscFunctionBegin; 742 if (!pc->setupcalled) { 743 if (!jac->ksp) { 744 wasSetup = PETSC_FALSE; 745 746 CHKERRQ(KSPCreate(PETSC_COMM_SELF,&ksp)); 747 CHKERRQ(KSPSetErrorIfNotConverged(ksp,pc->erroriffailure)); 748 CHKERRQ(PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1)); 749 CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp)); 750 CHKERRQ(KSPSetType(ksp,KSPPREONLY)); 751 CHKERRQ(PCGetOptionsPrefix(pc,&prefix)); 752 CHKERRQ(KSPSetOptionsPrefix(ksp,prefix)); 753 CHKERRQ(KSPAppendOptionsPrefix(ksp,"sub_")); 754 755 pc->ops->reset = PCReset_BJacobi_Singleblock; 756 pc->ops->destroy = PCDestroy_BJacobi_Singleblock; 757 pc->ops->apply = PCApply_BJacobi_Singleblock; 758 pc->ops->matapply = PCMatApply_BJacobi_Singleblock; 759 pc->ops->applysymmetricleft = PCApplySymmetricLeft_BJacobi_Singleblock; 760 pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock; 761 pc->ops->applytranspose = PCApplyTranspose_BJacobi_Singleblock; 762 pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Singleblock; 763 764 CHKERRQ(PetscMalloc1(1,&jac->ksp)); 765 jac->ksp[0] = ksp; 766 767 CHKERRQ(PetscNewLog(pc,&bjac)); 768 jac->data = (void*)bjac; 769 } else { 770 ksp = jac->ksp[0]; 771 bjac = (PC_BJacobi_Singleblock*)jac->data; 772 } 773 774 /* 775 The reason we need to generate these vectors is to serve 776 as the right-hand side and solution vector for the solve on the 777 block. We do not need to allocate space for the vectors since 778 that is provided via VecPlaceArray() just before the call to 779 KSPSolve() on the block. 780 */ 781 CHKERRQ(MatGetSize(pmat,&m,&m)); 782 CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&bjac->x)); 783 CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&bjac->y)); 784 CHKERRQ(MatGetVecType(pmat,&vectype)); 785 CHKERRQ(VecSetType(bjac->x,vectype)); 786 CHKERRQ(VecSetType(bjac->y,vectype)); 787 CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->x)); 788 CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->y)); 789 } else { 790 ksp = jac->ksp[0]; 791 bjac = (PC_BJacobi_Singleblock*)jac->data; 792 } 793 CHKERRQ(KSPGetOptionsPrefix(ksp,&prefix)); 794 if (pc->useAmat) { 795 CHKERRQ(KSPSetOperators(ksp,mat,pmat)); 796 CHKERRQ(MatSetOptionsPrefix(mat,prefix)); 797 } else { 798 CHKERRQ(KSPSetOperators(ksp,pmat,pmat)); 799 } 800 CHKERRQ(MatSetOptionsPrefix(pmat,prefix)); 801 if (!wasSetup && pc->setfromoptionscalled) { 802 /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */ 803 CHKERRQ(KSPSetFromOptions(ksp)); 804 } 805 PetscFunctionReturn(0); 806 } 807 808 /* ---------------------------------------------------------------------------------------------*/ 809 static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc) 810 { 811 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 812 PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data; 813 PetscInt i; 814 815 PetscFunctionBegin; 816 if (bjac && bjac->pmat) { 817 CHKERRQ(MatDestroyMatrices(jac->n_local,&bjac->pmat)); 818 if (pc->useAmat) { 819 CHKERRQ(MatDestroyMatrices(jac->n_local,&bjac->mat)); 820 } 821 } 822 823 for (i=0; i<jac->n_local; i++) { 824 CHKERRQ(KSPReset(jac->ksp[i])); 825 if (bjac && bjac->x) { 826 CHKERRQ(VecDestroy(&bjac->x[i])); 827 CHKERRQ(VecDestroy(&bjac->y[i])); 828 CHKERRQ(ISDestroy(&bjac->is[i])); 829 } 830 } 831 CHKERRQ(PetscFree(jac->l_lens)); 832 CHKERRQ(PetscFree(jac->g_lens)); 833 PetscFunctionReturn(0); 834 } 835 836 static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc) 837 { 838 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 839 PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data; 840 PetscInt i; 841 842 PetscFunctionBegin; 843 CHKERRQ(PCReset_BJacobi_Multiblock(pc)); 844 if (bjac) { 845 CHKERRQ(PetscFree2(bjac->x,bjac->y)); 846 CHKERRQ(PetscFree(bjac->starts)); 847 CHKERRQ(PetscFree(bjac->is)); 848 } 849 CHKERRQ(PetscFree(jac->data)); 850 for (i=0; i<jac->n_local; i++) { 851 CHKERRQ(KSPDestroy(&jac->ksp[i])); 852 } 853 CHKERRQ(PetscFree(jac->ksp)); 854 CHKERRQ(PetscFree(pc->data)); 855 PetscFunctionReturn(0); 856 } 857 858 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc) 859 { 860 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 861 PetscInt i,n_local = jac->n_local; 862 KSPConvergedReason reason; 863 864 PetscFunctionBegin; 865 for (i=0; i<n_local; i++) { 866 CHKERRQ(KSPSetUp(jac->ksp[i])); 867 CHKERRQ(KSPGetConvergedReason(jac->ksp[i],&reason)); 868 if (reason == KSP_DIVERGED_PC_FAILED) { 869 pc->failedreason = PC_SUBPC_ERROR; 870 } 871 } 872 PetscFunctionReturn(0); 873 } 874 875 /* 876 Preconditioner for block Jacobi 877 */ 878 static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc,Vec x,Vec y) 879 { 880 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 881 PetscInt i,n_local = jac->n_local; 882 PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data; 883 PetscScalar *yin; 884 const PetscScalar *xin; 885 886 PetscFunctionBegin; 887 CHKERRQ(VecGetArrayRead(x,&xin)); 888 CHKERRQ(VecGetArray(y,&yin)); 889 for (i=0; i<n_local; i++) { 890 /* 891 To avoid copying the subvector from x into a workspace we instead 892 make the workspace vector array point to the subpart of the array of 893 the global vector. 894 */ 895 CHKERRQ(VecPlaceArray(bjac->x[i],xin+bjac->starts[i])); 896 CHKERRQ(VecPlaceArray(bjac->y[i],yin+bjac->starts[i])); 897 898 CHKERRQ(PetscLogEventBegin(PC_ApplyOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0)); 899 CHKERRQ(KSPSolve(jac->ksp[i],bjac->x[i],bjac->y[i])); 900 CHKERRQ(KSPCheckSolve(jac->ksp[i],pc,bjac->y[i])); 901 CHKERRQ(PetscLogEventEnd(PC_ApplyOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0)); 902 903 CHKERRQ(VecResetArray(bjac->x[i])); 904 CHKERRQ(VecResetArray(bjac->y[i])); 905 } 906 CHKERRQ(VecRestoreArrayRead(x,&xin)); 907 CHKERRQ(VecRestoreArray(y,&yin)); 908 PetscFunctionReturn(0); 909 } 910 911 /* 912 Preconditioner for block Jacobi 913 */ 914 static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc,Vec x,Vec y) 915 { 916 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 917 PetscInt i,n_local = jac->n_local; 918 PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data; 919 PetscScalar *yin; 920 const PetscScalar *xin; 921 922 PetscFunctionBegin; 923 CHKERRQ(VecGetArrayRead(x,&xin)); 924 CHKERRQ(VecGetArray(y,&yin)); 925 for (i=0; i<n_local; i++) { 926 /* 927 To avoid copying the subvector from x into a workspace we instead 928 make the workspace vector array point to the subpart of the array of 929 the global vector. 930 */ 931 CHKERRQ(VecPlaceArray(bjac->x[i],xin+bjac->starts[i])); 932 CHKERRQ(VecPlaceArray(bjac->y[i],yin+bjac->starts[i])); 933 934 CHKERRQ(PetscLogEventBegin(PC_ApplyTransposeOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0)); 935 CHKERRQ(KSPSolveTranspose(jac->ksp[i],bjac->x[i],bjac->y[i])); 936 CHKERRQ(KSPCheckSolve(jac->ksp[i],pc,bjac->y[i])); 937 CHKERRQ(PetscLogEventEnd(PC_ApplyTransposeOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0)); 938 939 CHKERRQ(VecResetArray(bjac->x[i])); 940 CHKERRQ(VecResetArray(bjac->y[i])); 941 } 942 CHKERRQ(VecRestoreArrayRead(x,&xin)); 943 CHKERRQ(VecRestoreArray(y,&yin)); 944 PetscFunctionReturn(0); 945 } 946 947 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc,Mat mat,Mat pmat) 948 { 949 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 950 PetscInt m,n_local,N,M,start,i; 951 const char *prefix; 952 KSP ksp; 953 Vec x,y; 954 PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data; 955 PC subpc; 956 IS is; 957 MatReuse scall; 958 VecType vectype; 959 960 PetscFunctionBegin; 961 CHKERRQ(MatGetLocalSize(pc->pmat,&M,&N)); 962 963 n_local = jac->n_local; 964 965 if (pc->useAmat) { 966 PetscBool same; 967 CHKERRQ(PetscObjectTypeCompare((PetscObject)mat,((PetscObject)pmat)->type_name,&same)); 968 PetscCheckFalse(!same,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Matrices not of same type"); 969 } 970 971 if (!pc->setupcalled) { 972 scall = MAT_INITIAL_MATRIX; 973 974 if (!jac->ksp) { 975 pc->ops->reset = PCReset_BJacobi_Multiblock; 976 pc->ops->destroy = PCDestroy_BJacobi_Multiblock; 977 pc->ops->apply = PCApply_BJacobi_Multiblock; 978 pc->ops->matapply = NULL; 979 pc->ops->applytranspose= PCApplyTranspose_BJacobi_Multiblock; 980 pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiblock; 981 982 CHKERRQ(PetscNewLog(pc,&bjac)); 983 CHKERRQ(PetscMalloc1(n_local,&jac->ksp)); 984 CHKERRQ(PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(KSP)))); 985 CHKERRQ(PetscMalloc2(n_local,&bjac->x,n_local,&bjac->y)); 986 CHKERRQ(PetscMalloc1(n_local,&bjac->starts)); 987 CHKERRQ(PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(PetscScalar)))); 988 989 jac->data = (void*)bjac; 990 CHKERRQ(PetscMalloc1(n_local,&bjac->is)); 991 CHKERRQ(PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(IS)))); 992 993 for (i=0; i<n_local; i++) { 994 CHKERRQ(KSPCreate(PETSC_COMM_SELF,&ksp)); 995 CHKERRQ(KSPSetErrorIfNotConverged(ksp,pc->erroriffailure)); 996 CHKERRQ(PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1)); 997 CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp)); 998 CHKERRQ(KSPSetType(ksp,KSPPREONLY)); 999 CHKERRQ(KSPGetPC(ksp,&subpc)); 1000 CHKERRQ(PCGetOptionsPrefix(pc,&prefix)); 1001 CHKERRQ(KSPSetOptionsPrefix(ksp,prefix)); 1002 CHKERRQ(KSPAppendOptionsPrefix(ksp,"sub_")); 1003 1004 jac->ksp[i] = ksp; 1005 } 1006 } else { 1007 bjac = (PC_BJacobi_Multiblock*)jac->data; 1008 } 1009 1010 start = 0; 1011 CHKERRQ(MatGetVecType(pmat,&vectype)); 1012 for (i=0; i<n_local; i++) { 1013 m = jac->l_lens[i]; 1014 /* 1015 The reason we need to generate these vectors is to serve 1016 as the right-hand side and solution vector for the solve on the 1017 block. We do not need to allocate space for the vectors since 1018 that is provided via VecPlaceArray() just before the call to 1019 KSPSolve() on the block. 1020 1021 */ 1022 CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,m,&x)); 1023 CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&y)); 1024 CHKERRQ(VecSetType(x,vectype)); 1025 CHKERRQ(VecSetType(y,vectype)); 1026 CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)x)); 1027 CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)y)); 1028 1029 bjac->x[i] = x; 1030 bjac->y[i] = y; 1031 bjac->starts[i] = start; 1032 1033 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,m,start,1,&is)); 1034 bjac->is[i] = is; 1035 CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)is)); 1036 1037 start += m; 1038 } 1039 } else { 1040 bjac = (PC_BJacobi_Multiblock*)jac->data; 1041 /* 1042 Destroy the blocks from the previous iteration 1043 */ 1044 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1045 CHKERRQ(MatDestroyMatrices(n_local,&bjac->pmat)); 1046 if (pc->useAmat) { 1047 CHKERRQ(MatDestroyMatrices(n_local,&bjac->mat)); 1048 } 1049 scall = MAT_INITIAL_MATRIX; 1050 } else scall = MAT_REUSE_MATRIX; 1051 } 1052 1053 CHKERRQ(MatCreateSubMatrices(pmat,n_local,bjac->is,bjac->is,scall,&bjac->pmat)); 1054 if (pc->useAmat) { 1055 CHKERRQ(MatCreateSubMatrices(mat,n_local,bjac->is,bjac->is,scall,&bjac->mat)); 1056 } 1057 /* Return control to the user so that the submatrices can be modified (e.g., to apply 1058 different boundary conditions for the submatrices than for the global problem) */ 1059 CHKERRQ(PCModifySubMatrices(pc,n_local,bjac->is,bjac->is,bjac->pmat,pc->modifysubmatricesP)); 1060 1061 for (i=0; i<n_local; i++) { 1062 CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->pmat[i])); 1063 CHKERRQ(KSPGetOptionsPrefix(jac->ksp[i],&prefix)); 1064 if (pc->useAmat) { 1065 CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->mat[i])); 1066 CHKERRQ(KSPSetOperators(jac->ksp[i],bjac->mat[i],bjac->pmat[i])); 1067 CHKERRQ(MatSetOptionsPrefix(bjac->mat[i],prefix)); 1068 } else { 1069 CHKERRQ(KSPSetOperators(jac->ksp[i],bjac->pmat[i],bjac->pmat[i])); 1070 } 1071 CHKERRQ(MatSetOptionsPrefix(bjac->pmat[i],prefix)); 1072 if (pc->setfromoptionscalled) { 1073 CHKERRQ(KSPSetFromOptions(jac->ksp[i])); 1074 } 1075 } 1076 PetscFunctionReturn(0); 1077 } 1078 1079 /* ---------------------------------------------------------------------------------------------*/ 1080 /* 1081 These are for a single block with multiple processes 1082 */ 1083 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiproc(PC pc) 1084 { 1085 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 1086 KSP subksp = jac->ksp[0]; 1087 KSPConvergedReason reason; 1088 1089 PetscFunctionBegin; 1090 CHKERRQ(KSPSetUp(subksp)); 1091 CHKERRQ(KSPGetConvergedReason(subksp,&reason)); 1092 if (reason == KSP_DIVERGED_PC_FAILED) { 1093 pc->failedreason = PC_SUBPC_ERROR; 1094 } 1095 PetscFunctionReturn(0); 1096 } 1097 1098 static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc) 1099 { 1100 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 1101 PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data; 1102 1103 PetscFunctionBegin; 1104 CHKERRQ(VecDestroy(&mpjac->ysub)); 1105 CHKERRQ(VecDestroy(&mpjac->xsub)); 1106 CHKERRQ(MatDestroy(&mpjac->submats)); 1107 if (jac->ksp) CHKERRQ(KSPReset(jac->ksp[0])); 1108 PetscFunctionReturn(0); 1109 } 1110 1111 static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc) 1112 { 1113 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 1114 PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data; 1115 1116 PetscFunctionBegin; 1117 CHKERRQ(PCReset_BJacobi_Multiproc(pc)); 1118 CHKERRQ(KSPDestroy(&jac->ksp[0])); 1119 CHKERRQ(PetscFree(jac->ksp)); 1120 CHKERRQ(PetscSubcommDestroy(&mpjac->psubcomm)); 1121 1122 CHKERRQ(PetscFree(mpjac)); 1123 CHKERRQ(PetscFree(pc->data)); 1124 PetscFunctionReturn(0); 1125 } 1126 1127 static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc,Vec x,Vec y) 1128 { 1129 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 1130 PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data; 1131 PetscScalar *yarray; 1132 const PetscScalar *xarray; 1133 KSPConvergedReason reason; 1134 1135 PetscFunctionBegin; 1136 /* place x's and y's local arrays into xsub and ysub */ 1137 CHKERRQ(VecGetArrayRead(x,&xarray)); 1138 CHKERRQ(VecGetArray(y,&yarray)); 1139 CHKERRQ(VecPlaceArray(mpjac->xsub,xarray)); 1140 CHKERRQ(VecPlaceArray(mpjac->ysub,yarray)); 1141 1142 /* apply preconditioner on each matrix block */ 1143 CHKERRQ(PetscLogEventBegin(PC_ApplyOnBlocks,jac->ksp[0],mpjac->xsub,mpjac->ysub,0)); 1144 CHKERRQ(KSPSolve(jac->ksp[0],mpjac->xsub,mpjac->ysub)); 1145 CHKERRQ(KSPCheckSolve(jac->ksp[0],pc,mpjac->ysub)); 1146 CHKERRQ(PetscLogEventEnd(PC_ApplyOnBlocks,jac->ksp[0],mpjac->xsub,mpjac->ysub,0)); 1147 CHKERRQ(KSPGetConvergedReason(jac->ksp[0],&reason)); 1148 if (reason == KSP_DIVERGED_PC_FAILED) { 1149 pc->failedreason = PC_SUBPC_ERROR; 1150 } 1151 1152 CHKERRQ(VecResetArray(mpjac->xsub)); 1153 CHKERRQ(VecResetArray(mpjac->ysub)); 1154 CHKERRQ(VecRestoreArrayRead(x,&xarray)); 1155 CHKERRQ(VecRestoreArray(y,&yarray)); 1156 PetscFunctionReturn(0); 1157 } 1158 1159 static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc,Mat X,Mat Y) 1160 { 1161 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 1162 KSPConvergedReason reason; 1163 Mat sX,sY; 1164 const PetscScalar *x; 1165 PetscScalar *y; 1166 PetscInt m,N,lda,ldb; 1167 1168 PetscFunctionBegin; 1169 /* apply preconditioner on each matrix block */ 1170 CHKERRQ(MatGetLocalSize(X,&m,NULL)); 1171 CHKERRQ(MatGetSize(X,NULL,&N)); 1172 CHKERRQ(MatDenseGetLDA(X,&lda)); 1173 CHKERRQ(MatDenseGetLDA(Y,&ldb)); 1174 CHKERRQ(MatDenseGetArrayRead(X,&x)); 1175 CHKERRQ(MatDenseGetArrayWrite(Y,&y)); 1176 CHKERRQ(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]),m,PETSC_DECIDE,PETSC_DECIDE,N,(PetscScalar*)x,&sX)); 1177 CHKERRQ(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]),m,PETSC_DECIDE,PETSC_DECIDE,N,y,&sY)); 1178 CHKERRQ(MatDenseSetLDA(sX,lda)); 1179 CHKERRQ(MatDenseSetLDA(sY,ldb)); 1180 CHKERRQ(PetscLogEventBegin(PC_ApplyOnBlocks,jac->ksp[0],X,Y,0)); 1181 CHKERRQ(KSPMatSolve(jac->ksp[0],sX,sY)); 1182 CHKERRQ(KSPCheckSolve(jac->ksp[0],pc,NULL)); 1183 CHKERRQ(PetscLogEventEnd(PC_ApplyOnBlocks,jac->ksp[0],X,Y,0)); 1184 CHKERRQ(MatDestroy(&sY)); 1185 CHKERRQ(MatDestroy(&sX)); 1186 CHKERRQ(MatDenseRestoreArrayWrite(Y,&y)); 1187 CHKERRQ(MatDenseRestoreArrayRead(X,&x)); 1188 CHKERRQ(KSPGetConvergedReason(jac->ksp[0],&reason)); 1189 if (reason == KSP_DIVERGED_PC_FAILED) { 1190 pc->failedreason = PC_SUBPC_ERROR; 1191 } 1192 PetscFunctionReturn(0); 1193 } 1194 1195 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc) 1196 { 1197 PC_BJacobi *jac = (PC_BJacobi*)pc->data; 1198 PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data; 1199 PetscInt m,n; 1200 MPI_Comm comm,subcomm=0; 1201 const char *prefix; 1202 PetscBool wasSetup = PETSC_TRUE; 1203 VecType vectype; 1204 1205 PetscFunctionBegin; 1206 CHKERRQ(PetscObjectGetComm((PetscObject)pc,&comm)); 1207 PetscCheckFalse(jac->n_local > 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only a single block in a subcommunicator is supported"); 1208 jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */ 1209 if (!pc->setupcalled) { 1210 wasSetup = PETSC_FALSE; 1211 CHKERRQ(PetscNewLog(pc,&mpjac)); 1212 jac->data = (void*)mpjac; 1213 1214 /* initialize datastructure mpjac */ 1215 if (!jac->psubcomm) { 1216 /* Create default contiguous subcommunicatiors if user does not provide them */ 1217 CHKERRQ(PetscSubcommCreate(comm,&jac->psubcomm)); 1218 CHKERRQ(PetscSubcommSetNumber(jac->psubcomm,jac->n)); 1219 CHKERRQ(PetscSubcommSetType(jac->psubcomm,PETSC_SUBCOMM_CONTIGUOUS)); 1220 CHKERRQ(PetscLogObjectMemory((PetscObject)pc,sizeof(PetscSubcomm))); 1221 } 1222 mpjac->psubcomm = jac->psubcomm; 1223 subcomm = PetscSubcommChild(mpjac->psubcomm); 1224 1225 /* Get matrix blocks of pmat */ 1226 CHKERRQ(MatGetMultiProcBlock(pc->pmat,subcomm,MAT_INITIAL_MATRIX,&mpjac->submats)); 1227 1228 /* create a new PC that processors in each subcomm have copy of */ 1229 CHKERRQ(PetscMalloc1(1,&jac->ksp)); 1230 CHKERRQ(KSPCreate(subcomm,&jac->ksp[0])); 1231 CHKERRQ(KSPSetErrorIfNotConverged(jac->ksp[0],pc->erroriffailure)); 1232 CHKERRQ(PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0],(PetscObject)pc,1)); 1233 CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)jac->ksp[0])); 1234 CHKERRQ(KSPSetOperators(jac->ksp[0],mpjac->submats,mpjac->submats)); 1235 CHKERRQ(KSPGetPC(jac->ksp[0],&mpjac->pc)); 1236 1237 CHKERRQ(PCGetOptionsPrefix(pc,&prefix)); 1238 CHKERRQ(KSPSetOptionsPrefix(jac->ksp[0],prefix)); 1239 CHKERRQ(KSPAppendOptionsPrefix(jac->ksp[0],"sub_")); 1240 CHKERRQ(KSPGetOptionsPrefix(jac->ksp[0],&prefix)); 1241 CHKERRQ(MatSetOptionsPrefix(mpjac->submats,prefix)); 1242 1243 /* create dummy vectors xsub and ysub */ 1244 CHKERRQ(MatGetLocalSize(mpjac->submats,&m,&n)); 1245 CHKERRQ(VecCreateMPIWithArray(subcomm,1,n,PETSC_DECIDE,NULL,&mpjac->xsub)); 1246 CHKERRQ(VecCreateMPIWithArray(subcomm,1,m,PETSC_DECIDE,NULL,&mpjac->ysub)); 1247 CHKERRQ(MatGetVecType(mpjac->submats,&vectype)); 1248 CHKERRQ(VecSetType(mpjac->xsub,vectype)); 1249 CHKERRQ(VecSetType(mpjac->ysub,vectype)); 1250 CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)mpjac->xsub)); 1251 CHKERRQ(PetscLogObjectParent((PetscObject)pc,(PetscObject)mpjac->ysub)); 1252 1253 pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiproc; 1254 pc->ops->reset = PCReset_BJacobi_Multiproc; 1255 pc->ops->destroy = PCDestroy_BJacobi_Multiproc; 1256 pc->ops->apply = PCApply_BJacobi_Multiproc; 1257 pc->ops->matapply = PCMatApply_BJacobi_Multiproc; 1258 } else { /* pc->setupcalled */ 1259 subcomm = PetscSubcommChild(mpjac->psubcomm); 1260 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1261 /* destroy old matrix blocks, then get new matrix blocks */ 1262 if (mpjac->submats) CHKERRQ(MatDestroy(&mpjac->submats)); 1263 CHKERRQ(MatGetMultiProcBlock(pc->pmat,subcomm,MAT_INITIAL_MATRIX,&mpjac->submats)); 1264 } else { 1265 CHKERRQ(MatGetMultiProcBlock(pc->pmat,subcomm,MAT_REUSE_MATRIX,&mpjac->submats)); 1266 } 1267 CHKERRQ(KSPSetOperators(jac->ksp[0],mpjac->submats,mpjac->submats)); 1268 } 1269 1270 if (!wasSetup && pc->setfromoptionscalled) { 1271 CHKERRQ(KSPSetFromOptions(jac->ksp[0])); 1272 } 1273 PetscFunctionReturn(0); 1274 } 1275