#define PETSCKSP_DLL

/*
  This file defines an additive Schwarz preconditioner for any Mat implementation.

  Note that each processor may have any number of subdomains. But in order to
  deal easily with the VecScatter(), we treat each processor as if it has the
  same number of subdomains.

       n - total number of true subdomains on all processors
       n_local_true - actual number of subdomains on this processor
       n_local = maximum over all processors of n_local_true
*/
#include "private/pcimpl.h"     /*I "petscpc.h" I*/

typedef struct {
  PetscInt   n,n_local,n_local_true;
  PetscInt   overlap;             /* overlap requested by user */
  KSP        *ksp;                /* linear solvers for each block */
  VecScatter *scat;               /* mapping to subregion */
  Vec        *x,*y;               /* work vectors */
  IS         *is;                 /* index set that defines each subdomain */
  Mat        *mat,*pmat;          /* mat is not currently used */
  PCASMType  type;                /* use reduced interpolation, restriction or both */
  PetscTruth type_set;            /* if user set this value (so won't change it for symmetric problems) */
  PetscTruth same_local_solves;   /* flag indicating whether all local solvers are same */
  PetscTruth inplace;             /* indicates that the sub-matrices are deleted after
                                     PCSetUpOnBlocks() is done. Similar to inplace
                                     factorization in the case of LU and ILU */
} PC_ASM;
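/*
   For reference (a standard identity, not a routine in this file): one
   application of basic ASM computes

       y = sum_i R_i^T A_i^{-1} R_i x,   with A_i = R_i A R_i^T,

   where R_i restricts a global vector to the (overlapping) subdomain i.
   PC_ASM_RESTRICT performs the interpolation R_i^T only locally (dropping
   the overlap), PC_ASM_INTERPOLATE does the same to the restriction R_i,
   and PC_ASM_NONE drops the overlap on both sides.
*/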
#undef __FUNCT__
#define __FUNCT__ "PCView_ASM"
static PetscErrorCode PCView_ASM(PC pc,PetscViewer viewer)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i;
  PetscTruth     iascii,isstring;
  PetscViewer    sviewer;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
  if (iascii) {
    if (osm->n > 0) {
      ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: total subdomain blocks = %D, amount of overlap = %D\n",osm->n,osm->overlap);CHKERRQ(ierr);
    } else {
      ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: total subdomain blocks not yet set, amount of overlap = %D\n",osm->overlap);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: restriction/interpolation type - %s\n",PCASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
    if (osm->same_local_solves) {
      ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all blocks, in the following KSP and PC objects:\n");CHKERRQ(ierr);
      ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
      if (!rank && osm->ksp) {
        ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
        ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
      }
      ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    } else {
      ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each block is in the following KSP and PC objects:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] number of local blocks = %D\n",rank,osm->n_local_true);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
      for (i=0; i<osm->n_local; i++) {
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (i < osm->n_local_true) {
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d] local block number %D\n",rank,i);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    }
  } else if (isstring) {
    ierr = PetscViewerStringSPrintf(viewer," blocks=%D, overlap=%D, type=%D",osm->n,osm->overlap,osm->type);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PCASM",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCASMPrintSubdomains"
static PetscErrorCode PCASMPrintSubdomains(PC pc)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  const char     *prefix;
  char           fname[PETSC_MAX_PATH_LEN+1];
  PetscViewer    viewer;
  PetscInt       i,j,nidx;
  const PetscInt *idx;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetString(prefix,"-pc_asm_print_subdomains",fname,PETSC_MAX_PATH_LEN,PETSC_NULL);CHKERRQ(ierr);
  if (fname[0] == 0) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
  ierr = PetscViewerASCIIOpen(((PetscObject)pc)->comm,fname,&viewer);CHKERRQ(ierr);
  for (i=0; i<osm->n_local_true; i++) {
    ierr = ISGetLocalSize(osm->is[i],&nidx);CHKERRQ(ierr);
    ierr = ISGetIndices(osm->is[i],&idx);CHKERRQ(ierr);
    for (j=0; j<nidx; j++) {
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%D ",idx[j]);CHKERRQ(ierr);
    }
    ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr);
  }
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetUp_ASM"
static PetscErrorCode PCSetUp_ASM(PC pc)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscTruth     symset,flg;
  PetscInt       i,m,start,start_val,end_val,mbs,bs;
  PetscMPIInt    size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  IS             isl;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  Vec            vec;

  PetscFunctionBegin;
  if (!pc->setupcalled) {

    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) { osm->type = PC_ASM_BASIC; }
    }

    if (osm->n == PETSC_DECIDE && osm->n_local_true < 1) {
      /* no subdomains given, use one per processor */
      osm->n_local = osm->n_local_true = 1;
      ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
      osm->n = size;
    } else if (osm->n == PETSC_DECIDE) {
      /* determine global number of subdomains */
      PetscInt inwork[2],outwork[2];
      inwork[0] = inwork[1] = osm->n_local_true;
      ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
      osm->n_local = outwork[0];
      osm->n       = outwork[1];
    }

    if (!osm->is) { /* build the index sets */
      ierr = MatGetOwnershipRange(pc->pmat,&start_val,&end_val);CHKERRQ(ierr);
      ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
      if (end_val/bs*bs != end_val || start_val/bs*bs != start_val) {
        SETERRQ(PETSC_ERR_ARG_WRONG,"Bad distribution for matrix block size");
      }
      ierr  = PetscMalloc(osm->n_local_true*sizeof(IS *),&osm->is);CHKERRQ(ierr);
      mbs   = (end_val - start_val)/bs;
      start = start_val;
      for (i=0; i<osm->n_local_true; i++) {
        size = (mbs/osm->n_local_true + ((mbs % osm->n_local_true) > i))*bs;
        ierr = ISCreateStride(PETSC_COMM_SELF,size,start,1,&isl);CHKERRQ(ierr);
        start += size;
        osm->is[i] = isl;
      }
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    ierr = PetscOptionsHasName(prefix,"-pc_asm_print_subdomains",&flg);CHKERRQ(ierr);
    if (flg) { ierr = PCASMPrintSubdomains(pc);CHKERRQ(ierr); }

    ierr = PetscMalloc(osm->n_local_true*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(VecScatter *),&osm->scat);CHKERRQ(ierr);
    ierr = PetscMalloc(2*osm->n_local*sizeof(Vec *),&osm->x);CHKERRQ(ierr);
    osm->y = osm->x + osm->n_local;

    /* Extend the "overlapping" regions by a number of steps */
    ierr = MatIncreaseOverlap(pc->pmat,osm->n_local_true,osm->is,osm->overlap);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
    }

    /* Create the local work vectors and scatter contexts */
    ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = ISGetLocalSize(osm->is[i],&m);CHKERRQ(ierr);
      ierr = VecCreateSeq(PETSC_COMM_SELF,m,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDuplicate(osm->x[i],&osm->y[i]);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);CHKERRQ(ierr);
      ierr = VecScatterCreate(vec,osm->is[i],osm->x[i],isl,&osm->scat[i]);CHKERRQ(ierr);
      ierr = ISDestroy(isl);CHKERRQ(ierr);
    }
    for (i=osm->n_local_true; i<osm->n_local; i++) {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDuplicate(osm->x[i],&osm->y[i]);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&isl);CHKERRQ(ierr);
      ierr = VecScatterCreate(vec,isl,osm->x[i],isl,&osm->scat[i]);CHKERRQ(ierr);
      ierr = ISDestroy(isl);CHKERRQ(ierr);
    }
    ierr = VecDestroy(vec);CHKERRQ(ierr);

    /* Create the local solvers */
    for (i=0; i<osm->n_local_true; i++) {
      ierr = KSPCreate(PETSC_COMM_SELF,&ksp);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    scall = MAT_INITIAL_MATRIX;

  } else {
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr  = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
  }

  /*
     Extract out the submatrices
  */
  ierr = MatGetSubMatrices(pc->pmat,osm->n_local_true,osm->is,osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n_local_true,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over subdomains putting them into local ksp
  */
  for (i=0; i<osm->n_local_true; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
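/*
   A sketch of the user hook invoked through PCModifySubMatrices() above.
   The callback below is hypothetical (the exact signature should be checked
   against petscpc.h); it would be registered with

       ierr = PCSetModifySubMatrices(pc,MyModifySubMatrices,PETSC_NULL);CHKERRQ(ierr);

   before PCSetUp() runs:

       PetscErrorCode MyModifySubMatrices(PC pc,PetscInt nsub,const IS row[],const IS col[],Mat submat[],void *ctx)
       {
         PetscFunctionBegin;
         /* e.g., impose subdomain boundary conditions on submat[0..nsub-1] */
         PetscFunctionReturn(0);
       }
*/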
#undef __FUNCT__
#define __FUNCT__ "PCSetUpOnBlocks_ASM"
static PetscErrorCode PCSetUpOnBlocks_ASM(PC pc)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  for (i=0; i<osm->n_local_true; i++) {
    ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
  }
  /*
     If the inplace flag is set, destroy the submatrices after the setup
     on blocks is done.
  */
  if (osm->inplace && osm->n_local_true > 0) {
    ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCApply_ASM"
static PetscErrorCode PCApply_ASM(PC pc,Vec x,Vec y)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i,n_local = osm->n_local,n_local_true = osm->n_local_true;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_ASM_RESTRICT)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    for (i=0; i<n_local_true; i++) {
      ierr = VecSet(osm->x[i],0.0);CHKERRQ(ierr);
    }
  }
  if (!(osm->type & PC_ASM_INTERPOLATE)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  for (i=0; i<n_local; i++) {
    ierr = VecScatterBegin(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<n_local_true; i++) {
    ierr = VecScatterEnd(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* handle the rest of the scatters that do not have local solves */
  for (i=n_local_true; i<n_local; i++) {
    ierr = VecScatterEnd(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  for (i=0; i<n_local; i++) {
    ierr = VecScatterEnd(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "PCApplyTranspose_ASM"
static PetscErrorCode PCApplyTranspose_ASM(PC pc,Vec x,Vec y)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i,n_local = osm->n_local,n_local_true = osm->n_local_true;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).

     Note: these are reversed from PCApply_ASM() because we are applying the
     transpose of the three terms.
  */
  if (!(osm->type & PC_ASM_INTERPOLATE)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    for (i=0; i<n_local_true; i++) {
      ierr = VecSet(osm->x[i],0.0);CHKERRQ(ierr);
    }
  }
  if (!(osm->type & PC_ASM_RESTRICT)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  for (i=0; i<n_local; i++) {
    ierr = VecScatterBegin(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<n_local_true; i++) {
    ierr = VecScatterEnd(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* handle the rest of the scatters that do not have local solves */
  for (i=n_local_true; i<n_local; i++) {
    ierr = VecScatterEnd(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  for (i=0; i<n_local; i++) {
    ierr = VecScatterEnd(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCDestroy_ASM"
static PetscErrorCode PCDestroy_ASM(PC pc)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (osm->ksp) {
    for (i=0; i<osm->n_local_true; i++) {
      ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
  }
  if (osm->pmat && !osm->inplace) {
    if (osm->n_local_true > 0) {
      ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
    }
  }
  if (osm->scat) {
    for (i=0; i<osm->n_local; i++) {
      ierr = VecScatterDestroy(osm->scat[i]);CHKERRQ(ierr);
      ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr);
      ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->scat);CHKERRQ(ierr);
    ierr = PetscFree(osm->x);CHKERRQ(ierr);
  }
  if (osm->is) {
    for (i=0; i<osm->n_local_true; i++) {
      ierr = ISDestroy(osm->is[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->is);CHKERRQ(ierr);
  }
  ierr = PetscFree(osm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetFromOptions_ASM"
static PetscErrorCode PCSetFromOptions_ASM(PC pc)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       blocks,ovl;
  PetscTruth     symset,flg;
  PCASMType      asmtype;

  PetscFunctionBegin;
  /* set the type to symmetric if matrix is symmetric */
  if (!osm->type_set && pc->pmat) {
    ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
    if (symset && flg) { osm->type = PC_ASM_BASIC; }
  }
  ierr = PetscOptionsHead("Additive Schwarz options");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-pc_asm_blocks","Number of subdomains","PCASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCASMSetTotalSubdomains(pc,blocks,PETSC_NULL);CHKERRQ(ierr); }
  ierr = PetscOptionsInt("-pc_asm_overlap","Number of grid points overlap","PCASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
  ierr = PetscOptionsName("-pc_asm_in_place","Perform matrix factorization inplace","PCASMSetUseInPlace",&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCASMSetUseInPlace(pc);CHKERRQ(ierr); }
  ierr = PetscOptionsEnum("-pc_asm_type","Type of restriction/extension","PCASMSetType",PCASMTypes,(PetscEnum)osm->type,(PetscEnum*)&asmtype,&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCASMSetType(pc,asmtype);CHKERRQ(ierr); }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
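/*
   The options registered above are normally given on the command line, but a
   sketch of setting them programmatically (before PCSetFromOptions() or
   KSPSetFromOptions() is called) is:

       ierr = PetscOptionsSetValue("-pc_asm_overlap","2");CHKERRQ(ierr);
       ierr = PetscOptionsSetValue("-pc_asm_type","basic");CHKERRQ(ierr);
       ierr = PetscOptionsSetValue("-sub_pc_type","ilu");CHKERRQ(ierr);
*/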
/*------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMSetLocalSubdomains_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetLocalSubdomains_ASM(PC pc,PetscInt n,IS is[])
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (n < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
  if (pc->setupcalled && (n != osm->n_local_true || is)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetLocalSubdomains() should be called before calling PCSetUp().");
  }
  if (!pc->setupcalled) {
    if (is) {
      for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
    }
    if (osm->is) {
      for (i=0; i<osm->n_local_true; i++) {ierr = ISDestroy(osm->is[i]);CHKERRQ(ierr);}
      ierr = PetscFree(osm->is);CHKERRQ(ierr);
    }
    osm->n_local_true = n;
    osm->is           = 0;
    if (is) {
      ierr = PetscMalloc(n*sizeof(IS *),&osm->is);CHKERRQ(ierr);
      for (i=0; i<n; i++) { osm->is[i] = is[i]; }
    }
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMSetTotalSubdomains_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetTotalSubdomains_ASM(PC pc,PetscInt N,IS *is)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,n;

  PetscFunctionBegin;
  if (N < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
  if (is) SETERRQ(PETSC_ERR_SUP,"Use PCASMSetLocalSubdomains() to set specific index sets\n\tthey cannot be set globally yet.");

  /*
     Split the subdomains equally among all processors
  */
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  n = N/size + ((N % size) > rank);
  if (!n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Each process must have at least one block: total processors %d total blocks %D",(int)size,N);
  if (pc->setupcalled && n != osm->n_local_true) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetTotalSubdomains() should be called before PCSetUp().");
  if (!pc->setupcalled) {
    if (osm->is) {
      for (i=0; i<osm->n_local_true; i++) {
        ierr = ISDestroy(osm->is[i]);CHKERRQ(ierr);
      }
      ierr = PetscFree(osm->is);CHKERRQ(ierr);
    }
    osm->n_local_true = n;
    osm->is           = 0;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMSetOverlap_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetOverlap_ASM(PC pc,PetscInt ovl)
{
  PC_ASM *osm = (PC_ASM*)pc->data;

  PetscFunctionBegin;
  if (ovl < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
  osm->overlap = ovl;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMSetType_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetType_ASM(PC pc,PCASMType type)
{
  PC_ASM *osm = (PC_ASM*)pc->data;

  PetscFunctionBegin;
  osm->type     = type;
  osm->type_set = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMGetSubKSP_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetSubKSP_ASM(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (osm->n_local_true < 1) {
    SETERRQ(PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
  }

  if (n_local) {
    *n_local = osm->n_local_true;
  }
  if (first_local) {
    ierr = MPI_Scan(&osm->n_local_true,first_local,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
    *first_local -= osm->n_local_true;
  }
  if (ksp) {
    /* Assume that local solves are now different; not necessarily
       true, though!  This flag is used only for PCView_ASM() */
    *ksp                   = osm->ksp;
    osm->same_local_solves = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMSetUseInPlace_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetUseInPlace_ASM(PC pc)
{
  PC_ASM *osm = (PC_ASM*)pc->data;

  PetscFunctionBegin;
  osm->inplace = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*----------------------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "PCASMSetUseInPlace"
/*@
   PCASMSetUseInPlace - Tells the system to destroy the matrix after setup is done.

   Collective on PC

   Input Parameter:
.  pc - the preconditioner context

   Options Database Key:
.  -pc_asm_in_place - Activates in-place factorization

   Note:
   PCASMSetUseInPlace() can only be used with the KSP method KSPPREONLY, and
   when the original matrix is not required during the solve process.
   This destroys the matrix early, thus saving memory.

   Level: intermediate

.keywords: PC, set, factorization, direct, inplace, in-place, ASM

.seealso: PCFactorSetUseInPlace()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetUseInPlace(PC pc)
{
  PetscErrorCode ierr,(*f)(PC);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetUseInPlace_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/*----------------------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "PCASMSetLocalSubdomains"
/*@C
    PCASMSetLocalSubdomains - Sets the local subdomains (for this processor
    only) for the additive Schwarz preconditioner.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
.   n  - the number of subdomains for this processor (default value = 1)
-   is - the index sets that define the subdomains for this processor
         (or PETSC_NULL for PETSc to determine subdomains)

    Notes:
    The IS numbering is in the parallel, global numbering of the vector.

    By default the ASM preconditioner uses 1 block per processor.

    These index sets cannot be destroyed until after completion of the
    linear solves for which the ASM preconditioner is being used.

    Use PCASMSetTotalSubdomains() to set the subdomains for all processors.

    Level: advanced

.keywords: PC, ASM, set, local, subdomains, additive Schwarz

.seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetLocalSubdomains(PC pc,PetscInt n,IS is[])
{
  PetscErrorCode ierr,(*f)(PC,PetscInt,IS[]);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetLocalSubdomains_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,n,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCASMSetTotalSubdomains"
/*@C
    PCASMSetTotalSubdomains - Sets the subdomains for all processors for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine, with the same index sets.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
.   N  - the number of subdomains for all processors
-   is - the index sets that define the subdomains for all processors
         (or PETSC_NULL for PETSc to determine subdomains)

    Options Database Key:
    To set the total number of subdomain blocks rather than specify the
    index sets, use the option
.    -pc_asm_blocks <blks> - Sets total blocks

    Notes:
    Currently this routine cannot be used to set the actual subdomains with
    the argument is; pass PETSC_NULL.

    By default the ASM preconditioner uses 1 block per processor.

    These index sets cannot be destroyed until after completion of the
    linear solves for which the ASM preconditioner is being used.

    Use PCASMSetLocalSubdomains() to set local subdomains.

    Level: advanced

.keywords: PC, ASM, set, total, global, subdomains, additive Schwarz

.seealso: PCASMSetLocalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetTotalSubdomains(PC pc,PetscInt N,IS *is)
{
  PetscErrorCode ierr,(*f)(PC,PetscInt,IS *);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetTotalSubdomains_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,N,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
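/*
   A sketch of the calls documented above (hypothetical caller; ksp is an
   outer KSP whose operators are already set): request 16 blocks split
   across the communicator and let PETSc build the index sets.

       PC pc;
       ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
       ierr = PCSetType(pc,PCASM);CHKERRQ(ierr);
       ierr = PCASMSetTotalSubdomains(pc,16,PETSC_NULL);CHKERRQ(ierr);
*/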
#undef __FUNCT__
#define __FUNCT__ "PCASMSetOverlap"
/*@
    PCASMSetOverlap - Sets the overlap between a pair of subdomains for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine.

    Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)

    Options Database Key:
.   -pc_asm_overlap <ovl> - Sets overlap

    Notes:
    By default the ASM preconditioner uses 1 block per processor.  To use
    multiple blocks per processor, see PCASMSetTotalSubdomains() and
    PCASMSetLocalSubdomains() (and the option -pc_asm_blocks <blks>).

    The overlap defaults to 1, so if one desires that no additional
    overlap be computed beyond what may have been set with a call to
    PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(), then ovl
    must be set to be 0.  In particular, if one does not explicitly set
    the subdomains in an application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in an ASM
    variant that is equivalent to the block Jacobi preconditioner.

    Note that one can define initial index sets with any overlap via
    PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(); the routine
    PCASMSetOverlap() merely allows PETSc to extend that overlap further
    if desired.

    Level: intermediate

.keywords: PC, ASM, set, overlap

.seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr,(*f)(PC,PetscInt);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetOverlap_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,ovl);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCASMSetType"
/*@
    PCASMSetType - Sets the type of restriction and interpolation used
    for local problems in the additive Schwarz method.

    Collective on PC

    Input Parameters:
+   pc   - the preconditioner context
-   type - variant of ASM, one of
.vb
      PC_ASM_BASIC       - full interpolation and restriction
      PC_ASM_RESTRICT    - full restriction, local processor interpolation
      PC_ASM_INTERPOLATE - full interpolation, local processor restriction
      PC_ASM_NONE        - local processor restriction and interpolation
.ve

    Options Database Key:
.   -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type

    Level: intermediate

.keywords: PC, ASM, set, type

.seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetType(PC pc,PCASMType type)
{
  PetscErrorCode ierr,(*f)(PC,PCASMType);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetType_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,type);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
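/*
   A sketch combining the two routines above (hypothetical caller; pc is a
   PCASM preconditioner): extend each subdomain by two mesh levels and use
   classical, non-restricted ASM.

       ierr = PCASMSetOverlap(pc,2);CHKERRQ(ierr);
       ierr = PCASMSetType(pc,PC_ASM_BASIC);CHKERRQ(ierr);
*/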
#undef __FUNCT__
#define __FUNCT__ "PCASMGetSubKSP"
/*@C
   PCASMGetSubKSP - Gets the local KSP contexts for all blocks on
   this processor.

   Collective on PC iff first_local is requested

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n_local - the number of blocks on this processor or PETSC_NULL
.  first_local - the global number of the first block on this processor or PETSC_NULL,
                 all processors must request or all must pass PETSC_NULL
-  ksp - the array of KSP contexts

   Notes:
   After PCASMGetSubKSP() the array of KSP contexts is not to be freed.

   Currently for some matrix implementations only 1 block per processor
   is supported.

   You must call KSPSetUp() before calling PCASMGetSubKSP().

   Level: advanced

.keywords: PC, ASM, additive Schwarz, get, sub, KSP, context

.seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMSetOverlap(),
          PCASMCreateSubdomains2D()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr,(*f)(PC,PetscInt*,PetscInt*,KSP **);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMGetSubKSP_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,n_local,first_local,ksp);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot get subksp for this type of PC");
  }
  PetscFunctionReturn(0);
}
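/*
   A sketch of per-block customization with PCASMGetSubKSP() (hypothetical
   caller; as noted above, KSPSetUp() must be called on the outer ksp first):

       KSP      *subksp;
       PC       subpc;
       PetscInt nlocal,first,i;
       ierr = KSPSetUp(ksp);CHKERRQ(ierr);
       ierr = PCASMGetSubKSP(pc,&nlocal,&first,&subksp);CHKERRQ(ierr);
       for (i=0; i<nlocal; i++) {
         ierr = KSPGetPC(subksp[i],&subpc);CHKERRQ(ierr);
         ierr = PCSetType(subpc,PCILU);CHKERRQ(ierr);
       }
*/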
/* -------------------------------------------------------------------------------------*/
/*MC
   PCASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
           its own KSP object.

   Options Database Keys:
+  -pc_asm_truelocal - Activates PCASMSetUseTrueLocal()
.  -pc_asm_in_place - Activates in-place factorization
.  -pc_asm_blocks <blks> - Sets total blocks
.  -pc_asm_overlap <ovl> - Sets overlap
-  -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type

   IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
   will get different convergence rates due to the default option of -pc_asm_type restrict.  Use
   -pc_asm_type basic to use the standard ASM.

   Notes: Each processor can have one or more blocks, but a block cannot be shared by more
          than one processor.  Defaults to one block per processor.

   To set options on the solvers for each block, prepend -sub_ to all the KSP and PC
   options database keys.  For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

   To set the options on the solvers separately for each block, call PCASMGetSubKSP()
   and set the options directly on the resulting KSP object (you can access its PC
   with KSPGetPC()).

   Level: beginner

   Concepts: additive Schwarz method

   References:
   An additive variant of the Schwarz alternating method for the case of many subregions,
   M. Dryja and O. B. Widlund, Courant Institute, New York University, technical report.

   Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
   Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.

.seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
          PCBJACOBI, PCASMSetUseTrueLocal(), PCASMGetSubKSP(), PCASMSetLocalSubdomains(),
          PCASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCASMSetOverlap(), PCASMSetType(),
          PCASMSetUseInPlace()
M*/
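/*
   An end-to-end sketch of the usage described in the manual page above
   (hypothetical driver; A, b, x are an assembled parallel matrix and
   vectors, and error checking is abbreviated):

       KSP ksp;
       PC  pc;
       ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
       ierr = KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
       ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
       ierr = PCSetType(pc,PCASM);CHKERRQ(ierr);
       ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);  /* picks up -pc_asm_* and -sub_* options */
       ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
*/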
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_ASM(PC pc)
{
  PetscErrorCode ierr;
  PC_ASM         *osm;

  PetscFunctionBegin;
  ierr = PetscNewLog(pc,PC_ASM,&osm);CHKERRQ(ierr);
  osm->n                 = PETSC_DECIDE;
  osm->n_local           = 0;
  osm->n_local_true      = 0;
  osm->overlap           = 1;
  osm->ksp               = 0;
  osm->scat              = 0;
  osm->x                 = 0;
  osm->y                 = 0;
  osm->is                = 0;
  osm->mat               = 0;
  osm->pmat              = 0;
  osm->type              = PC_ASM_RESTRICT;
  osm->same_local_solves = PETSC_TRUE;
  osm->inplace           = PETSC_FALSE;

  pc->data                 = (void*)osm;
  pc->ops->apply           = PCApply_ASM;
  pc->ops->applytranspose  = PCApplyTranspose_ASM;
  pc->ops->setup           = PCSetUp_ASM;
  pc->ops->destroy         = PCDestroy_ASM;
  pc->ops->setfromoptions  = PCSetFromOptions_ASM;
  pc->ops->setuponblocks   = PCSetUpOnBlocks_ASM;
  pc->ops->view            = PCView_ASM;
  pc->ops->applyrichardson = 0;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetLocalSubdomains_C","PCASMSetLocalSubdomains_ASM",PCASMSetLocalSubdomains_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetTotalSubdomains_C","PCASMSetTotalSubdomains_ASM",PCASMSetTotalSubdomains_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetOverlap_C","PCASMSetOverlap_ASM",PCASMSetOverlap_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetType_C","PCASMSetType_ASM",PCASMSetType_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMGetSubKSP_C","PCASMGetSubKSP_ASM",PCASMGetSubKSP_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetUseInPlace_C","PCASMSetUseInPlace_ASM",PCASMSetUseInPlace_ASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "PCASMCreateSubdomains"
/*@C
   PCASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
   preconditioner for any problem on a general grid.

   Collective

   Input Parameters:
+  A - The global matrix operator
-  n - the number of local blocks

   Output Parameter:
.  outis - the array of index sets defining the subdomains

   Level: advanced

.keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid

.seealso: PCASMSetLocalSubdomains(), PCASMDestroySubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
{
  MatPartitioning mpart;
  const char      *prefix;
  PetscErrorCode  (*f)(Mat,PetscTruth*,MatReuse,Mat*);
  PetscMPIInt     size;
  PetscInt        i,j,rstart,rend,bs;
  PetscTruth      iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
  Mat             Ad = PETSC_NULL, adj;
  IS              ispart,isnumb,*is;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidPointer(outis,3);
  if (n < 1) {SETERRQ1(PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);}

  /* Get prefix, row distribution, and block size */
  ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (rstart/bs*bs != rstart || rend/bs*bs != rend) {
    SETERRQ3(PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
  }
  /* Get diagonal block from matrix if possible */
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr);
  } else if (size == 1) {
    iscopy = PETSC_FALSE; Ad = A;
  } else {
    iscopy = PETSC_FALSE; Ad = PETSC_NULL;
  }
  if (Ad) {
    ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
    if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
  }
  if (Ad && n > 1) {
    PetscTruth match,done;
    /* Try to setup a good matrix partitioning if available */
    ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
    ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
    ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
    ierr = PetscTypeCompare((PetscObject)mpart,MAT_PARTITIONING_CURRENT,&match);CHKERRQ(ierr);
    if (!match) {
      ierr = PetscTypeCompare((PetscObject)mpart,MAT_PARTITIONING_SQUARE,&match);CHKERRQ(ierr);
    }
    if (!match) { /* assume a "good" partitioner is available */
      PetscInt na,*ia,*ja;
      ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
      if (done) {
        /* Build adjacency matrix by hand.
           Unfortunately a call to
           MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
           remove the block-aij structure and we cannot expect
           MatPartitioning to split vertices as we need */
        PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
        nnz = 0;
        for (i=0; i<na; i++) { /* count number of nonzeros */
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] == i) { /* don't count diagonal */
              len--; break;
            }
          }
          nnz += len;
        }
        ierr   = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
        ierr   = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
        nnz    = 0;
        iia[0] = 0;
        for (i=0; i<na; i++) { /* fill adjacency */
          cnt = 0;
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] != i) { /* if not diagonal */
              jja[nnz+cnt++] = row[j];
            }
          }
          nnz += cnt;
          iia[i+1] = nnz;
        }
        /* Partitioning of the adjacency matrix */
        ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
        ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
        ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
        ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
        ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
        ierr = MatDestroy(adj);CHKERRQ(ierr);
        foundpart = PETSC_TRUE;
      }
      ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
    }
    ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr);
  }
  if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);}

  ierr   = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
  *outis = is;

  if (!foundpart) {

    /* Partitioning by contiguous chunks of rows */

    PetscInt mbs   = (rend-rstart)/bs;
    PetscInt start = rstart;
    for (i=0; i<n; i++) {
      PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
      ierr = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
      start += count;
    }

  } else {

    /* Partitioning by adjacency of diagonal block */

    const PetscInt *numbering;
    PetscInt       *count,nidx,*indices,*newidx,start=0;
    /* Get node count in each partition */
    ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
    ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      for (i=0; i<n; i++) count[i] *= bs;
    }
    /* Build indices from node numbering */
    ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
    ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
    for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
    ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
    ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
    ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
      for (i=0; i<nidx; i++)
        for (j=0; j<bs; j++)
          newidx[i*bs+j] = indices[i]*bs + j;
      ierr = PetscFree(indices);CHKERRQ(ierr);
      nidx   *= bs;
      indices = newidx;
    }
    /* Shift to get global indices */
    for (i=0; i<nidx; i++) indices[i] += rstart;

    /* Build the index sets for each block */
    for (i=0; i<n; i++) {
      ierr = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],&is[i]);CHKERRQ(ierr);
      ierr   = ISSort(is[i]);CHKERRQ(ierr);
      start += count[i];
    }

    ierr = PetscFree(count);CHKERRQ(ierr);
    ierr = PetscFree(indices);CHKERRQ(ierr);
    ierr = ISDestroy(isnumb);CHKERRQ(ierr);
    ierr = ISDestroy(ispart);CHKERRQ(ierr);

  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCASMDestroySubdomains"
/*@C
   PCASMDestroySubdomains - Destroys the index sets created with
   PCASMCreateSubdomains().  Should be called after setting subdomains
   with PCASMSetLocalSubdomains().

   Collective

   Input Parameters:
+  n - the number of index sets
-  is - the array of index sets

   Level: advanced

.keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid

.seealso: PCASMCreateSubdomains(), PCASMSetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMDestroySubdomains(PetscInt n, IS is[])
{
  PetscInt       i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (n <= 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
  PetscValidPointer(is,2);
  for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); }
  ierr = PetscFree(is);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
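/*
   A sketch pairing PCASMCreateSubdomains() with PCASMSetLocalSubdomains()
   and PCASMDestroySubdomains() (hypothetical caller; A is the matrix the
   preconditioner is built from, and the index sets are destroyed only after
   the solves complete, per the notes above):

       IS       *is;
       PetscInt nsub = 4;
       ierr = PCASMCreateSubdomains(A,nsub,&is);CHKERRQ(ierr);
       ierr = PCASMSetLocalSubdomains(pc,nsub,is);CHKERRQ(ierr);
       ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
       ierr = PCASMDestroySubdomains(nsub,is);CHKERRQ(ierr);
*/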
#undef __FUNCT__
#define __FUNCT__ "PCASMCreateSubdomains2D"
/*@
   PCASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
   preconditioner for a two-dimensional problem on a regular grid.

   Not Collective

   Input Parameters:
+  m, n - the number of mesh points in the x and y directions
.  M, N - the number of subdomains in the x and y directions
.  dof - degrees of freedom per node
-  overlap - overlap in mesh lines

   Output Parameters:
+  Nsub - the number of subdomains created
-  is - the array of index sets defining the subdomains

   Note:
   Presently PCASMCreateSubdomains2D() is valid only for sequential
   preconditioners.  More general related routines are
   PCASMSetTotalSubdomains() and PCASMSetLocalSubdomains().

   Level: advanced

.keywords: PC, ASM, additive Schwarz, create, subdomains, 2D, regular grid

.seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
          PCASMSetOverlap()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMCreateSubdomains2D(PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt dof,PetscInt overlap,PetscInt *Nsub,IS **is)
{
  PetscInt       i,j,height,width,ystart,xstart,yleft,yright,xleft,xright,loc_outer;
  PetscErrorCode ierr;
  PetscInt       nidx,*idx,loc,ii,jj,count;

  PetscFunctionBegin;
  if (dof != 1) SETERRQ(PETSC_ERR_SUP,"dof must be 1");

  *Nsub = N*M;
  ierr = PetscMalloc((*Nsub)*sizeof(IS *),is);CHKERRQ(ierr);
  ystart    = 0;
  loc_outer = 0;
  for (i=0; i<N; i++) {
    height = n/N + ((n % N) > i); /* height of subdomain */
    if (height < 2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Too many N subdomains for mesh dimension n");
    yleft  = ystart - overlap; if (yleft < 0) yleft = 0;
    yright = ystart + height + overlap; if (yright > n) yright = n;
    xstart = 0;
    for (j=0; j<M; j++) {
      width = m/M + ((m % M) > j); /* width of subdomain */
      if (width < 2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Too many M subdomains for mesh dimension m");
      xleft  = xstart - overlap; if (xleft < 0) xleft = 0;
      xright = xstart + width + overlap; if (xright > m) xright = m;
      nidx   = (xright - xleft)*(yright - yleft);
      ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
      loc  = 0;
      for (ii=yleft; ii<yright; ii++) {
        count = m*ii + xleft;
        for (jj=xleft; jj<xright; jj++) {
          idx[loc++] = count++;
        }
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nidx,idx,(*is)+loc_outer++);CHKERRQ(ierr);
      ierr = PetscFree(idx);CHKERRQ(ierr);
      xstart += width;
    }
    ystart += height;
  }
  for (i=0; i<*Nsub; i++) { ierr = ISSort((*is)[i]);CHKERRQ(ierr); }
  PetscFunctionReturn(0);
}
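/*
   A sketch of PCASMCreateSubdomains2D() for a 12 x 12 grid with one degree
   of freedom per node, split into 2 x 2 subdomains with overlap 1
   (hypothetical, sequential caller):

       IS       *is;
       PetscInt Nsub;
       ierr = PCASMCreateSubdomains2D(12,12,2,2,1,1,&Nsub,&is);CHKERRQ(ierr);
       ierr = PCASMSetLocalSubdomains(pc,Nsub,is);CHKERRQ(ierr);
*/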
#undef __FUNCT__
#define __FUNCT__ "PCASMGetLocalSubdomains"
/*@C
   PCASMGetLocalSubdomains - Gets the local subdomains (for this processor
   only) for the additive Schwarz preconditioner.

   Collective on PC

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n - the number of subdomains for this processor (default value = 1)
-  is - the index sets that define the subdomains for this processor

   Notes:
   The IS numbering is in the parallel, global numbering of the vector.

   Level: advanced

.keywords: PC, ASM, set, local, subdomains, additive Schwarz

.seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubmatrices()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[])
{
  PC_ASM         *osm;
  PetscErrorCode ierr;
  PetscTruth     match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  PetscValidIntPointer(n,2);
  if (is) PetscValidPointer(is,3);
  ierr = PetscTypeCompare((PetscObject)pc,PCASM,&match);CHKERRQ(ierr);
  if (!match) {
    if (n)  *n  = 0;
    if (is) *is = PETSC_NULL;
  } else {
    osm = (PC_ASM*)pc->data;
    if (n)  *n  = osm->n_local_true;
    if (is) *is = osm->is;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCASMGetLocalSubmatrices"
/*@C
   PCASMGetLocalSubmatrices - Gets the local submatrices (for this processor
   only) for the additive Schwarz preconditioner.

   Collective on PC

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n - the number of matrices for this processor (default value = 1)
-  mat - the matrices

   Level: advanced

.keywords: PC, ASM, set, local, subdomains, additive Schwarz, block Jacobi

.seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
{
  PC_ASM         *osm;
  PetscErrorCode ierr;
  PetscTruth     match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  PetscValidIntPointer(n,2);
  if (mat) PetscValidPointer(mat,3);
  if (!pc->setupcalled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
  ierr = PetscTypeCompare((PetscObject)pc,PCASM,&match);CHKERRQ(ierr);
  if (!match) {
    if (n)   *n   = 0;
    if (mat) *mat = PETSC_NULL;
  } else {
    osm = (PC_ASM*)pc->data;
    if (n)   *n   = osm->n_local_true;
    if (mat) *mat = osm->pmat;
  }
  PetscFunctionReturn(0);
}
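/*
   A sketch of inspecting the local pieces after setup (hypothetical caller;
   both routines above require the PC to be a PCASM, and the submatrices are
   available only after KSPSetUp()/PCSetUp()):

       Mat      *submat;
       IS       *subis;
       PetscInt nmat,nis;
       ierr = KSPSetUp(ksp);CHKERRQ(ierr);
       ierr = PCASMGetLocalSubmatrices(pc,&nmat,&submat);CHKERRQ(ierr);
       ierr = PCASMGetLocalSubdomains(pc,&nis,&subis);CHKERRQ(ierr);
*/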