#define PETSCKSP_DLL

/*
  This file defines an additive Schwarz preconditioner for any Mat implementation.

  Note that each processor may have any number of subdomains. But in order to
  deal easily with the VecScatter(), we treat each processor as if it has the
  same number of subdomains.

       n - total number of true subdomains on all processors
       n_local_true - actual number of subdomains on this processor
       n_local = maximum over all processors of n_local_true
*/
#include "private/pcimpl.h"     /*I "petscpc.h" I*/

typedef struct {
  PetscInt   n, n_local, n_local_true;
  PetscInt   overlap;             /* overlap requested by user */
  KSP        *ksp;                /* linear solvers for each block */
  VecScatter *restriction;        /* mapping from global to subregion */
  VecScatter *localization;       /* mapping from overlapping to non-overlapping subregion */
  VecScatter *prolongation;       /* mapping from subregion to global */
  Vec        *x,*y,*y_local;      /* work vectors */
  IS         *is;                 /* index set that defines each overlapping subdomain */
  IS         *is_local;           /* index set that defines each non-overlapping subdomain, may be NULL */
  Mat        *mat,*pmat;          /* mat is not currently used */
  PCASMType  type;                /* use reduced interpolation, restriction or both */
  PetscTruth type_set;            /* if user set this value (so won't change it for symmetric problems) */
  PetscTruth same_local_solves;   /* flag indicating whether all local solvers are same */
} PC_ASM;
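/*
  A rough sketch of how the fields above are used (informal; see PCSetUp_ASM()
  and PCApply_ASM() below for the actual logic):

    - restriction[i]  gathers the global vector onto the i-th overlapping
                      subdomain, i.e. it plays the role of R_i
    - localization[i] maps the overlapping local solution onto the
                      non-overlapping piece of subdomain i (when is_local is given)
    - prolongation[i] scatter-adds the (possibly truncated) local solution
                      back into the global vector

  With these, the apply routine computes, schematically,

      y = sum_i  R~_i^T  A_i^{-1}  R_i  x

  where A_i = pmat[i] is the i-th subdomain matrix and R~_i is either R_i
  (PC_ASM_BASIC) or the restriction to the non-overlapping part of the
  subdomain (the default restricted variant).
*/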
#undef __FUNCT__
#define __FUNCT__ "PCView_ASM"
static PetscErrorCode PCView_ASM(PC pc,PetscViewer viewer)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,bsz;
  PetscTruth     iascii,isstring;
  PetscViewer    sviewer;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
  if (iascii) {
    if (osm->n > 0) {
      ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: total subdomain blocks = %D, amount of overlap = %D\n",osm->n,osm->overlap);CHKERRQ(ierr);
    } else {
      ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: total subdomain blocks not yet set, amount of overlap = %D\n",osm->overlap);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: restriction/interpolation type - %s\n",PCASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
    if (osm->same_local_solves) {
      if (osm->ksp) {
        ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all blocks, in the following KSP and PC objects:\n");CHKERRQ(ierr);
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (!rank) {
          ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
    } else {
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] number of local blocks = %D\n",(int)rank,osm->n_local_true);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each block is in the following KSP and PC objects:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
      for (i=0; i<osm->n_local; i++) {
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (i < osm->n_local_true) {
          ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d] local block number %D, size = %D\n",(int)rank,i,bsz);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    }
  } else if (isstring) {
    ierr = PetscViewerStringSPrintf(viewer," blocks=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PCASM",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCASMPrintSubdomains"
static PetscErrorCode PCASMPrintSubdomains(PC pc)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  const char     *prefix;
  char           fname[PETSC_MAX_PATH_LEN+1];
  PetscViewer    viewer;
  PetscInt       i,j,nidx;
  const PetscInt *idx;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetString(prefix,"-pc_asm_print_subdomains",fname,PETSC_MAX_PATH_LEN,PETSC_NULL);CHKERRQ(ierr);
  if (fname[0] == 0) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
  ierr = PetscViewerASCIIOpen(((PetscObject)pc)->comm,fname,&viewer);CHKERRQ(ierr);
  for (i=0; i<osm->n_local_true; i++) {
    ierr = ISGetLocalSize(osm->is[i],&nidx);CHKERRQ(ierr);
    ierr = ISGetIndices(osm->is[i],&idx);CHKERRQ(ierr);
    for (j=0; j<nidx; j++) {
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%D ",idx[j]);CHKERRQ(ierr);
    }
    ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr);
    if (osm->is_local) {
      ierr = ISGetLocalSize(osm->is_local[i],&nidx);CHKERRQ(ierr);
      ierr = ISGetIndices(osm->is_local[i],&idx);CHKERRQ(ierr);
      for (j=0; j<nidx; j++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%D ",idx[j]);CHKERRQ(ierr);
      }
      ierr = ISRestoreIndices(osm->is_local[i],&idx);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr);
    }
  }
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetUp_ASM"
static PetscErrorCode PCSetUp_ASM(PC pc)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscTruth     symset,flg;
  PetscInt       i,m,firstRow,lastRow;
  PetscMPIInt    size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  IS             isl;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  Vec            vec;

  PetscFunctionBegin;
  if (!pc->setupcalled) {

    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) { osm->type = PC_ASM_BASIC; }
    }

    if (osm->n == PETSC_DECIDE && osm->n_local_true < 1) {
      /* no subdomains given, use one per processor */
      osm->n_local = osm->n_local_true = 1;
      ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
      osm->n = size;
    } else if (osm->n == PETSC_DECIDE) {
      /* determine global number of subdomains */
      PetscInt inwork[2],outwork[2];
      inwork[0] = inwork[1] = osm->n_local_true;
      ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
      osm->n_local = outwork[0];
      osm->n       = outwork[1];
    }

    if (!osm->is){ /* create the index sets */
      ierr = PCASMCreateSubdomains(pc->pmat,osm->n_local_true,&osm->is);CHKERRQ(ierr);
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetTruth(prefix,"-pc_asm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) { ierr = PCASMPrintSubdomains(pc);CHKERRQ(ierr); }

    /* Extend the "overlapping" regions by a number of steps */
    ierr = MatIncreaseOverlap(pc->pmat,osm->n_local_true,osm->is,osm->overlap);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
      if (osm->is_local) {
        ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
      }
    }

    /* Create the local work vectors and scatter contexts */
    ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(VecScatter *),&osm->restriction);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(VecScatter *),&osm->localization);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(VecScatter *),&osm->prolongation);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(Vec *),&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(Vec *),&osm->y);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(Vec *),&osm->y_local);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(vec, &firstRow, &lastRow);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; ++i, firstRow += m) {
      ierr = ISGetLocalSize(osm->is[i],&m);CHKERRQ(ierr);
      ierr = VecCreateSeq(PETSC_COMM_SELF,m,&osm->x[i]);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);CHKERRQ(ierr);
      ierr = VecScatterCreate(vec,osm->is[i],osm->x[i],isl,&osm->restriction[i]);CHKERRQ(ierr);
      ierr = ISDestroy(isl);CHKERRQ(ierr);
      ierr = VecDuplicate(osm->x[i],&osm->y[i]);CHKERRQ(ierr);
      if (osm->is_local) {
        IS             isShift;
        const PetscInt *idx_local;
        PetscInt       *idx, j;

        ierr = ISGetLocalSize(osm->is_local[i],&m);CHKERRQ(ierr);
        ierr = ISGetIndices(osm->is_local[i], &idx_local);CHKERRQ(ierr);
        ierr = PetscMalloc(m*sizeof(PetscInt), &idx);CHKERRQ(ierr);
        for(j = 0; j < m; ++j) {
          idx[j] = idx_local[j] - firstRow;
        }
        ierr = ISRestoreIndices(osm->is_local[i], &idx_local);CHKERRQ(ierr);
        ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,&isShift);CHKERRQ(ierr);
        ierr = PetscFree(idx);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);CHKERRQ(ierr);
        ierr = VecCreateSeq(PETSC_COMM_SELF,m,&osm->y_local[i]);CHKERRQ(ierr);
        ierr = VecScatterCreate(osm->y[i],isShift,osm->y_local[i],isl,&osm->localization[i]);CHKERRQ(ierr);
        ierr = ISDestroy(isShift);CHKERRQ(ierr);

        ierr = VecScatterCreate(vec,osm->is_local[i],osm->y_local[i],isl,&osm->prolongation[i]);CHKERRQ(ierr);
        ierr = ISDestroy(isl);CHKERRQ(ierr);
      } else {
        osm->y_local[i] = osm->y[i];
        ierr = PetscObjectReference((PetscObject) osm->y[i]);CHKERRQ(ierr);
        osm->localization[i] = osm->restriction[i];
        ierr = PetscObjectReference((PetscObject) osm->restriction[i]);CHKERRQ(ierr);
        osm->prolongation[i] = osm->restriction[i];
        ierr = PetscObjectReference((PetscObject) osm->restriction[i]);CHKERRQ(ierr);
      }
    }
    if (firstRow != lastRow) SETERRQ2(PETSC_ERR_PLIB, "Specified ASM subdomain sizes were invalid: %d != %d", firstRow, lastRow);
    for (i=osm->n_local_true; i<osm->n_local; i++) {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDuplicate(osm->x[i],&osm->y[i]);CHKERRQ(ierr);
      ierr = VecDuplicate(osm->x[i],&osm->y_local[i]);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&isl);CHKERRQ(ierr);
      ierr = VecScatterCreate(vec,isl,osm->x[i],isl,&osm->restriction[i]);CHKERRQ(ierr);
      osm->localization[i] = osm->restriction[i];
      ierr = PetscObjectReference((PetscObject) osm->restriction[i]);CHKERRQ(ierr);
      osm->prolongation[i] = osm->restriction[i];
      ierr = PetscObjectReference((PetscObject) osm->restriction[i]);CHKERRQ(ierr);
      ierr = ISDestroy(isl);CHKERRQ(ierr);
    }
    ierr = VecDestroy(vec);CHKERRQ(ierr);

    /* Create the local solvers */
    ierr = PetscMalloc(osm->n_local_true*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = KSPCreate(PETSC_COMM_SELF,&ksp);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    scall = MAT_INITIAL_MATRIX;

  } else {
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
  }

  /*
     Extract out the submatrices
  */
  ierr = MatGetSubMatrices(pc->pmat,osm->n_local_true,osm->is,osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n_local_true,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over subdomains putting them into local ksp
  */
  for (i=0; i<osm->n_local_true; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetUpOnBlocks_ASM"
static PetscErrorCode PCSetUpOnBlocks_ASM(PC pc)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  for (i=0; i<osm->n_local_true; i++) {
    ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
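/*
  Informal schematic of PCApply_ASM() below, using the scatters built in
  PCSetUp_ASM():

    for each local block i:
      x_i       = restriction[i] applied to x       (gather overlapping RHS piece)
      y_i       = A_i^{-1} x_i                       (local solve with osm->ksp[i])
      y_local_i = localization[i] applied to y_i     (drop the overlap when is_local is set)
      y        += reverse scatter of y_local_i through prolongation[i]

  PC_ASM_RESTRICT and PC_ASM_INTERPOLATE only control whether the forward or
  reverse scatters communicate off-processor values (see the flags set at the
  top of the routine).
*/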
#undef __FUNCT__
#define __FUNCT__ "PCApply_ASM"
static PetscErrorCode PCApply_ASM(PC pc,Vec x,Vec y)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i,n_local = osm->n_local,n_local_true = osm->n_local_true;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_ASM_RESTRICT)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    for (i=0; i<n_local_true; i++) {
      ierr = VecSet(osm->x[i],0.0);CHKERRQ(ierr);
    }
  }
  if (!(osm->type & PC_ASM_INTERPOLATE)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  for (i=0; i<n_local; i++) {
    ierr = VecScatterBegin(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<n_local_true; i++) {
    ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* handle the rest of the scatters that do not have local solves */
  for (i=n_local_true; i<n_local; i++) {
    ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  for (i=0; i<n_local; i++) {
    ierr = VecScatterEnd(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCApplyTranspose_ASM"
static PetscErrorCode PCApplyTranspose_ASM(PC pc,Vec x,Vec y)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i,n_local = osm->n_local,n_local_true = osm->n_local_true;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).

     Note: these are reversed from the PCApply_ASM() because we are applying the
     transpose of the three terms
  */
  if (!(osm->type & PC_ASM_INTERPOLATE)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    for (i=0; i<n_local_true; i++) {
      ierr = VecSet(osm->x[i],0.0);CHKERRQ(ierr);
    }
  }
  if (!(osm->type & PC_ASM_RESTRICT)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  for (i=0; i<n_local; i++) {
    ierr = VecScatterBegin(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<n_local_true; i++) {
    ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* handle the rest of the scatters that do not have local solves */
  for (i=n_local_true; i<n_local; i++) {
    ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  for (i=0; i<n_local; i++) {
    ierr = VecScatterEnd(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCDestroy_ASM"
static PetscErrorCode PCDestroy_ASM(PC pc)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (osm->ksp) {
    for (i=0; i<osm->n_local_true; i++) {
      ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
  }
  if (osm->pmat) {
    if (osm->n_local_true > 0) {
      ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
    }
  }
  if (osm->restriction) {
    for (i=0; i<osm->n_local; i++) {
      ierr = VecScatterDestroy(osm->restriction[i]);CHKERRQ(ierr);
      ierr = VecScatterDestroy(osm->localization[i]);CHKERRQ(ierr); /* localization[i] holds its own reference (see PCSetUp_ASM()) */
      ierr = VecScatterDestroy(osm->prolongation[i]);CHKERRQ(ierr);
      ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr);
      ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr);
      ierr = VecDestroy(osm->y_local[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->restriction);CHKERRQ(ierr);
    ierr = PetscFree(osm->localization);CHKERRQ(ierr);
    ierr = PetscFree(osm->prolongation);CHKERRQ(ierr);
    ierr = PetscFree(osm->x);CHKERRQ(ierr);
    ierr = PetscFree(osm->y);CHKERRQ(ierr);
    ierr = PetscFree(osm->y_local);CHKERRQ(ierr);
  }
  if (osm->is) {
    ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);CHKERRQ(ierr);
  }
  ierr = PetscFree(osm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetFromOptions_ASM"
static PetscErrorCode PCSetFromOptions_ASM(PC pc)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       blocks,ovl;
  PetscTruth     symset,flg;
  PCASMType      asmtype;

  PetscFunctionBegin;
  /* set the type to symmetric if matrix is symmetric */
  if (!osm->type_set && pc->pmat) {
    ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
    if (symset && flg) { osm->type = PC_ASM_BASIC; }
  }
  ierr = PetscOptionsHead("Additive Schwarz options");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-pc_asm_blocks","Number of subdomains","PCASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
    if (flg) {ierr = PCASMSetTotalSubdomains(pc,blocks,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); }
    ierr = PetscOptionsInt("-pc_asm_overlap","Number of grid points overlap","PCASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
    if (flg) {ierr = PCASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
    flg  = PETSC_FALSE;
    ierr = PetscOptionsEnum("-pc_asm_type","Type of restriction/extension","PCASMSetType",PCASMTypes,(PetscEnum)osm->type,(PetscEnum*)&asmtype,&flg);CHKERRQ(ierr);
    if (flg) {ierr = PCASMSetType(pc,asmtype);CHKERRQ(ierr); }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMSetLocalSubdomains_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetLocalSubdomains_ASM(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (n < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
  if (pc->setupcalled && (n != osm->n_local_true || is)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetLocalSubdomains() should be called before calling PCSetUp().");
  }
  if (!pc->setupcalled) {
    if (is) {
      for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
    }
    if (osm->is) {
      ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);CHKERRQ(ierr);
    }
    osm->n_local_true = n;
    osm->is           = 0;
    osm->is_local     = 0;
    if (is) {
      ierr = PetscMalloc(n*sizeof(IS *),&osm->is);CHKERRQ(ierr);
      for (i=0; i<n; i++) { osm->is[i] = is[i]; }
    }
    if (is_local) {
      ierr = PetscMalloc(n*sizeof(IS *),&osm->is_local);CHKERRQ(ierr);
      for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
    }
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMSetTotalSubdomains_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetTotalSubdomains_ASM(PC pc,PetscInt N,IS *is)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       n;

  PetscFunctionBegin;
  if (N < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
  if (is) SETERRQ(PETSC_ERR_SUP,"Use PCASMSetLocalSubdomains() to set specific index sets\n\tthey cannot be set globally yet.");

  /*
     Split the subdomains equally among all processors
  */
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  n = N/size + ((N % size) > rank);
  if (!n) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
  if (pc->setupcalled && n != osm->n_local_true) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetTotalSubdomains() should be called before PCSetUp().");
  if (!pc->setupcalled) {
    if (osm->is) {
      ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);CHKERRQ(ierr);
    }
    osm->n_local_true = n;
    osm->is           = 0;
    osm->is_local     = 0;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

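/*
  Worked example of the split above (illustrative only): with N = 5 total
  blocks on size = 3 processes, n = N/size + ((N % size) > rank) gives
  n = 2 on ranks 0 and 1 and n = 1 on rank 2, i.e. the remainder blocks go
  to the lowest-numbered ranks.
*/
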
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMSetOverlap_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetOverlap_ASM(PC pc,PetscInt ovl)
{
  PC_ASM *osm = (PC_ASM*)pc->data;

  PetscFunctionBegin;
  if (ovl < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
  if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetOverlap() should be called before PCSetUp().");
  if (!pc->setupcalled) {
    osm->overlap = ovl;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMSetType_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetType_ASM(PC pc,PCASMType type)
{
  PC_ASM *osm = (PC_ASM*)pc->data;

  PetscFunctionBegin;
  osm->type     = type;
  osm->type_set = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMGetSubKSP_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetSubKSP_ASM(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (osm->n_local_true < 1) {
    SETERRQ(PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
  }

  if (n_local) {
    *n_local = osm->n_local_true;
  }
  if (first_local) {
    ierr = MPI_Scan(&osm->n_local_true,first_local,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
    *first_local -= osm->n_local_true;
  }
  if (ksp) {
    /* Assume that local solves are now different; not necessarily
       true though!  This flag is used only for PCView_ASM() */
    *ksp                   = osm->ksp;
    osm->same_local_solves = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END


#undef __FUNCT__
#define __FUNCT__ "PCASMSetLocalSubdomains"
/*@C
    PCASMSetLocalSubdomains - Sets the local subdomains (for this processor
    only) for the additive Schwarz preconditioner.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
.   n - the number of subdomains for this processor (default value = 1)
.   is - the index sets that define the subdomains for this processor
         (or PETSC_NULL for PETSc to determine subdomains)
-   is_local - the index sets that define the local part of the subdomains for this processor
         (or PETSC_NULL to use the default of 1 subdomain per process)

    Notes:
    The IS numbering is in the parallel, global numbering of the vector.

    By default the ASM preconditioner uses 1 block per processor.

    These index sets cannot be destroyed until after completion of the
    linear solves for which the ASM preconditioner is being used.

    Use PCASMSetTotalSubdomains() to set the subdomains for all processors.

    Level: advanced

.keywords: PC, ASM, set, local, subdomains, additive Schwarz

.seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PetscErrorCode ierr,(*f)(PC,PetscInt,IS[],IS[]);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetLocalSubdomains_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,n,is,is_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCASMSetTotalSubdomains"
/*@C
    PCASMSetTotalSubdomains - Sets the subdomains for all processors for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine, with the same index sets.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
.   n - the number of subdomains for all processors
.   is - the index sets that define the subdomains for all processors
         (or PETSC_NULL for PETSc to determine subdomains)
-   is_local - the index sets that define the local part of the subdomains for this processor
         (or PETSC_NULL to use the default of 1 subdomain per process)

    Options Database Key:
    To set the total number of subdomain blocks rather than specify the
    index sets, use the option
.    -pc_asm_blocks <blks> - Sets total blocks

    Notes:
    Currently you cannot use this to set the actual subdomains with the argument is.

    By default the ASM preconditioner uses 1 block per processor.

    These index sets cannot be destroyed until after completion of the
    linear solves for which the ASM preconditioner is being used.

    Use PCASMSetLocalSubdomains() to set local subdomains.

    Level: advanced

.keywords: PC, ASM, set, total, global, subdomains, additive Schwarz

.seealso: PCASMSetLocalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetTotalSubdomains(PC pc,PetscInt N,IS is[],IS is_local[])
{
  PetscErrorCode ierr,(*f)(PC,PetscInt,IS[],IS[]);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetTotalSubdomains_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,N,is,is_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

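/*
  Illustrative (untested) usage sketch for the two routines above: define two
  blocks per process by hand and hand them to the preconditioner before
  KSPSetUp(). The variables nloc and rstart are assumed to come from the
  caller (e.g. from MatGetOwnershipRange()); the block sizes are arbitrary.

     IS is[2];

     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCASM);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_SELF,nloc/2,rstart,1,&is[0]);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_SELF,nloc-nloc/2,rstart+nloc/2,1,&is[1]);CHKERRQ(ierr);
     ierr = PCASMSetLocalSubdomains(pc,2,is,PETSC_NULL);CHKERRQ(ierr);

  The index sets may only be destroyed (ISDestroy()) after the linear solves
  that use this preconditioner are complete.
*/
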
#undef __FUNCT__
#define __FUNCT__ "PCASMSetOverlap"
/*@
    PCASMSetOverlap - Sets the overlap between a pair of subdomains for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine.

    Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)

    Options Database Key:
.   -pc_asm_overlap <ovl> - Sets overlap

    Notes:
    By default the ASM preconditioner uses 1 block per processor.  To use
    multiple blocks per processor, see PCASMSetTotalSubdomains() and
    PCASMSetLocalSubdomains() (and the option -pc_asm_blocks <blks>).

    The overlap defaults to 1, so if one desires that no additional
    overlap be computed beyond what may have been set with a call to
    PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(), then ovl
    must be set to be 0.  In particular, if one does not explicitly set
    the subdomains in an application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in an ASM
    variant that is equivalent to the block Jacobi preconditioner.

    Note that one can define initial index sets with any overlap via
    PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(); the routine
    PCASMSetOverlap() merely allows PETSc to extend that overlap further
    if desired.

    Level: intermediate

.keywords: PC, ASM, set, overlap

.seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr,(*f)(PC,PetscInt);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetOverlap_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,ovl);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCASMSetType"
/*@
    PCASMSetType - Sets the type of restriction and interpolation used
    for local problems in the additive Schwarz method.

    Collective on PC

    Input Parameters:
+   pc   - the preconditioner context
-   type - variant of ASM, one of
.vb
      PC_ASM_BASIC       - full interpolation and restriction
      PC_ASM_RESTRICT    - full restriction, local processor interpolation
      PC_ASM_INTERPOLATE - full interpolation, local processor restriction
      PC_ASM_NONE        - local processor restriction and interpolation
.ve

    Options Database Key:
.   -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type

    Level: intermediate

.keywords: PC, ASM, set, type

.seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetType(PC pc,PCASMType type)
{
  PetscErrorCode ierr,(*f)(PC,PCASMType);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetType_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,type);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

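/*
  Illustrative usage sketch for the two setters above (the particular values
  are arbitrary):

     ierr = PCSetType(pc,PCASM);CHKERRQ(ierr);
     ierr = PCASMSetOverlap(pc,2);CHKERRQ(ierr);
     ierr = PCASMSetType(pc,PC_ASM_BASIC);CHKERRQ(ierr);

  or, equivalently, from the options database:

     -pc_type asm -pc_asm_overlap 2 -pc_asm_type basic
*/
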
#undef __FUNCT__
#define __FUNCT__ "PCASMGetSubKSP"
/*@C
   PCASMGetSubKSP - Gets the local KSP contexts for all blocks on
   this processor.

   Collective on PC iff first_local is requested

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n_local - the number of blocks on this processor or PETSC_NULL
.  first_local - the global number of the first block on this processor or PETSC_NULL,
                 all processors must request or all must pass PETSC_NULL
-  ksp - the array of KSP contexts

   Note:
   After PCASMGetSubKSP() the array of KSP contexts is not to be freed.

   Currently for some matrix implementations only 1 block per processor
   is supported.

   You must call KSPSetUp() before calling PCASMGetSubKSP().

   Level: advanced

.keywords: PC, ASM, additive Schwarz, get, sub, KSP, context

.seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMSetOverlap(),
          PCASMCreateSubdomains2D()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr,(*f)(PC,PetscInt*,PetscInt*,KSP **);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMGetSubKSP_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,n_local,first_local,ksp);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot get subksp for this type of PC");
  }

  PetscFunctionReturn(0);
}

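/*
  Illustrative usage sketch for PCASMGetSubKSP(); ksp is the outer solver and
  nlocal, first, i are local variables of the caller:

     KSP      *subksp;
     PC       subpc;
     PetscInt nlocal,first,i;

     ierr = KSPSetUp(ksp);CHKERRQ(ierr);
     ierr = PCASMGetSubKSP(pc,&nlocal,&first,&subksp);CHKERRQ(ierr);
     for (i=0; i<nlocal; i++) {
       ierr = KSPGetPC(subksp[i],&subpc);CHKERRQ(ierr);
       ierr = PCSetType(subpc,PCILU);CHKERRQ(ierr);
     }

  Do not free subksp; it points at storage owned by the preconditioner.
*/
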
/* -------------------------------------------------------------------------------------*/
/*MC
   PCASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
           its own KSP object.

   Options Database Keys:
+  -pc_asm_truelocal - Activates PCASMSetUseTrueLocal()
.  -pc_asm_blocks <blks> - Sets total blocks
.  -pc_asm_overlap <ovl> - Sets overlap
-  -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type

   IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
   will get a different convergence rate due to the default option of -pc_asm_type restrict. Use
   -pc_asm_type basic to use the standard ASM.

   Notes: Each processor can have one or more blocks, but a block cannot be shared by more
   than one processor. Defaults to one block per processor.

   To set options on the solvers for each block append -sub_ to all the KSP and PC
   options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

   To set the options on the solvers separately for each block call PCASMGetSubKSP()
   and set the options directly on the resulting KSP object (you can access its PC
   with KSPGetPC())

   Level: beginner

   Concepts: additive Schwarz method

   References:
   An additive variant of the Schwarz alternating method for the case of many subregions,
   M Dryja, OB Widlund - Courant Institute, New York University Technical report

   Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
   Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
           PCBJACOBI, PCASMSetUseTrueLocal(), PCASMGetSubKSP(), PCASMSetLocalSubdomains(),
           PCASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCASMSetOverlap(), PCASMSetType()

M*/

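/*
  Typical command-line usage of PCASM (illustrative; the executable name and
  process count are placeholders):

     mpiexec -n 4 ./app -ksp_type gmres -pc_type asm -pc_asm_overlap 2 \
         -sub_ksp_type preonly -sub_pc_type ilu -sub_pc_factor_levels 1

  This uses restricted ASM (the default -pc_asm_type restrict) with one block
  per process and ILU(1) as the approximate solver on each block.
*/
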
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_ASM(PC pc)
{
  PetscErrorCode ierr;
  PC_ASM         *osm;

  PetscFunctionBegin;
  ierr = PetscNewLog(pc,PC_ASM,&osm);CHKERRQ(ierr);
  osm->n                 = PETSC_DECIDE;
  osm->n_local           = 0;
  osm->n_local_true      = 0;
  osm->overlap           = 1;
  osm->ksp               = 0;
  osm->restriction       = 0;
  osm->localization      = 0;
  osm->prolongation      = 0;
  osm->x                 = 0;
  osm->y                 = 0;
  osm->y_local           = 0;
  osm->is                = 0;
  osm->is_local          = 0;
  osm->mat               = 0;
  osm->pmat              = 0;
  osm->type              = PC_ASM_RESTRICT;
  osm->same_local_solves = PETSC_TRUE;

  pc->data                 = (void*)osm;
  pc->ops->apply           = PCApply_ASM;
  pc->ops->applytranspose  = PCApplyTranspose_ASM;
  pc->ops->setup           = PCSetUp_ASM;
  pc->ops->destroy         = PCDestroy_ASM;
  pc->ops->setfromoptions  = PCSetFromOptions_ASM;
  pc->ops->setuponblocks   = PCSetUpOnBlocks_ASM;
  pc->ops->view            = PCView_ASM;
  pc->ops->applyrichardson = 0;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetLocalSubdomains_C","PCASMSetLocalSubdomains_ASM",
                    PCASMSetLocalSubdomains_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetTotalSubdomains_C","PCASMSetTotalSubdomains_ASM",
                    PCASMSetTotalSubdomains_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetOverlap_C","PCASMSetOverlap_ASM",
                    PCASMSetOverlap_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetType_C","PCASMSetType_ASM",
                    PCASMSetType_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMGetSubKSP_C","PCASMGetSubKSP_ASM",
                    PCASMGetSubKSP_ASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END


#undef __FUNCT__
#define __FUNCT__ "PCASMCreateSubdomains"
/*@C
   PCASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
   preconditioner for any problem on a general grid.

   Collective

   Input Parameters:
+  A - The global matrix operator
-  n - the number of local blocks

   Output Parameters:
.  outis - the array of index sets defining the subdomains

   Level: advanced

   Note: this generates nonoverlapping subdomains; the PCASM will generate the overlap
   from these if you use PCASMSetLocalSubdomains()

   In the Fortran version you must provide the array outis[] already allocated of length n.

.keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid

.seealso: PCASMSetLocalSubdomains(), PCASMDestroySubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
{
  MatPartitioning mpart;
  const char      *prefix;
  PetscErrorCode  (*f)(Mat,PetscTruth*,MatReuse,Mat*);
  PetscMPIInt     size;
  PetscInt        i,j,rstart,rend,bs;
  PetscTruth      iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
  Mat             Ad = PETSC_NULL, adj;
  IS              ispart,isnumb,*is;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidPointer(outis,3);
  if (n < 1) {SETERRQ1(PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);}

  /* Get prefix, row distribution, and block size */
  ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (rstart/bs*bs != rstart || rend/bs*bs != rend) {
    SETERRQ3(PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
  }
  /* Get diagonal block from matrix if possible */
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr);
  } else if (size == 1) {
    iscopy = PETSC_FALSE; Ad = A;
  } else {
    iscopy = PETSC_FALSE; Ad = PETSC_NULL;
  }
  if (Ad) {
    ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
    if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
  }
  if (Ad && n > 1) {
    PetscTruth match,done;
    /* Try to setup a good matrix partitioning if available */
    ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
    ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
    ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
    ierr = PetscTypeCompare((PetscObject)mpart,MAT_PARTITIONING_CURRENT,&match);CHKERRQ(ierr);
    if (!match) {
      ierr = PetscTypeCompare((PetscObject)mpart,MAT_PARTITIONING_SQUARE,&match);CHKERRQ(ierr);
    }
    if (!match) { /* assume a "good" partitioner is available */
      PetscInt na,*ia,*ja;
      ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
      if (done) {
        /* Build adjacency matrix by hand.
           Unfortunately a call to
           MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
           remove the block-aij structure and we cannot expect
           MatPartitioning to split vertices as we need */
        PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
        nnz = 0;
        for (i=0; i<na; i++) { /* count number of nonzeros */
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] == i) { /* don't count diagonal */
              len--; break;
            }
          }
          nnz += len;
        }
        ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
        ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
        nnz    = 0;
        iia[0] = 0;
        for (i=0; i<na; i++) { /* fill adjacency */
          cnt = 0;
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] != i) { /* if not diagonal */
              jja[nnz+cnt++] = row[j];
            }
          }
          nnz += cnt;
          iia[i+1] = nnz;
        }
        /* Partitioning of the adjacency matrix */
        ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
        ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
        ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
        ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
        ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
        ierr = MatDestroy(adj);CHKERRQ(ierr);
        foundpart = PETSC_TRUE;
      }
      ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
    }
    ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr);
  }
  if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);}

  ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
  *outis = is;

  if (!foundpart) {

    /* Partitioning by contiguous chunks of rows */

    PetscInt mbs   = (rend-rstart)/bs;
    PetscInt start = rstart;
    for (i=0; i<n; i++) {
      PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
      ierr = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
      start += count;
    }

  } else {

    /* Partitioning by adjacency of diagonal block */

    const PetscInt *numbering;
    PetscInt       *count,nidx,*indices,*newidx,start=0;
    /* Get node count in each partition */
    ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
    ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      for (i=0; i<n; i++) count[i] *= bs;
    }
    /* Build indices from node numbering */
    ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
    ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
    for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
    ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
    ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
    ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
      for (i=0; i<nidx; i++)
        for (j=0; j<bs; j++)
          newidx[i*bs+j] = indices[i]*bs + j;
      ierr = PetscFree(indices);CHKERRQ(ierr);
      nidx   *= bs;
      indices = newidx;
    }
    /* Shift to get global indices */
    for (i=0; i<nidx; i++) indices[i] += rstart;

    /* Build the index sets for each block */
    for (i=0; i<n; i++) {
      ierr = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],&is[i]);CHKERRQ(ierr);
      ierr = ISSort(is[i]);CHKERRQ(ierr);
      start += count[i];
    }

    ierr = PetscFree(count);CHKERRQ(ierr);
    ierr = PetscFree(indices);CHKERRQ(ierr);
    ierr = ISDestroy(isnumb);CHKERRQ(ierr);
    ierr = ISDestroy(ispart);CHKERRQ(ierr);

  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCASMDestroySubdomains"
/*@C
   PCASMDestroySubdomains - Destroys the index sets created with
   PCASMCreateSubdomains(). Should be called after setting subdomains
   with PCASMSetLocalSubdomains().

   Collective

   Input Parameters:
+  n - the number of index sets
.  is - the array of index sets
-  is_local - the array of local index sets, can be PETSC_NULL

   Level: advanced

.keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid

.seealso: PCASMCreateSubdomains(), PCASMSetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
{
  PetscInt       i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (n <= 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
  PetscValidPointer(is,2);
  for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); }
  ierr = PetscFree(is);CHKERRQ(ierr);
  if (is_local) {
    PetscValidPointer(is_local,3);
    for (i=0; i<n; i++) { ierr = ISDestroy(is_local[i]);CHKERRQ(ierr); }
    ierr = PetscFree(is_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

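/*
  Illustrative usage sketch tying the routines above together: A is the system
  matrix, pc an ASM preconditioner, and nblocks the number of local blocks
  chosen by the caller.

     IS *is;

     ierr = PCASMCreateSubdomains(A,nblocks,&is);CHKERRQ(ierr);
     ierr = PCASMSetLocalSubdomains(pc,nblocks,is,PETSC_NULL);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
     ierr = PCASMDestroySubdomains(nblocks,is,PETSC_NULL);CHKERRQ(ierr);

  The destroy call comes only after the solves are complete, as required by
  the PCASMSetLocalSubdomains() notes above.
*/
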
#undef __FUNCT__
#define __FUNCT__ "PCASMCreateSubdomains2D"
/*@
   PCASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
   preconditioner for a two-dimensional problem on a regular grid.

   Not Collective

   Input Parameters:
+  m, n - the number of mesh points in the x and y directions
.  M, N - the number of subdomains in the x and y directions
.  dof - degrees of freedom per node
-  overlap - overlap in mesh lines

   Output Parameters:
+  Nsub - the number of subdomains created
-  is - the array of index sets defining the subdomains

   Note:
   Presently PCASMCreateSubdomains2D() is valid only for sequential
   preconditioners.  More general related routines are
   PCASMSetTotalSubdomains() and PCASMSetLocalSubdomains().

   Level: advanced

.keywords: PC, ASM, additive Schwarz, create, subdomains, 2D, regular grid

.seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
          PCASMSetOverlap()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMCreateSubdomains2D(PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt dof,PetscInt overlap,PetscInt *Nsub,IS **is)
{
  PetscInt       i,j,height,width,ystart,xstart,yleft,yright,xleft,xright,loc_outter;
  PetscErrorCode ierr;
  PetscInt       nidx,*idx,loc,ii,jj,count;

  PetscFunctionBegin;
  if (dof != 1) SETERRQ(PETSC_ERR_SUP," ");

  *Nsub = N*M;
  ierr = PetscMalloc((*Nsub)*sizeof(IS *),is);CHKERRQ(ierr);
  ystart = 0;
  loc_outter = 0;
  for (i=0; i<N; i++) {
    height = n/N + ((n % N) > i); /* height of subdomain */
    if (height < 2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Too many N subdomains for mesh dimension n");
    yleft  = ystart - overlap; if (yleft < 0) yleft = 0;
    yright = ystart + height + overlap; if (yright > n) yright = n;
    xstart = 0;
    for (j=0; j<M; j++) {
      width = m/M + ((m % M) > j); /* width of subdomain */
      if (width < 2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Too many M subdomains for mesh dimension m");
      xleft  = xstart - overlap; if (xleft < 0) xleft = 0;
      xright = xstart + width + overlap; if (xright > m) xright = m;
      nidx   = (xright - xleft)*(yright - yleft);
      ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
      loc = 0;
      for (ii=yleft; ii<yright; ii++) {
        count = m*ii + xleft;
        for (jj=xleft; jj<xright; jj++) {
          idx[loc++] = count++;
        }
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nidx,idx,(*is)+loc_outter++);CHKERRQ(ierr);
      ierr = PetscFree(idx);CHKERRQ(ierr);
      xstart += width;
    }
    ystart += height;
  }
  for (i=0; i<*Nsub; i++) { ierr = ISSort((*is)[i]);CHKERRQ(ierr); }
  PetscFunctionReturn(0);
}

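/*
  Illustrative usage sketch for PCASMCreateSubdomains2D() (grid and subdomain
  counts are arbitrary): split a 64 x 64 mesh with one degree of freedom per
  node into a 4 x 4 array of subdomains with one mesh line of overlap, then
  hand the index sets to the preconditioner.

     PetscInt Nsub;
     IS       *is;

     ierr = PCASMCreateSubdomains2D(64,64,4,4,1,1,&Nsub,&is);CHKERRQ(ierr);
     ierr = PCASMSetLocalSubdomains(pc,Nsub,is,PETSC_NULL);CHKERRQ(ierr);

  Since these index sets already contain the desired overlap, one would
  typically also call PCASMSetOverlap(pc,0) so that PETSc does not extend the
  overlap further (see the PCASMSetOverlap() notes above).
*/
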
#undef __FUNCT__
#define __FUNCT__ "PCASMGetLocalSubdomains"
/*@C
    PCASMGetLocalSubdomains - Gets the local subdomains (for this processor
    only) for the additive Schwarz preconditioner.

    Collective on PC

    Input Parameter:
.   pc - the preconditioner context

    Output Parameters:
+   n - the number of subdomains for this processor (default value = 1)
.   is - the index sets that define the subdomains for this processor
-   is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)

    Notes:
    The IS numbering is in the parallel, global numbering of the vector.

    Level: advanced

.keywords: PC, ASM, set, local, subdomains, additive Schwarz

.seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubmatrices()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
{
  PC_ASM         *osm;
  PetscErrorCode ierr;
  PetscTruth     match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  PetscValidIntPointer(n,2);
  if (is) PetscValidPointer(is,3);
  ierr = PetscTypeCompare((PetscObject)pc,PCASM,&match);CHKERRQ(ierr);
  if (!match) {
    if (n)  *n  = 0;
    if (is) *is = PETSC_NULL;
  } else {
    osm = (PC_ASM*)pc->data;
    if (n)  *n  = osm->n_local_true;
    if (is) *is = osm->is;
    if (is_local) *is_local = osm->is_local;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCASMGetLocalSubmatrices"
/*@C
    PCASMGetLocalSubmatrices - Gets the local submatrices (for this processor
    only) for the additive Schwarz preconditioner.

    Collective on PC

    Input Parameter:
.   pc - the preconditioner context

    Output Parameters:
+   n - the number of matrices for this processor (default value = 1)
-   mat - the matrices

    Level: advanced

.keywords: PC, ASM, set, local, subdomains, additive Schwarz, block Jacobi

.seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
{
  PC_ASM         *osm;
  PetscErrorCode ierr;
  PetscTruth     match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  PetscValidIntPointer(n,2);
  if (mat) PetscValidPointer(mat,3);
  if (!pc->setupcalled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
  ierr = PetscTypeCompare((PetscObject)pc,PCASM,&match);CHKERRQ(ierr);
  if (!match) {
    if (n)   *n   = 0;
    if (mat) *mat = PETSC_NULL;
  } else {
    osm = (PC_ASM*)pc->data;
    if (n)   *n   = osm->n_local_true;
    if (mat) *mat = osm->pmat;
  }
  PetscFunctionReturn(0);
}
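
/*
  Illustrative usage sketch for the two query routines above, after setup has
  been performed (ksp is the outer solver owned by the caller):

     PetscInt nloc,i;
     Mat      *submats;
     IS       *subis;

     ierr = KSPSetUp(ksp);CHKERRQ(ierr);
     ierr = PCASMGetLocalSubdomains(pc,&nloc,&subis,PETSC_NULL);CHKERRQ(ierr);
     ierr = PCASMGetLocalSubmatrices(pc,&nloc,&submats);CHKERRQ(ierr);
     for (i=0; i<nloc; i++) {
       ierr = MatView(submats[i],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
     }

  The returned arrays are owned by the preconditioner and must not be freed.
*/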