#define PETSCKSP_DLL

/*
  This file defines an additive Schwarz preconditioner for any Mat implementation.

  Note that each processor may have any number of subdomains. But in order to
  deal easily with the VecScatter(), we treat each processor as if it has the
  same number of subdomains.

       n - total number of true subdomains on all processors
       n_local_true - actual number of subdomains on this processor
       n_local = maximum over all processors of n_local_true
*/
#include "private/pcimpl.h"     /*I "petscpc.h" I*/

typedef struct {
  PetscInt   n, n_local, n_local_true;
  PetscInt   overlap;             /* overlap requested by user */
  KSP        *ksp;                /* linear solvers for each block */
  VecScatter *restriction;        /* mapping from global to subregion */
  VecScatter *localization;       /* mapping from overlapping to non-overlapping subregion */
  VecScatter *prolongation;       /* mapping from subregion to global */
  Vec        *x,*y,*y_local;      /* work vectors */
  IS         *is;                 /* index set that defines each overlapping subdomain */
  IS         *is_local;           /* index set that defines each non-overlapping subdomain, may be NULL */
  Mat        *mat,*pmat;          /* mat is not currently used */
  PCASMType  type;                /* use reduced interpolation, restriction or both */
  PetscTruth type_set;            /* if user set this value (so won't change it for symmetric problems) */
  PetscTruth same_local_solves;   /* flag indicating whether all local solvers are same */
} PC_ASM;
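/*
   Worked illustration of the counts above (an example added for clarity,
   not code from this file): with three processes owning 2, 1, and 3 true
   subdomains respectively, n = 6, n_local_true is 2, 1, or 3 depending
   on the rank, and n_local = 3.  Ranks with fewer than n_local true
   subdomains still participate in the extra VecScatter calls, using
   empty scatters, so the parallel calls match up across the communicator.
*/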
#undef __FUNCT__
#define __FUNCT__ "PCView_ASM"
static PetscErrorCode PCView_ASM(PC pc,PetscViewer viewer)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,bsz;
  PetscTruth     iascii,isstring;
  PetscViewer    sviewer;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
  if (iascii) {
    if (osm->n > 0) {
      ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: total subdomain blocks = %D, amount of overlap = %D\n",osm->n,osm->overlap);CHKERRQ(ierr);
    } else {
      ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: total subdomain blocks not yet set, amount of overlap = %D\n",osm->overlap);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: restriction/interpolation type - %s\n",PCASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
    if (osm->same_local_solves) {
      if (osm->ksp) {
        ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all blocks, in the following KSP and PC objects:\n");CHKERRQ(ierr);
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (!rank) {
          ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
    } else {
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] number of local blocks = %D\n",(int)rank,osm->n_local_true);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each block is in the following KSP and PC objects:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
      for (i=0; i<osm->n_local; i++) {
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (i < osm->n_local_true) {
          ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d] local block number %D, size = %D\n",(int)rank,i,bsz);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    }
  } else if (isstring) {
    ierr = PetscViewerStringSPrintf(viewer," blocks=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PCASM",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCASMPrintSubdomains"
static PetscErrorCode PCASMPrintSubdomains(PC pc)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  const char     *prefix;
  char           fname[PETSC_MAX_PATH_LEN+1];
  PetscViewer    viewer;
  PetscInt       i,j,nidx;
  const PetscInt *idx;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetString(prefix,"-pc_asm_print_subdomains",fname,PETSC_MAX_PATH_LEN,PETSC_NULL);CHKERRQ(ierr);
  if (fname[0] == 0) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }
  ierr = PetscViewerASCIIOpen(((PetscObject)pc)->comm,fname,&viewer);CHKERRQ(ierr);
  for (i=0; i<osm->n_local_true; i++) {
    ierr = ISGetLocalSize(osm->is[i],&nidx);CHKERRQ(ierr);
    ierr = ISGetIndices(osm->is[i],&idx);CHKERRQ(ierr);
    for (j=0; j<nidx; j++) {
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%D ",idx[j]);CHKERRQ(ierr);
    }
    ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr);
    if (osm->is_local) {
      ierr = ISGetLocalSize(osm->is_local[i],&nidx);CHKERRQ(ierr);
      ierr = ISGetIndices(osm->is_local[i],&idx);CHKERRQ(ierr);
      for (j=0; j<nidx; j++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%D ",idx[j]);CHKERRQ(ierr);
      }
      ierr = ISRestoreIndices(osm->is_local[i],&idx);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr);
    }
  }
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
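/*
   Usage sketch (assumed behavior, inferred from the option queries here
   and in PCSetUp_ASM()): running with

     -pc_asm_print_subdomains

   prints each local block's overlapping (and, if set, non-overlapping)
   index sets, one line of global indices per set; with no file name
   given the output goes to stdout.
*/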
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_ASM"
static PetscErrorCode PCSetUp_ASM(PC pc)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscTruth     symset,flg;
  PetscInt       i,m,m_local,firstRow,lastRow;
  PetscMPIInt    size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  IS             isl;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  Vec            vec;

  PetscFunctionBegin;
  if (!pc->setupcalled) {

    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) { osm->type = PC_ASM_BASIC; }
    }

    if (osm->n == PETSC_DECIDE && osm->n_local_true < 1) {
      /* no subdomains given, use one per processor */
      osm->n_local = osm->n_local_true = 1;
      ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
      osm->n = size;
    } else if (osm->n == PETSC_DECIDE) {
      /* determine global number of subdomains */
      PetscInt inwork[2],outwork[2];
      inwork[0] = inwork[1] = osm->n_local_true;
      ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
      osm->n_local = outwork[0];
      osm->n       = outwork[1];
    }

    if (!osm->is) { /* create the index sets */
      ierr = PCASMCreateSubdomains(pc->pmat,osm->n_local_true,&osm->is);CHKERRQ(ierr);
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetTruth(prefix,"-pc_asm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) { ierr = PCASMPrintSubdomains(pc);CHKERRQ(ierr); }

    /* Extend the "overlapping" regions by a number of steps */
    ierr = MatIncreaseOverlap(pc->pmat,osm->n_local_true,osm->is,osm->overlap);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
      if (osm->is_local) {
        ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
      }
    }

    /* Create the local work vectors and scatter contexts */
    ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(VecScatter),&osm->restriction);CHKERRQ(ierr);
    if (osm->is_local) {ierr = PetscMalloc(osm->n_local*sizeof(VecScatter),&osm->localization);CHKERRQ(ierr);}
    ierr = PetscMalloc(osm->n_local*sizeof(VecScatter),&osm->prolongation);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(Vec),&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(Vec),&osm->y);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(Vec),&osm->y_local);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(vec,&firstRow,&lastRow);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; ++i, firstRow += m_local) {
      ierr = ISGetLocalSize(osm->is[i],&m);CHKERRQ(ierr);
      ierr = VecCreateSeq(PETSC_COMM_SELF,m,&osm->x[i]);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);CHKERRQ(ierr);
      ierr = VecScatterCreate(vec,osm->is[i],osm->x[i],isl,&osm->restriction[i]);CHKERRQ(ierr);
      ierr = ISDestroy(isl);CHKERRQ(ierr);
      ierr = VecDuplicate(osm->x[i],&osm->y[i]);CHKERRQ(ierr);
      if (osm->is_local) {
        IS             isShift;
        const PetscInt *idx_local;
        PetscInt       *idx,j;

        ierr = ISGetLocalSize(osm->is_local[i],&m_local);CHKERRQ(ierr);
        ierr = ISGetIndices(osm->is_local[i],&idx_local);CHKERRQ(ierr);
        ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr);
        for (j=0; j<m_local; ++j) {
          idx[j] = idx_local[j] - firstRow;
        }
        ierr = ISRestoreIndices(osm->is_local[i],&idx_local);CHKERRQ(ierr);
        ierr = ISCreateGeneral(PETSC_COMM_SELF,m_local,idx,&isShift);CHKERRQ(ierr);
        ierr = PetscFree(idx);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,m_local,0,1,&isl);CHKERRQ(ierr);
        ierr = VecCreateSeq(PETSC_COMM_SELF,m_local,&osm->y_local[i]);CHKERRQ(ierr);
        ierr = VecScatterCreate(osm->y[i],isShift,osm->y_local[i],isl,&osm->localization[i]);CHKERRQ(ierr);
        ierr = ISDestroy(isShift);CHKERRQ(ierr);

        ierr = VecScatterCreate(vec,osm->is_local[i],osm->y_local[i],isl,&osm->prolongation[i]);CHKERRQ(ierr);
        ierr = ISDestroy(isl);CHKERRQ(ierr);
      } else {
        ierr = VecGetLocalSize(vec,&m_local);CHKERRQ(ierr);
        osm->y_local[i] = osm->y[i];
        ierr = PetscObjectReference((PetscObject)osm->y[i]);CHKERRQ(ierr);
        osm->prolongation[i] = osm->restriction[i];
        ierr = PetscObjectReference((PetscObject)osm->restriction[i]);CHKERRQ(ierr);
      }
    }
    if (firstRow != lastRow) SETERRQ2(PETSC_ERR_PLIB,"Specified ASM subdomain sizes were invalid: %D != %D",firstRow,lastRow);
    for (i=osm->n_local_true; i<osm->n_local; i++) {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDuplicate(osm->x[i],&osm->y[i]);CHKERRQ(ierr);
      ierr = VecDuplicate(osm->x[i],&osm->y_local[i]);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&isl);CHKERRQ(ierr);
      ierr = VecScatterCreate(vec,isl,osm->x[i],isl,&osm->restriction[i]);CHKERRQ(ierr);
      osm->prolongation[i] = osm->restriction[i];
      ierr = PetscObjectReference((PetscObject)osm->restriction[i]);CHKERRQ(ierr);
      ierr = ISDestroy(isl);CHKERRQ(ierr);
    }
    ierr = VecDestroy(vec);CHKERRQ(ierr);

    /* Create the local solvers */
    ierr = PetscMalloc(osm->n_local_true*sizeof(KSP),&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = KSPCreate(PETSC_COMM_SELF,&ksp);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    scall = MAT_INITIAL_MATRIX;

  } else {
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr  = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
  }

  /*
     Extract out the submatrices
  */
  ierr = MatGetSubMatrices(pc->pmat,osm->n_local_true,osm->is,osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n_local_true,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over subdomains putting them into local ksp
  */
  for (i=0; i<osm->n_local_true; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }

  PetscFunctionReturn(0);
}
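/*
   In matrix terms (a sketch of the math, added for clarity, not code from
   this file): if R_i denotes the boolean restriction onto the i-th
   overlapping subdomain, PCSetUp_ASM() extracts the subdomain operators

       A_i = R_i A R_i^T

   via MatGetSubMatrices() and hands each A_i to its own sequential KSP.
*/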
#undef __FUNCT__
#define __FUNCT__ "PCSetUpOnBlocks_ASM"
static PetscErrorCode PCSetUpOnBlocks_ASM(PC pc)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  for (i=0; i<osm->n_local_true; i++) {
    ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "PCApply_ASM"
static PetscErrorCode PCApply_ASM(PC pc,Vec x,Vec y)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i,n_local = osm->n_local,n_local_true = osm->n_local_true;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_ASM_RESTRICT)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    for (i=0; i<n_local_true; i++) {
      ierr = VecSet(osm->x[i],0.0);CHKERRQ(ierr);
    }
  }
  if (!(osm->type & PC_ASM_INTERPOLATE)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  for (i=0; i<n_local; i++) {
    ierr = VecScatterBegin(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<n_local_true; i++) {
    ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    if (osm->localization) {
      ierr = VecScatterBegin(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr);
      ierr = VecScatterEnd(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    }
    ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* handle the rest of the scatters that do not have local solves */
  for (i=n_local_true; i<n_local; i++) {
    ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  for (i=0; i<n_local; i++) {
    ierr = VecScatterEnd(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
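/*
   The operator applied above is, schematically,

       y = sum_i R~_i^T A_i^{-1} R_i x

   where R_i restricts to the i-th overlapping subdomain and R~_i^T
   interpolates back.  For PC_ASM_BASIC both scatters use the full
   overlapping region; PC_ASM_RESTRICT (the default) localizes the
   interpolation, PC_ASM_INTERPOLATE localizes the restriction, and
   PC_ASM_NONE localizes both.  (A summary of the code above in the
   notation of the restricted-ASM literature, added for orientation.)
*/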
#undef __FUNCT__
#define __FUNCT__ "PCApplyTranspose_ASM"
static PetscErrorCode PCApplyTranspose_ASM(PC pc,Vec x,Vec y)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i,n_local = osm->n_local,n_local_true = osm->n_local_true;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).

     Note: these are reversed from the PCApply_ASM() because we are applying the
     transpose of the three terms
  */
  if (!(osm->type & PC_ASM_INTERPOLATE)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    for (i=0; i<n_local_true; i++) {
      ierr = VecSet(osm->x[i],0.0);CHKERRQ(ierr);
    }
  }
  if (!(osm->type & PC_ASM_RESTRICT)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  for (i=0; i<n_local; i++) {
    ierr = VecScatterBegin(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<n_local_true; i++) {
    ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    if (osm->localization) {
      ierr = VecScatterBegin(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr);
      ierr = VecScatterEnd(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    }
    ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* handle the rest of the scatters that do not have local solves */
  for (i=n_local_true; i<n_local; i++) {
    ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  for (i=0; i<n_local; i++) {
    ierr = VecScatterEnd(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "PCDestroy_ASM"
static PetscErrorCode PCDestroy_ASM(PC pc)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (osm->ksp) {
    for (i=0; i<osm->n_local_true; i++) {
      ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
  }
  if (osm->pmat) {
    if (osm->n_local_true > 0) {
      ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
    }
  }
  if (osm->restriction) {
    for (i=0; i<osm->n_local; i++) {
      ierr = VecScatterDestroy(osm->restriction[i]);CHKERRQ(ierr);
      if (osm->localization) {ierr = VecScatterDestroy(osm->localization[i]);CHKERRQ(ierr);}
      ierr = VecScatterDestroy(osm->prolongation[i]);CHKERRQ(ierr);
      ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr);
      ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr);
      ierr = VecDestroy(osm->y_local[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->restriction);CHKERRQ(ierr);
    if (osm->localization) {ierr = PetscFree(osm->localization);CHKERRQ(ierr);}
    ierr = PetscFree(osm->prolongation);CHKERRQ(ierr);
    ierr = PetscFree(osm->x);CHKERRQ(ierr);
    ierr = PetscFree(osm->y);CHKERRQ(ierr);
    ierr = PetscFree(osm->y_local);CHKERRQ(ierr);
  }
  if (osm->is) {
    ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);CHKERRQ(ierr);
  }
  ierr = PetscFree(osm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetFromOptions_ASM"
static PetscErrorCode PCSetFromOptions_ASM(PC pc)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       blocks,ovl;
  PetscTruth     symset,flg;
  PCASMType      asmtype;

  PetscFunctionBegin;
  /* set the type to symmetric if matrix is symmetric */
  if (!osm->type_set && pc->pmat) {
    ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
    if (symset && flg) { osm->type = PC_ASM_BASIC; }
  }
  ierr = PetscOptionsHead("Additive Schwarz options");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-pc_asm_blocks","Number of subdomains","PCASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCASMSetTotalSubdomains(pc,blocks,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);}
  ierr = PetscOptionsInt("-pc_asm_overlap","Number of grid points overlap","PCASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCASMSetOverlap(pc,ovl);CHKERRQ(ierr);}
  flg  = PETSC_FALSE;
  ierr = PetscOptionsEnum("-pc_asm_type","Type of restriction/extension","PCASMSetType",PCASMTypes,(PetscEnum)osm->type,(PetscEnum*)&asmtype,&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCASMSetType(pc,asmtype);CHKERRQ(ierr);}
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
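/*
   Options sketch (typical usage, assumed rather than taken from this file):

     -pc_asm_blocks 16 -pc_asm_overlap 2 -pc_asm_type basic \
     -sub_ksp_type preonly -sub_pc_type ilu

   selects 16 blocks overall, two layers of overlap, standard ASM, and an
   ILU factorization as the solver on every block (the -sub_ prefix is the
   one appended to each block's KSP above).
*/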
/*------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMSetLocalSubdomains_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetLocalSubdomains_ASM(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (n < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
  if (pc->setupcalled && (n != osm->n_local_true || is)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetLocalSubdomains() should be called before calling PCSetUp().");
  }
  if (!pc->setupcalled) {
    if (is) {
      for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
    }
    if (is_local) {
      /* reference the local sets as well, since PCASMDestroySubdomains() will destroy them */
      for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);}
    }
    if (osm->is) {
      ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);CHKERRQ(ierr);
    }
    osm->n_local_true = n;
    osm->is           = 0;
    osm->is_local     = 0;
    if (is) {
      ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr);
      for (i=0; i<n; i++) { osm->is[i] = is[i]; }
    }
    if (is_local) {
      ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
      for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
    }
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMSetTotalSubdomains_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetTotalSubdomains_ASM(PC pc,PetscInt N,IS *is)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       n;

  PetscFunctionBegin;
  if (N < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
  if (is) SETERRQ(PETSC_ERR_SUP,"Use PCASMSetLocalSubdomains() to set specific index sets\n\tthey cannot be set globally yet.");

  /*
     Split the subdomains equally among all processors
  */
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  n = N/size + ((N % size) > rank);
  if (!n) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
  if (pc->setupcalled && n != osm->n_local_true) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetTotalSubdomains() should be called before PCSetUp().");
  if (!pc->setupcalled) {
    if (osm->is) {
      ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);CHKERRQ(ierr);
    }
    osm->n_local_true = n;
    osm->is           = 0;
    osm->is_local     = 0;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMSetOverlap_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetOverlap_ASM(PC pc,PetscInt ovl)
{
  PC_ASM *osm = (PC_ASM*)pc->data;

  PetscFunctionBegin;
  if (ovl < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
  if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetOverlap() should be called before PCSetUp().");
  if (!pc->setupcalled) {
    osm->overlap = ovl;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMSetType_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetType_ASM(PC pc,PCASMType type)
{
  PC_ASM *osm = (PC_ASM*)pc->data;

  PetscFunctionBegin;
  osm->type     = type;
  osm->type_set = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMGetSubKSP_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetSubKSP_ASM(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (osm->n_local_true < 1) {
    SETERRQ(PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
  }

  if (n_local) {
    *n_local = osm->n_local_true;
  }
  if (first_local) {
    ierr = MPI_Scan(&osm->n_local_true,first_local,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
    *first_local -= osm->n_local_true;
  }
  if (ksp) {
    /* Assume that local solves are now different; not necessarily
       true though!  This flag is used only for PCView_ASM() */
    *ksp = osm->ksp;
    osm->same_local_solves = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END
#undef __FUNCT__
#define __FUNCT__ "PCASMSetLocalSubdomains"
/*@C
    PCASMSetLocalSubdomains - Sets the local subdomains (for this processor
    only) for the additive Schwarz preconditioner.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
.   n - the number of subdomains for this processor (default value = 1)
.   is - the index sets that define the subdomains for this processor
         (or PETSC_NULL for PETSc to determine subdomains)
-   is_local - the index sets that define the local part of the subdomains for this processor
         (or PETSC_NULL to use the default of 1 subdomain per process)

    Notes:
    The IS numbering is in the parallel, global numbering of the vector.

    By default the ASM preconditioner uses 1 block per processor.

    These index sets cannot be destroyed until after completion of the
    linear solves for which the ASM preconditioner is being used.

    Use PCASMSetTotalSubdomains() to set the subdomains for all processors.

    Level: advanced

.keywords: PC, ASM, set, local, subdomains, additive Schwarz

.seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PetscErrorCode ierr,(*f)(PC,PetscInt,IS[],IS[]);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetLocalSubdomains_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,n,is,is_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
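/*
   Usage sketch (a minimal illustration, not code from this file; the IS
   creation is assumed to be done by the application, in the global
   numbering of the vector):

     IS is[2];                                 - two blocks on this process
     ... build is[0] and is[1], e.g. with ISCreateGeneral() ...
     ierr = PCASMSetLocalSubdomains(pc,2,is,PETSC_NULL);CHKERRQ(ierr);

   gives this process two subdomain blocks and lets PCSetUp_ASM() grow the
   overlap from them.
*/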
#undef __FUNCT__
#define __FUNCT__ "PCASMSetTotalSubdomains"
/*@C
    PCASMSetTotalSubdomains - Sets the subdomains for all processors for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine, with the same index sets.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
.   n - the number of subdomains for all processors
.   is - the index sets that define the subdomains for all processors
         (or PETSC_NULL for PETSc to determine subdomains)
-   is_local - the index sets that define the local part of the subdomains for this processor
         (or PETSC_NULL to use the default of 1 subdomain per process)

    Options Database Key:
    To set the total number of subdomain blocks rather than specify the
    index sets, use the option
.    -pc_asm_blocks <blks> - Sets total blocks

    Notes:
    Currently you cannot use this to set the actual subdomains with the argument is.

    By default the ASM preconditioner uses 1 block per processor.

    These index sets cannot be destroyed until after completion of the
    linear solves for which the ASM preconditioner is being used.

    Use PCASMSetLocalSubdomains() to set local subdomains.

    Level: advanced

.keywords: PC, ASM, set, total, global, subdomains, additive Schwarz

.seealso: PCASMSetLocalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetTotalSubdomains(PC pc,PetscInt N,IS is[],IS is_local[])
{
  PetscErrorCode ierr,(*f)(PC,PetscInt,IS[],IS[]);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetTotalSubdomains_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,N,is,is_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCASMSetOverlap"
/*@
    PCASMSetOverlap - Sets the overlap between a pair of subdomains for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
-   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)

    Options Database Key:
.   -pc_asm_overlap <ovl> - Sets overlap

    Notes:
    By default the ASM preconditioner uses 1 block per processor.  To use
    multiple blocks per processor, see PCASMSetTotalSubdomains() and
    PCASMSetLocalSubdomains() (and the option -pc_asm_blocks <blks>).

    The overlap defaults to 1, so if one desires that no additional
    overlap be computed beyond what may have been set with a call to
    PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(), then ovl
    must be set to be 0.  In particular, if one does not explicitly set
    the subdomains in an application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in an ASM
    variant that is equivalent to the block Jacobi preconditioner.

    Note that one can define initial index sets with any overlap via
    PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(); the routine
    PCASMSetOverlap() merely allows PETSc to extend that overlap further
    if desired.

    Level: intermediate

.keywords: PC, ASM, set, overlap

.seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr,(*f)(PC,PetscInt);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetOverlap_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,ovl);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
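/*
   Usage sketch (illustrative, not from this file): with the default
   one-block-per-process decomposition,

     ierr = PCSetType(pc,PCASM);CHKERRQ(ierr);
     ierr = PCASMSetOverlap(pc,0);CHKERRQ(ierr);

   yields a preconditioner equivalent to block Jacobi (PCBJACOBI), as
   noted above; any ovl > 0 grows each subdomain by that many layers.
*/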
#undef __FUNCT__
#define __FUNCT__ "PCASMSetType"
/*@
    PCASMSetType - Sets the type of restriction and interpolation used
    for local problems in the additive Schwarz method.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
-   type - variant of ASM, one of
.vb
      PC_ASM_BASIC       - full interpolation and restriction
      PC_ASM_RESTRICT    - full restriction, local processor interpolation
      PC_ASM_INTERPOLATE - full interpolation, local processor restriction
      PC_ASM_NONE        - local processor restriction and interpolation
.ve

    Options Database Key:
.   -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type

    Level: intermediate

.keywords: PC, ASM, set, type

.seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetType(PC pc,PCASMType type)
{
  PetscErrorCode ierr,(*f)(PC,PCASMType);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetType_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,type);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCASMGetSubKSP"
/*@C
   PCASMGetSubKSP - Gets the local KSP contexts for all blocks on
   this processor.

   Collective on PC iff first_local is requested

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n_local - the number of blocks on this processor or PETSC_NULL
.  first_local - the global number of the first block on this processor or PETSC_NULL,
                 all processors must request or all must pass PETSC_NULL
-  ksp - the array of KSP contexts

   Notes:
   After PCASMGetSubKSP() the array of KSP contexts is not to be freed.

   Currently for some matrix implementations only 1 block per processor
   is supported.

   You must call KSPSetUp() before calling PCASMGetSubKSP().

   Level: advanced

.keywords: PC, ASM, additive Schwarz, get, sub, KSP, context

.seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMSetOverlap(),
          PCASMCreateSubdomains2D()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr,(*f)(PC,PetscInt*,PetscInt*,KSP **);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMGetSubKSP_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,n_local,first_local,ksp);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot get subksp for this type of PC");
  }

  PetscFunctionReturn(0);
}
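/*
   Usage sketch (illustrative, not from this file): after KSPSetUp() on
   the outer solver, each block's solver can be customized directly:

     PetscInt i,n_local,first;
     KSP      *subksp;
     PC       subpc;
     ierr = PCASMGetSubKSP(pc,&n_local,&first,&subksp);CHKERRQ(ierr);
     for (i=0; i<n_local; i++) {
       ierr = KSPGetPC(subksp[i],&subpc);CHKERRQ(ierr);
       ierr = PCSetType(subpc,PCLU);CHKERRQ(ierr);   - e.g., exact block solves
     }
*/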
/* -------------------------------------------------------------------------------------*/
/*MC
   PCASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
           its own KSP object.

   Options Database Keys:
+  -pc_asm_truelocal - Activates PCASMSetUseTrueLocal()
.  -pc_asm_blocks <blks> - Sets total blocks
.  -pc_asm_overlap <ovl> - Sets overlap
-  -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type

   IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
   will get a different convergence rate due to the default option of -pc_asm_type restrict.  Use
   -pc_asm_type basic to use the standard ASM.

   Notes: Each processor can have one or more blocks, but a block cannot be shared by more
          than one processor.  Defaults to one block per processor.

   To set options on the solvers for each block, append -sub_ to all the KSP and PC
   options database keys.  For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

   To set the options on the solvers separately for each block, call PCASMGetSubKSP()
   and set the options directly on the resulting KSP object (you can access its PC
   with KSPGetPC())

   Level: beginner

   Concepts: additive Schwarz method

   References:
   An additive variant of the Schwarz alternating method for the case of many subregions,
   M. Dryja and O. B. Widlund, Courant Institute, New York University, Technical Report.

   Domain Decomposition: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
   Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.

.seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
          PCBJACOBI, PCASMSetUseTrueLocal(), PCASMGetSubKSP(), PCASMSetLocalSubdomains(),
          PCASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCASMSetOverlap(), PCASMSetType()

M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_ASM(PC pc)
{
  PetscErrorCode ierr;
  PC_ASM         *osm;

  PetscFunctionBegin;
  ierr = PetscNewLog(pc,PC_ASM,&osm);CHKERRQ(ierr);
  osm->n                 = PETSC_DECIDE;
  osm->n_local           = 0;
  osm->n_local_true      = 0;
  osm->overlap           = 1;
  osm->ksp               = 0;
  osm->restriction       = 0;
  osm->localization      = 0;
  osm->prolongation      = 0;
  osm->x                 = 0;
  osm->y                 = 0;
  osm->y_local           = 0;
  osm->is                = 0;
  osm->is_local          = 0;
  osm->mat               = 0;
  osm->pmat              = 0;
  osm->type              = PC_ASM_RESTRICT;
  osm->same_local_solves = PETSC_TRUE;

  pc->data                 = (void*)osm;
  pc->ops->apply           = PCApply_ASM;
  pc->ops->applytranspose  = PCApplyTranspose_ASM;
  pc->ops->setup           = PCSetUp_ASM;
  pc->ops->destroy         = PCDestroy_ASM;
  pc->ops->setfromoptions  = PCSetFromOptions_ASM;
  pc->ops->setuponblocks   = PCSetUpOnBlocks_ASM;
  pc->ops->view            = PCView_ASM;
  pc->ops->applyrichardson = 0;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetLocalSubdomains_C","PCASMSetLocalSubdomains_ASM",
                    PCASMSetLocalSubdomains_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetTotalSubdomains_C","PCASMSetTotalSubdomains_ASM",
                    PCASMSetTotalSubdomains_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetOverlap_C","PCASMSetOverlap_ASM",
                    PCASMSetOverlap_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetType_C","PCASMSetType_ASM",
                    PCASMSetType_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMGetSubKSP_C","PCASMGetSubKSP_ASM",
                    PCASMGetSubKSP_ASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
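/*
   Usage sketch (illustrative, not from this file): the type is normally
   selected on an existing KSP's PC, either in code

     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCASM);CHKERRQ(ierr);

   or from the command line with -pc_type asm together with the options
   listed in the manual page above.
*/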
#undef __FUNCT__
#define __FUNCT__ "PCASMCreateSubdomains"
/*@C
   PCASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
   preconditioner for any problem on a general grid.

   Collective

   Input Parameters:
+  A - The global matrix operator
-  n - the number of local blocks

   Output Parameters:
.  outis - the array of index sets defining the subdomains

   Level: advanced

   Note: this generates nonoverlapping subdomains; the PCASM will generate the overlap
   from these if you use PCASMSetLocalSubdomains()

   In the Fortran version you must provide the array outis[] already allocated of length n.

.keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid

.seealso: PCASMSetLocalSubdomains(), PCASMDestroySubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
{
  MatPartitioning mpart;
  const char      *prefix;
  PetscErrorCode  (*f)(Mat,PetscTruth*,MatReuse,Mat*);
  PetscMPIInt     size;
  PetscInt        i,j,rstart,rend,bs;
  PetscTruth      iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
  Mat             Ad = PETSC_NULL, adj;
  IS              ispart,isnumb,*is;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidPointer(outis,3);
  if (n < 1) {SETERRQ1(PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);}

  /* Get prefix, row distribution, and block size */
  ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (rstart/bs*bs != rstart || rend/bs*bs != rend) {
    SETERRQ3(PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
  }
  /* Get diagonal block from matrix if possible */
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr);
  } else if (size == 1) {
    iscopy = PETSC_FALSE; Ad = A;
  } else {
    iscopy = PETSC_FALSE; Ad = PETSC_NULL;
  }
  if (Ad) {
    ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
    if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
  }
  if (Ad && n > 1) {
    PetscTruth match,done;
    /* Try to setup a good matrix partitioning if available */
    ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
    ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
    ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
    ierr = PetscTypeCompare((PetscObject)mpart,MAT_PARTITIONING_CURRENT,&match);CHKERRQ(ierr);
    if (!match) {
      ierr = PetscTypeCompare((PetscObject)mpart,MAT_PARTITIONING_SQUARE,&match);CHKERRQ(ierr);
    }
    if (!match) { /* assume a "good" partitioner is available */
      PetscInt na,*ia,*ja;
      ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
      if (done) {
        /* Build adjacency matrix by hand.  Unfortunately a call to
           MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
           remove the block-aij structure and we cannot expect
           MatPartitioning to split vertices as we need */
        PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
        nnz = 0;
        for (i=0; i<na; i++) { /* count number of nonzeros */
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] == i) { /* don't count diagonal */
              len--; break;
            }
          }
          nnz += len;
        }
        ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
        ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
        nnz    = 0;
        iia[0] = 0;
        for (i=0; i<na; i++) { /* fill adjacency */
          cnt = 0;
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] != i) { /* if not diagonal */
              jja[nnz+cnt++] = row[j];
            }
          }
          nnz += cnt;
          iia[i+1] = nnz;
        }
        /* Partitioning of the adjacency matrix */
        ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
        ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
        ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
        ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
        ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
        ierr = MatDestroy(adj);CHKERRQ(ierr);
        foundpart = PETSC_TRUE;
      }
      ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
    }
    ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr);
  }
  if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);}

  ierr   = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
  *outis = is;

  if (!foundpart) {

    /* Partitioning by contiguous chunks of rows */

    PetscInt mbs   = (rend-rstart)/bs;
    PetscInt start = rstart;
    for (i=0; i<n; i++) {
      PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
      ierr = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
      start += count;
    }

  } else {

    /* Partitioning by adjacency of diagonal block */

    const PetscInt *numbering;
    PetscInt       *count,nidx,*indices,*newidx,start=0;
    /* Get node count in each partition */
    ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
    ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      for (i=0; i<n; i++) count[i] *= bs;
    }
    /* Build indices from node numbering */
    ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
    ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
    for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
    ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
    ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
    ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
      for (i=0; i<nidx; i++)
        for (j=0; j<bs; j++)
          newidx[i*bs+j] = indices[i]*bs + j;
      ierr = PetscFree(indices);CHKERRQ(ierr);
      nidx   *= bs;
      indices = newidx;
    }
    /* Shift to get global indices */
    for (i=0; i<nidx; i++) indices[i] += rstart;

    /* Build the index sets for each block */
    for (i=0; i<n; i++) {
      ierr = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],&is[i]);CHKERRQ(ierr);
      ierr = ISSort(is[i]);CHKERRQ(ierr);
      start += count[i];
    }

    ierr = PetscFree(count);CHKERRQ(ierr);
    ierr = PetscFree(indices);CHKERRQ(ierr);
    ierr = ISDestroy(isnumb);CHKERRQ(ierr);
    ierr = ISDestroy(ispart);CHKERRQ(ierr);

  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCASMDestroySubdomains"
/*@C
   PCASMDestroySubdomains - Destroys the index sets created with
   PCASMCreateSubdomains().  Should be called after setting subdomains
   with PCASMSetLocalSubdomains().

   Collective

   Input Parameters:
+  n - the number of index sets
.  is - the array of index sets
-  is_local - the array of local index sets, can be PETSC_NULL

   Level: advanced

.keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid

.seealso: PCASMCreateSubdomains(), PCASMSetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
{
  PetscInt       i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (n <= 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
  PetscValidPointer(is,2);
  for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); }
  ierr = PetscFree(is);CHKERRQ(ierr);
  if (is_local) {
    PetscValidPointer(is_local,3);
    for (i=0; i<n; i++) { ierr = ISDestroy(is_local[i]);CHKERRQ(ierr); }
    ierr = PetscFree(is_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
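/*
   Usage sketch (illustrative, not from this file): the create/set/destroy
   calls are intended to pair up like this on each process:

     IS *is;
     ierr = PCASMCreateSubdomains(A,4,&is);CHKERRQ(ierr);
     ierr = PCASMSetLocalSubdomains(pc,4,is,PETSC_NULL);CHKERRQ(ierr);
     ... KSPSolve() etc. ...
     ierr = PCASMDestroySubdomains(4,is,PETSC_NULL);CHKERRQ(ierr);

   with the destroy deferred until the solves are complete, as the manual
   pages above require.
*/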
#undef __FUNCT__
#define __FUNCT__ "PCASMCreateSubdomains2D"
/*@
   PCASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
   preconditioner for a two-dimensional problem on a regular grid.

   Not Collective

   Input Parameters:
+  m, n - the number of mesh points in the x and y directions
.  M, N - the number of subdomains in the x and y directions
.  dof - degrees of freedom per node
-  overlap - overlap in mesh lines

   Output Parameters:
+  Nsub - the number of subdomains created
-  is - the array of index sets defining the subdomains

   Note:
   Presently PCASMCreateSubdomains2D() is valid only for sequential
   preconditioners.  More general related routines are
   PCASMSetTotalSubdomains() and PCASMSetLocalSubdomains().

   Level: advanced

.keywords: PC, ASM, additive Schwarz, create, subdomains, 2D, regular grid

.seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
          PCASMSetOverlap()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMCreateSubdomains2D(PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt dof,PetscInt overlap,PetscInt *Nsub,IS **is)
{
  PetscInt       i,j,height,width,ystart,xstart,yleft,yright,xleft,xright,loc_outer;
  PetscErrorCode ierr;
  PetscInt       nidx,*idx,loc,ii,jj,count;

  PetscFunctionBegin;
  if (dof != 1) SETERRQ(PETSC_ERR_SUP,"dof must be 1");

  *Nsub = N*M;
  ierr = PetscMalloc((*Nsub)*sizeof(IS),is);CHKERRQ(ierr);
  ystart    = 0;
  loc_outer = 0;
  for (i=0; i<N; i++) {
    height = n/N + ((n % N) > i); /* height of subdomain */
    if (height < 2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Too many N subdomains for mesh dimension n");
    yleft  = ystart - overlap; if (yleft < 0) yleft = 0;
    yright = ystart + height + overlap; if (yright > n) yright = n;
    xstart = 0;
    for (j=0; j<M; j++) {
      width = m/M + ((m % M) > j); /* width of subdomain */
      if (width < 2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Too many M subdomains for mesh dimension m");
      xleft  = xstart - overlap; if (xleft < 0) xleft = 0;
      xright = xstart + width + overlap; if (xright > m) xright = m;
      nidx   = (xright - xleft)*(yright - yleft);
      ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
      loc  = 0;
      for (ii=yleft; ii<yright; ii++) {
        count = m*ii + xleft;
        for (jj=xleft; jj<xright; jj++) {
          idx[loc++] = count++;
        }
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nidx,idx,(*is)+loc_outer++);CHKERRQ(ierr);
      ierr = PetscFree(idx);CHKERRQ(ierr);
      xstart += width;
    }
    ystart += height;
  }
  for (i=0; i<*Nsub; i++) { ierr = ISSort((*is)[i]);CHKERRQ(ierr); }
  PetscFunctionReturn(0);
}
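/*
   Usage sketch (illustrative, not from this file): for a sequential
   problem on a 64 x 64 grid split into a 4 x 2 array of subdomains with
   one mesh line of overlap:

     PetscInt Nsub;
     IS       *is;
     ierr = PCASMCreateSubdomains2D(64,64,4,2,1,1,&Nsub,&is);CHKERRQ(ierr);
     ierr = PCASMSetLocalSubdomains(pc,Nsub,is,PETSC_NULL);CHKERRQ(ierr);

   Since the overlap is already built into these index sets, one would
   typically also call PCASMSetOverlap(pc,0) so PETSc does not extend
   them further (see the PCASMSetOverlap() notes).
*/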
#undef __FUNCT__
#define __FUNCT__ "PCASMGetLocalSubdomains"
/*@C
   PCASMGetLocalSubdomains - Gets the local subdomains (for this processor
   only) for the additive Schwarz preconditioner.

   Collective on PC

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n - the number of subdomains for this processor (default value = 1)
.  is - the index sets that define the subdomains for this processor
-  is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)

   Notes:
   The IS numbering is in the parallel, global numbering of the vector.

   Level: advanced

.keywords: PC, ASM, set, local, subdomains, additive Schwarz

.seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubmatrices()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
{
  PC_ASM         *osm;
  PetscErrorCode ierr;
  PetscTruth     match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  PetscValidIntPointer(n,2);
  if (is) PetscValidPointer(is,3);
  ierr = PetscTypeCompare((PetscObject)pc,PCASM,&match);CHKERRQ(ierr);
  if (!match) {
    if (n)  *n  = 0;
    if (is) *is = PETSC_NULL;
  } else {
    osm = (PC_ASM*)pc->data;
    if (n)        *n        = osm->n_local_true;
    if (is)       *is       = osm->is;
    if (is_local) *is_local = osm->is_local;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCASMGetLocalSubmatrices"
/*@C
   PCASMGetLocalSubmatrices - Gets the local submatrices (for this processor
   only) for the additive Schwarz preconditioner.

   Collective on PC

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n - the number of matrices for this processor (default value = 1)
-  mat - the matrices

   Level: advanced

.keywords: PC, ASM, set, local, subdomains, additive Schwarz, block Jacobi

.seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
{
  PC_ASM         *osm;
  PetscErrorCode ierr;
  PetscTruth     match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  PetscValidIntPointer(n,2);
  if (mat) PetscValidPointer(mat,3);
  if (!pc->setupcalled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
  ierr = PetscTypeCompare((PetscObject)pc,PCASM,&match);CHKERRQ(ierr);
  if (!match) {
    if (n)   *n   = 0;
    if (mat) *mat = PETSC_NULL;
  } else {
    osm = (PC_ASM*)pc->data;
    if (n)   *n   = osm->n_local_true;
    if (mat) *mat = osm->pmat;
  }
  PetscFunctionReturn(0);
}