1 /* 2 This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation. 3 In this version each processor may intersect multiple subdomains and any subdomain may 4 intersect multiple processors. Intersections of subdomains with processors are called *local 5 subdomains*. 6 7 N - total number of local subdomains on all processors (set in PCGASMSetTotalSubdomains() or calculated in PCSetUp_GASM()) 8 n - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains()) 9 nmax - maximum number of local subdomains per processor (calculated in PCGASMSetTotalSubdomains() or in PCSetUp_GASM()) 10 */ 11 #include <petsc-private/pcimpl.h> /*I "petscpc.h" I*/ 12 13 typedef struct { 14 PetscInt N,n,nmax; 15 PetscInt overlap; /* overlap requested by user */ 16 KSP *ksp; /* linear solvers for each block */ 17 Vec gx,gy; /* Merged work vectors */ 18 Vec *x,*y; /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */ 19 VecScatter gorestriction; /* merged restriction to disjoint union of outer subdomains */ 20 VecScatter girestriction; /* merged restriction to disjoint union of inner subdomains */ 21 IS *ois; /* index sets that define the outer (conceptually, overlapping) subdomains */ 22 IS *iis; /* index sets that define the inner (conceptually, nonoverlapping) subdomains */ 23 Mat *pmat; /* subdomain block matrices */ 24 PCGASMType type; /* use reduced interpolation, restriction or both */ 25 PetscBool create_local; /* whether the autocreated subdomains are local or not. */ 26 PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */ 27 PetscBool same_subdomain_solvers; /* flag indicating whether all local solvers are same */ 28 PetscBool sort_indices; /* flag to sort subdomain indices */ 29 } PC_GASM; 30 31 #undef __FUNCT__ 32 #define __FUNCT__ "PCGASMSubdomainView_Private" 33 static PetscErrorCode PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer) 34 { 35 PC_GASM *osm = (PC_GASM*)pc->data; 36 PetscInt j,nidx; 37 const PetscInt *idx; 38 PetscViewer sviewer; 39 char *cidx; 40 PetscErrorCode ierr; 41 42 PetscFunctionBegin; 43 if(i < 0 || i > osm->n) SETERRQ2(((PetscObject)viewer)->comm, PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must nonnegative and less than %D", i, osm->n); 44 ierr = ISGetLocalSize(osm->ois[i], &nidx);CHKERRQ(ierr); 45 /* 46 No more than 15 characters per index plus a space. 47 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 48 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 49 For nidx == 0, the whole string 16 '\0'. 50 */ 51 ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr); 52 ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16*(nidx+1)+1, &sviewer); CHKERRQ(ierr); 53 ierr = ISGetIndices(osm->ois[i], &idx);CHKERRQ(ierr); 54 for(j = 0; j < nidx; ++j) { 55 ierr = PetscViewerStringSPrintf(sviewer,"%D ", idx[j]); CHKERRQ(ierr); 56 } 57 ierr = PetscViewerDestroy(&sviewer); CHKERRQ(ierr); 58 ierr = ISRestoreIndices(osm->ois[i],&idx);CHKERRQ(ierr); 59 ierr = PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");CHKERRQ(ierr); 60 ierr = PetscViewerFlush(viewer); CHKERRQ(ierr); 61 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 62 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr); 63 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 64 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 65 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr); 66 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 67 ierr = PetscFree(cidx);CHKERRQ(ierr); 68 if (osm->iis) { 69 ierr = ISGetLocalSize(osm->iis[i], &nidx);CHKERRQ(ierr); 70 /* 71 No more than 15 characters per index plus a space. 72 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 73 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 74 For nidx == 0, the whole string 16 '\0'. 75 */ 76 ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr); 77 ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16*(nidx+1)+1, &sviewer); CHKERRQ(ierr); 78 ierr = ISGetIndices(osm->iis[i], &idx);CHKERRQ(ierr); 79 for(j = 0; j < nidx; ++j) { 80 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr); 81 } 82 ierr = ISRestoreIndices(osm->iis[i],&idx);CHKERRQ(ierr); 83 ierr = PetscViewerDestroy(&sviewer); CHKERRQ(ierr); 84 ierr = PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");CHKERRQ(ierr); 85 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 86 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr); 87 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr); 88 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 89 ierr = PetscFree(cidx);CHKERRQ(ierr); 90 } 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "PCGASMPrintSubdomains" 96 static PetscErrorCode PCGASMPrintSubdomains(PC pc) 97 { 98 PC_GASM *osm = (PC_GASM*)pc->data; 99 const char *prefix; 100 char fname[PETSC_MAX_PATH_LEN+1]; 101 PetscViewer viewer; 102 PetscInt i; 103 PetscBool found; 104 PetscErrorCode ierr; 105 106 PetscFunctionBegin; 107 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 108 ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr); 109 if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }; 110 for (i=0;i<osm->n;++i) { 111 ierr = PetscViewerASCIIOpen(((PetscObject)((osm->ois)[i]))->comm,fname,&viewer);CHKERRQ(ierr); 112 ierr = PCGASMSubdomainView_Private(pc, i, viewer); CHKERRQ(ierr); 113 } 114 PetscFunctionReturn(0); 115 } 116 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "PCView_GASM" 120 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer) 121 { 122 PC_GASM *osm = (PC_GASM*)pc->data; 123 const char *prefix; 124 PetscErrorCode ierr; 125 PetscMPIInt rank, size; 126 PetscInt i,bsz; 127 PetscBool iascii,view_subdomains=PETSC_FALSE; 128 PetscViewer sviewer; 129 PetscInt count, l, gcount, *numbering, *permutation; 130 char overlap[256] = "user-defined overlap"; 131 char gsubdomains[256] = "unknown total number of subdomains"; 132 char lsubdomains[256] = "unknown number of local subdomains"; 133 char msubdomains[256] = "unknown max number of local subdomains"; 134 PetscFunctionBegin; 135 ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr); 136 ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr); 137 138 139 ierr = PetscMalloc2(osm->n, PetscInt, &permutation, osm->n, PetscInt, &numbering); CHKERRQ(ierr); 140 for(i = 0; i < osm->n; ++i) permutation[i] = i; 141 ierr = PetscObjectsGetGlobalNumbering(((PetscObject)pc)->comm, osm->n, (PetscObject*)osm->ois, &gcount, numbering); CHKERRQ(ierr); 142 ierr = PetscSortIntWithPermutation(osm->n, numbering, permutation); CHKERRQ(ierr); 143 144 if(osm->overlap >= 0) { 145 ierr = PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap); CHKERRQ(ierr); 146 } 147 ierr = PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",gcount); CHKERRQ(ierr); 148 if(osm->N > 0) { 149 ierr = PetscSNPrintf(lsubdomains, sizeof(gsubdomains), "number of local subdomains = %D",osm->N); CHKERRQ(ierr); 150 } 151 if(osm->nmax > 0){ 152 ierr = PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr); 153 } 154 155 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 156 ierr = PetscOptionsGetBool(prefix,"-pc_gasm_view_subdomains",&view_subdomains,PETSC_NULL);CHKERRQ(ierr); 157 158 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii); CHKERRQ(ierr); 159 if (iascii) { 160 /* 161 Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer: 162 the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed 163 collectively on the comm. 164 */ 165 ierr = PetscObjectName((PetscObject)viewer); CHKERRQ(ierr); 166 ierr = PetscViewerASCIIPrintf(viewer,"Generalized additive Schwarz:\n"); CHKERRQ(ierr); 167 ierr = PetscViewerASCIIPrintf(viewer,"Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr); 168 ierr = PetscViewerASCIIPrintf(viewer,"%s\n",overlap); CHKERRQ(ierr); 169 ierr = PetscViewerASCIIPrintf(viewer,"%s\n",gsubdomains);CHKERRQ(ierr); 170 ierr = PetscViewerASCIIPrintf(viewer,"%s\n",lsubdomains);CHKERRQ(ierr); 171 ierr = PetscViewerASCIIPrintf(viewer,"%s\n",msubdomains);CHKERRQ(ierr); 172 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 173 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr); 174 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 175 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 176 /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */ 177 ierr = PetscViewerASCIIPrintf(viewer,"Subdomain solver info is as follows:\n");CHKERRQ(ierr); 178 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 179 ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 180 /* Make sure that everybody waits for the banner to be printed. */ 181 ierr = MPI_Barrier(((PetscObject)viewer)->comm); CHKERRQ(ierr); 182 l = 0; 183 for(count = 0; count < gcount; ++count) { 184 /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */ 185 PetscMPIInt srank, ssize; 186 if(l<osm->n){ 187 PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */ 188 if(numbering[d] == count) { 189 ierr = MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize); CHKERRQ(ierr); 190 ierr = MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank); CHKERRQ(ierr); 191 ierr = PetscViewerGetSubcomm(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr); 192 ierr = ISGetLocalSize(osm->ois[d],&bsz);CHKERRQ(ierr); 193 ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_TRUE);CHKERRQ(ierr); 194 ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%D:%D] (subcomm [%D:%D]) local subdomain number %D, local size = %D\n",(int)rank,(int)size,(int)srank,(int)ssize,d,bsz);CHKERRQ(ierr); 195 ierr = PetscViewerFlush(sviewer); CHKERRQ(ierr); 196 ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_FALSE);CHKERRQ(ierr); 197 if(view_subdomains) { 198 ierr = PCGASMSubdomainView_Private(pc,d,sviewer); CHKERRQ(ierr); 199 } 200 if(!pc->setupcalled) { 201 PetscViewerASCIIPrintf(sviewer, "Solver not set up yet: PCSetUp() not yet called\n"); CHKERRQ(ierr); 202 } 203 else { 204 ierr = KSPView(osm->ksp[d],sviewer);CHKERRQ(ierr); 205 } 206 ierr = PetscViewerASCIIPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 207 ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr); 208 ierr = PetscViewerRestoreSubcomm(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr); 209 ++l; 210 } 211 } 212 ierr = MPI_Barrier(((PetscObject)pc)->comm); CHKERRQ(ierr); 213 } 214 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 215 } else { 216 SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name); 217 } 218 ierr = PetscFree2(permutation,numbering); CHKERRQ(ierr); 219 PetscFunctionReturn(0); 220 } 221 222 223 224 225 226 #undef __FUNCT__ 227 #define __FUNCT__ "PCSetUp_GASM" 228 static PetscErrorCode PCSetUp_GASM(PC pc) 229 { 230 PC_GASM *osm = (PC_GASM*)pc->data; 231 PetscErrorCode ierr; 232 PetscBool symset,flg; 233 PetscInt i; 234 PetscMPIInt rank, size; 235 MatReuse scall = MAT_REUSE_MATRIX; 236 KSP ksp; 237 PC subpc; 238 const char *prefix,*pprefix; 239 Vec x,y; 240 PetscInt oni; /* Number of indices in the i-th local outer subdomain. */ 241 const PetscInt *oidxi; /* Indices from the i-th subdomain local outer subdomain. */ 242 PetscInt on; /* Number of indices in the disjoint union of local outer subdomains. */ 243 PetscInt *oidx; /* Indices in the disjoint union of local outer subdomains. */ 244 IS gois; /* Disjoint union the global indices of outer subdomains. */ 245 IS goid; /* Identity IS of the size of the disjoint union of outer subdomains. */ 246 PetscScalar *gxarray, *gyarray; 247 PetscInt gofirst; /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- 248 over the disjoint union of outer subdomains. */ 249 DM *domain_dm = PETSC_NULL; 250 251 PetscFunctionBegin; 252 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 253 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 254 if (!pc->setupcalled) { 255 256 if (!osm->type_set) { 257 ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr); 258 if (symset && flg) { osm->type = PC_GASM_BASIC; } 259 } 260 261 /* 262 If subdomains have been set, then the local number of subdomains, osm->n, is NOT PETSC_DECIDE and is at least 1. 263 The total number of subdomains, osm->N is not necessarily set, might be PETSC_DECIDE, and then will have to be calculated from osm->n. 264 */ 265 if (osm->n == PETSC_DECIDE) { 266 /* no subdomains given */ 267 /* try pc->dm first */ 268 if(pc->dm) { 269 char ddm_name[1024]; 270 DM ddm; 271 PetscBool flg; 272 PetscInt num_domains, d; 273 char **domain_names; 274 IS *domain_is; 275 /* Allow the user to request a decomposition DM by name */ 276 ierr = PetscStrncpy(ddm_name, "", 1024); CHKERRQ(ierr); 277 ierr = PetscOptionsString("-pc_gasm_decomposition","Name of the DM defining the composition", "PCSetDM", ddm_name, ddm_name,1024,&flg); CHKERRQ(ierr); 278 if(flg) { 279 ierr = DMCreateDecompositionDM(pc->dm, ddm_name, &ddm); CHKERRQ(ierr); 280 if(!ddm) { 281 SETERRQ1(((PetscObject)pc)->comm, PETSC_ERR_ARG_WRONGSTATE, "Uknown DM decomposition name %s", ddm_name); 282 } 283 ierr = PetscInfo(pc,"Using decomposition DM defined using options database\n");CHKERRQ(ierr); 284 ierr = PCSetDM(pc,ddm); CHKERRQ(ierr); 285 } 286 ierr = DMCreateDecomposition(pc->dm, &num_domains, &domain_names, &domain_is, &domain_dm); CHKERRQ(ierr); 287 if(num_domains) { 288 ierr = PCGASMSetSubdomains(pc, num_domains, domain_is, PETSC_NULL);CHKERRQ(ierr); 289 } 290 for(d = 0; d < num_domains; ++d) { 291 ierr = PetscFree(domain_names[d]); CHKERRQ(ierr); 292 ierr = ISDestroy(&domain_is[d]); CHKERRQ(ierr); 293 } 294 ierr = PetscFree(domain_names);CHKERRQ(ierr); 295 ierr = PetscFree(domain_is);CHKERRQ(ierr); 296 } 297 if (osm->n == PETSC_DECIDE) { /* still no subdomains; use one per processor */ 298 osm->nmax = osm->n = 1; 299 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 300 osm->N = size; 301 } 302 } 303 if (osm->N == PETSC_DECIDE) { 304 PetscInt inwork[2], outwork[2]; 305 /* determine global number of subdomains and the max number of local subdomains */ 306 inwork[0] = inwork[1] = osm->n; 307 ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr); 308 osm->nmax = outwork[0]; 309 osm->N = outwork[1]; 310 } 311 if (!osm->iis){ 312 /* 313 The local number of subdomains was set in PCGASMSetTotalSubdomains() or PCGASMSetSubdomains(), 314 but the actual subdomains have not been supplied (in PCGASMSetSubdomains()). 315 We create the requisite number of inner subdomains on PETSC_COMM_SELF (for now). 316 */ 317 ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->overlap,osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr); 318 } 319 320 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 321 flg = PETSC_FALSE; 322 ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr); 323 if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); } 324 325 if (osm->sort_indices) { 326 for (i=0; i<osm->n; i++) { 327 ierr = ISSort(osm->ois[i]);CHKERRQ(ierr); 328 ierr = ISSort(osm->iis[i]);CHKERRQ(ierr); 329 } 330 } 331 /* 332 Merge the ISs, create merged vectors and restrictions. 333 */ 334 /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */ 335 on = 0; 336 for (i=0; i<osm->n; i++) { 337 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 338 on += oni; 339 } 340 ierr = PetscMalloc(on*sizeof(PetscInt), &oidx);CHKERRQ(ierr); 341 on = 0; 342 for (i=0; i<osm->n; i++) { 343 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 344 ierr = ISGetIndices(osm->ois[i],&oidxi);CHKERRQ(ierr); 345 ierr = PetscMemcpy(oidx+on, oidxi, sizeof(PetscInt)*oni);CHKERRQ(ierr); 346 ierr = ISRestoreIndices(osm->ois[i], &oidxi);CHKERRQ(ierr); 347 on += oni; 348 } 349 ierr = ISCreateGeneral(((PetscObject)(pc))->comm, on, oidx, PETSC_OWN_POINTER, &gois);CHKERRQ(ierr); 350 ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr); 351 ierr = VecCreateMPI(((PetscObject)pc)->comm, on, PETSC_DECIDE, &osm->gx);CHKERRQ(ierr); 352 ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr); 353 ierr = VecGetOwnershipRange(osm->gx, &gofirst, PETSC_NULL);CHKERRQ(ierr); 354 ierr = ISCreateStride(((PetscObject)pc)->comm,on,gofirst,1, &goid);CHKERRQ(ierr); 355 ierr = VecScatterCreate(x,gois,osm->gx,goid, &(osm->gorestriction));CHKERRQ(ierr); 356 ierr = VecDestroy(&x); CHKERRQ(ierr); 357 /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */ 358 { PetscInt ini; /* Number of indices the i-th a local inner subdomain. */ 359 PetscInt in; /* Number of indices in the disjoint uniont of local inner subdomains. */ 360 PetscInt *iidx; /* Global indices in the merged local inner subdomain. */ 361 PetscInt *ioidx; /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */ 362 IS giis; /* IS for the disjoint union of inner subdomains. */ 363 IS giois; /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */ 364 /**/ 365 in = 0; 366 for (i=0; i<osm->n; i++) { 367 ierr = ISGetLocalSize(osm->iis[i],&ini);CHKERRQ(ierr); 368 in += ini; 369 } 370 ierr = PetscMalloc(in*sizeof(PetscInt), &iidx); CHKERRQ(ierr); 371 ierr = PetscMalloc(in*sizeof(PetscInt), &ioidx);CHKERRQ(ierr); 372 ierr = VecGetOwnershipRange(osm->gx,&gofirst, PETSC_NULL); CHKERRQ(ierr); 373 in = 0; 374 on = 0; 375 for (i=0; i<osm->n; i++) { 376 const PetscInt *iidxi; /* Global indices of the i-th local inner subdomain. */ 377 ISLocalToGlobalMapping ltogi; /* Map from global to local indices of the i-th outer local subdomain. */ 378 PetscInt *ioidxi; /* Local indices of the i-th local inner subdomain within the local outer subdomain. */ 379 PetscInt ioni; /* Number of indices in ioidxi; if ioni != ini the inner subdomain is not a subdomain of the outer subdomain (error). */ 380 PetscInt k; 381 ierr = ISGetLocalSize(osm->iis[i],&ini);CHKERRQ(ierr); 382 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 383 ierr = ISGetIndices(osm->iis[i],&iidxi);CHKERRQ(ierr); 384 ierr = PetscMemcpy(iidx+in, iidxi, sizeof(PetscInt)*ini);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingCreateIS(osm->ois[i],<ogi);CHKERRQ(ierr); 386 ioidxi = ioidx+in; 387 ierr = ISGlobalToLocalMappingApply(ltogi,IS_GTOLM_DROP,ini,iidxi,&ioni,ioidxi);CHKERRQ(ierr); 388 ierr = ISLocalToGlobalMappingDestroy(<ogi);CHKERRQ(ierr); 389 ierr = ISRestoreIndices(osm->iis[i], &iidxi);CHKERRQ(ierr); 390 if (ioni != ini) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner subdomain %D contains %D indices outside of its outer subdomain", i, ini - ioni); 391 for(k = 0; k < ini; ++k) { 392 ioidxi[k] += gofirst+on; 393 } 394 in += ini; 395 on += oni; 396 } 397 ierr = ISCreateGeneral(((PetscObject)pc)->comm, in, iidx, PETSC_OWN_POINTER, &giis);CHKERRQ(ierr); 398 ierr = ISCreateGeneral(((PetscObject)pc)->comm, in, ioidx, PETSC_OWN_POINTER, &giois);CHKERRQ(ierr); 399 ierr = VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);CHKERRQ(ierr); 400 ierr = VecDestroy(&y); CHKERRQ(ierr); 401 ierr = ISDestroy(&giis); CHKERRQ(ierr); 402 ierr = ISDestroy(&giois); CHKERRQ(ierr); 403 } 404 ierr = ISDestroy(&goid);CHKERRQ(ierr); 405 /* Create the subdomain work vectors. */ 406 ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr); 407 ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr); 408 ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr); 409 ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr); 410 for (i=0, on=0; i<osm->n; ++i, on += oni) { 411 PetscInt oNi; 412 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 413 ierr = ISGetSize(osm->ois[i],&oNi);CHKERRQ(ierr); 414 ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);CHKERRQ(ierr); 415 ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);CHKERRQ(ierr); 416 } 417 ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr); 418 ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr); 419 /* Create the local solvers */ 420 ierr = PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr); 421 for (i=0; i<osm->n; i++) { /* KSPs are local */ 422 ierr = KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);CHKERRQ(ierr); 423 ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr); 424 ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr); 425 ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr); 426 ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); 427 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 428 ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr); 429 ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr); 430 osm->ksp[i] = ksp; 431 } 432 scall = MAT_INITIAL_MATRIX; 433 434 }/*if(!pc->setupcalled)*/ 435 else { 436 /* 437 Destroy the blocks from the previous iteration 438 */ 439 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 440 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 441 scall = MAT_INITIAL_MATRIX; 442 } 443 } 444 445 /* 446 Extract out the submatrices. 447 */ 448 if(size > 1) { 449 ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->ois, osm->ois,scall,&osm->pmat);CHKERRQ(ierr); 450 } 451 else { 452 ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->ois, osm->ois,scall,&osm->pmat);CHKERRQ(ierr); 453 } 454 if (scall == MAT_INITIAL_MATRIX) { 455 ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr); 456 for (i=0; i<osm->n; i++) { 457 ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr); 458 ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr); 459 } 460 } 461 462 /* Return control to the user so that the submatrices can be modified (e.g., to apply 463 different boundary conditions for the submatrices than for the global problem) */ 464 ierr = PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr); 465 466 /* 467 Loop over submatrices putting them into local ksps 468 */ 469 for (i=0; i<osm->n; i++) { 470 ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr); 471 if (!pc->setupcalled) { 472 ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr); 473 } 474 } 475 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "PCSetUpOnBlocks_GASM" 481 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc) 482 { 483 PC_GASM *osm = (PC_GASM*)pc->data; 484 PetscErrorCode ierr; 485 PetscInt i; 486 487 PetscFunctionBegin; 488 for (i=0; i<osm->n; i++) { 489 ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr); 490 } 491 PetscFunctionReturn(0); 492 } 493 494 #undef __FUNCT__ 495 #define __FUNCT__ "PCApply_GASM" 496 static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y) 497 { 498 PC_GASM *osm = (PC_GASM*)pc->data; 499 PetscErrorCode ierr; 500 PetscInt i; 501 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 502 503 PetscFunctionBegin; 504 /* 505 Support for limiting the restriction or interpolation only to the inner 506 subdomain values (leaving the other values 0). 507 */ 508 if(!(osm->type & PC_GASM_RESTRICT)) { 509 /* have to zero the work RHS since scatter may leave some slots empty */ 510 ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr); 511 ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 512 } 513 else { 514 ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 515 } 516 ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr); 517 if(!(osm->type & PC_GASM_RESTRICT)) { 518 ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 519 } 520 else { 521 ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 522 } 523 /* do the subdomain solves */ 524 for (i=0; i<osm->n; ++i) { 525 ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 526 } 527 ierr = VecZeroEntries(y); CHKERRQ(ierr); 528 if(!(osm->type & PC_GASM_INTERPOLATE)) { 529 ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 530 ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); PetscFunctionReturn(0); 531 } 532 else { 533 ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 534 ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); PetscFunctionReturn(0); 535 } 536 } 537 538 #undef __FUNCT__ 539 #define __FUNCT__ "PCApplyTranspose_GASM" 540 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y) 541 { 542 PC_GASM *osm = (PC_GASM*)pc->data; 543 PetscErrorCode ierr; 544 PetscInt i; 545 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 546 547 PetscFunctionBegin; 548 /* 549 Support for limiting the restriction or interpolation to only local 550 subdomain values (leaving the other values 0). 551 552 Note: these are reversed from the PCApply_GASM() because we are applying the 553 transpose of the three terms 554 */ 555 if (!(osm->type & PC_GASM_INTERPOLATE)) { 556 /* have to zero the work RHS since scatter may leave some slots empty */ 557 ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr); 558 ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 559 } 560 else { 561 ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 562 } 563 ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr); 564 if (!(osm->type & PC_GASM_INTERPOLATE)) { 565 ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 566 } 567 else { 568 ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 569 } 570 /* do the local solves */ 571 for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */ 572 ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 573 } 574 ierr = VecZeroEntries(y); CHKERRQ(ierr); 575 if (!(osm->type & PC_GASM_RESTRICT)) { 576 ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 577 ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 578 } 579 else { 580 ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 581 ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 582 } 583 584 PetscFunctionReturn(0); 585 } 586 587 #undef __FUNCT__ 588 #define __FUNCT__ "PCReset_GASM" 589 static PetscErrorCode PCReset_GASM(PC pc) 590 { 591 PC_GASM *osm = (PC_GASM*)pc->data; 592 PetscErrorCode ierr; 593 PetscInt i; 594 595 PetscFunctionBegin; 596 if (osm->ksp) { 597 for (i=0; i<osm->n; i++) { 598 ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr); 599 } 600 } 601 if (osm->pmat) { 602 if (osm->n > 0) { 603 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 604 } 605 } 606 if (osm->x) { 607 for (i=0; i<osm->n; i++) { 608 ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr); 609 ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr); 610 } 611 } 612 ierr = VecDestroy(&osm->gx);CHKERRQ(ierr); 613 ierr = VecDestroy(&osm->gy);CHKERRQ(ierr); 614 615 ierr = VecScatterDestroy(&osm->gorestriction);CHKERRQ(ierr); 616 ierr = VecScatterDestroy(&osm->girestriction);CHKERRQ(ierr); 617 if (osm->ois) { 618 ierr = PCGASMDestroySubdomains(osm->n,osm->ois,osm->iis);CHKERRQ(ierr); 619 osm->ois = 0; 620 osm->iis = 0; 621 } 622 PetscFunctionReturn(0); 623 } 624 625 #undef __FUNCT__ 626 #define __FUNCT__ "PCDestroy_GASM" 627 static PetscErrorCode PCDestroy_GASM(PC pc) 628 { 629 PC_GASM *osm = (PC_GASM*)pc->data; 630 PetscErrorCode ierr; 631 PetscInt i; 632 633 PetscFunctionBegin; 634 ierr = PCReset_GASM(pc);CHKERRQ(ierr); 635 if (osm->ksp) { 636 for (i=0; i<osm->n; i++) { 637 ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr); 638 } 639 ierr = PetscFree(osm->ksp);CHKERRQ(ierr); 640 } 641 ierr = PetscFree(osm->x);CHKERRQ(ierr); 642 ierr = PetscFree(osm->y);CHKERRQ(ierr); 643 ierr = PetscFree(pc->data);CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } 646 647 #undef __FUNCT__ 648 #define __FUNCT__ "PCSetFromOptions_GASM" 649 static PetscErrorCode PCSetFromOptions_GASM(PC pc) { 650 PC_GASM *osm = (PC_GASM*)pc->data; 651 PetscErrorCode ierr; 652 PetscInt blocks,ovl; 653 PetscBool symset,flg; 654 PCGASMType gasmtype; 655 656 PetscFunctionBegin; 657 /* set the type to symmetric if matrix is symmetric */ 658 if (!osm->type_set && pc->pmat) { 659 ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr); 660 if (symset && flg) { osm->type = PC_GASM_BASIC; } 661 } 662 ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr); 663 ierr = PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr); 664 osm->create_local = PETSC_TRUE; 665 ierr = PetscOptionsBool("-pc_gasm_subdomains_create_local","Whether to make autocreated subdomains local (true by default)","PCGASMSetTotalSubdomains",osm->create_local,&osm->create_local,&flg);CHKERRQ(ierr); 666 if(!osm->create_local) SETERRQ(((PetscObject)pc)->comm, PETSC_ERR_SUP, "No support for autocreation of nonlocal subdomains yet."); 667 668 if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks,osm->create_local);CHKERRQ(ierr); } 669 ierr = PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr); 670 if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); } 671 flg = PETSC_FALSE; 672 ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr); 673 if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); } 674 ierr = PetscOptionsTail();CHKERRQ(ierr); 675 PetscFunctionReturn(0); 676 } 677 678 /*------------------------------------------------------------------------------------*/ 679 680 EXTERN_C_BEGIN 681 #undef __FUNCT__ 682 #define __FUNCT__ "PCGASMSetSubdomains_GASM" 683 PetscErrorCode PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS is_local[],IS is[]) 684 { 685 PC_GASM *osm = (PC_GASM*)pc->data; 686 PetscErrorCode ierr; 687 PetscInt i; 688 689 PetscFunctionBegin; 690 if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, n = %D",n); 691 if (pc->setupcalled && (n != osm->n || is_local)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp()."); 692 693 if (!pc->setupcalled) { 694 osm->n = n; 695 osm->ois = 0; 696 osm->iis = 0; 697 if (is) { 698 for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);} 699 } 700 if (is_local) { 701 for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);} 702 } 703 if (osm->ois) { 704 ierr = PCGASMDestroySubdomains(osm->n,osm->iis,osm->ois);CHKERRQ(ierr); 705 } 706 if (is) { 707 ierr = PetscMalloc(n*sizeof(IS),&osm->ois);CHKERRQ(ierr); 708 for (i=0; i<n; i++) { osm->ois[i] = is[i]; } 709 /* Flag indicating that the user has set outer subdomains, so PCGASM should not increase their size. */ 710 osm->overlap = -1; 711 } 712 if (is_local) { 713 ierr = PetscMalloc(n*sizeof(IS),&osm->iis);CHKERRQ(ierr); 714 for (i=0; i<n; i++) { osm->iis[i] = is_local[i]; } 715 } 716 } 717 PetscFunctionReturn(0); 718 } 719 EXTERN_C_END 720 721 EXTERN_C_BEGIN 722 #undef __FUNCT__ 723 #define __FUNCT__ "PCGASMSetTotalSubdomains_GASM" 724 PetscErrorCode PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N, PetscBool create_local) { 725 PC_GASM *osm = (PC_GASM*)pc->data; 726 PetscErrorCode ierr; 727 PetscMPIInt rank,size; 728 PetscInt n; 729 PetscInt Nmin, Nmax; 730 PetscFunctionBegin; 731 if(!create_local) SETERRQ(((PetscObject)pc)->comm, PETSC_ERR_SUP, "No suppor for autocreation of nonlocal subdomains."); 732 if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be > 0, N = %D",N); 733 ierr = MPI_Allreduce(&N,&Nmin,1,MPI_INT,MPI_MIN,((PetscObject)pc)->comm); CHKERRQ(ierr); 734 ierr = MPI_Allreduce(&N,&Nmax,1,MPI_INT,MPI_MAX,((PetscObject)pc)->comm); CHKERRQ(ierr); 735 if(Nmin != Nmax) 736 SETERRQ2(((PetscObject)pc)->comm, PETSC_ERR_ARG_WRONG, "All processors must use the same number of subdomains. min(N) = %D != %D = max(N)", Nmin, Nmax); 737 738 osm->create_local = create_local; 739 /* 740 Split the subdomains equally among all processors 741 */ 742 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 743 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 744 n = N/size + ((N % size) > rank); 745 if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one subdomain: total processors %d total blocks %D",(int)rank,(int)size,N); 746 if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp()."); 747 if (!pc->setupcalled) { 748 if (osm->ois) { 749 ierr = PCGASMDestroySubdomains(osm->n,osm->iis,osm->ois);CHKERRQ(ierr); 750 } 751 osm->N = N; 752 osm->n = n; 753 osm->nmax = N/size + ((N%size)?1:0); 754 osm->ois = 0; 755 osm->iis = 0; 756 } 757 PetscFunctionReturn(0); 758 } 759 EXTERN_C_END 760 761 EXTERN_C_BEGIN 762 #undef __FUNCT__ 763 #define __FUNCT__ "PCGASMSetOverlap_GASM" 764 PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl) 765 { 766 PC_GASM *osm = (PC_GASM*)pc->data; 767 768 PetscFunctionBegin; 769 if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested"); 770 if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp()."); 771 if (!pc->setupcalled) { 772 osm->overlap = ovl; 773 } 774 PetscFunctionReturn(0); 775 } 776 EXTERN_C_END 777 778 EXTERN_C_BEGIN 779 #undef __FUNCT__ 780 #define __FUNCT__ "PCGASMSetType_GASM" 781 PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type) 782 { 783 PC_GASM *osm = (PC_GASM*)pc->data; 784 785 PetscFunctionBegin; 786 osm->type = type; 787 osm->type_set = PETSC_TRUE; 788 PetscFunctionReturn(0); 789 } 790 EXTERN_C_END 791 792 EXTERN_C_BEGIN 793 #undef __FUNCT__ 794 #define __FUNCT__ "PCGASMSetSortIndices_GASM" 795 PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort) 796 { 797 PC_GASM *osm = (PC_GASM*)pc->data; 798 799 PetscFunctionBegin; 800 osm->sort_indices = doSort; 801 PetscFunctionReturn(0); 802 } 803 EXTERN_C_END 804 805 EXTERN_C_BEGIN 806 #undef __FUNCT__ 807 #define __FUNCT__ "PCGASMGetSubKSP_GASM" 808 /* 809 FIX: This routine might need to be modified once multiple ranks per subdomain are allowed. 810 In particular, it would upset the global subdomain number calculation. 811 */ 812 PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp) 813 { 814 PC_GASM *osm = (PC_GASM*)pc->data; 815 PetscErrorCode ierr; 816 817 PetscFunctionBegin; 818 if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here"); 819 820 if (n) { 821 *n = osm->n; 822 } 823 if (first) { 824 ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 825 *first -= osm->n; 826 } 827 if (ksp) { 828 /* Assume that local solves are now different; not necessarily 829 true, though! This flag is used only for PCView_GASM() */ 830 *ksp = osm->ksp; 831 osm->same_subdomain_solvers = PETSC_FALSE; 832 } 833 PetscFunctionReturn(0); 834 }/* PCGASMGetSubKSP_GASM() */ 835 EXTERN_C_END 836 837 838 #undef __FUNCT__ 839 #define __FUNCT__ "PCGASMSetSubdomains" 840 /*@C 841 PCGASMSetSubdomains - Sets the subdomains for this processor 842 for the additive Schwarz preconditioner. 843 844 Collective on PC 845 846 Input Parameters: 847 + pc - the preconditioner context 848 . n - the number of subdomains for this processor 849 . iis - the index sets that define this processor's local inner subdomains 850 (or PETSC_NULL for PETSc to determine subdomains) 851 - ois- the index sets that define this processor's local outer subdomains 852 (or PETSC_NULL to use the same as iis) 853 854 Notes: 855 The IS indices use the parallel, global numbering of the vector entries. 856 Inner subdomains are those where the correction is applied. 857 Outer subdomains are those where the residual necessary to obtain the 858 corrections is obtained (see PCGASMType for the use of inner/outer subdomains). 859 Both inner and outer subdomains can extend over several processors. 860 This processor's portion of a subdomain is known as a local subdomain. 861 862 By default the GASM preconditioner uses 1 (local) subdomain per processor. 863 Use PCGASMSetTotalSubdomains() to set the total number of subdomains across 864 all processors that PCGASM will create automatically, and to specify whether 865 they should be local or not. 866 867 868 Level: advanced 869 870 .keywords: PC, GASM, set, subdomains, additive Schwarz 871 872 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 873 PCGASMCreateSubdomains2D(), PCGASMGetSubdomains() 874 @*/ 875 PetscErrorCode PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[]) 876 { 877 PetscErrorCode ierr; 878 879 PetscFunctionBegin; 880 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 881 ierr = PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));CHKERRQ(ierr); 882 PetscFunctionReturn(0); 883 } 884 885 #undef __FUNCT__ 886 #define __FUNCT__ "PCGASMSetTotalSubdomains" 887 /*@C 888 PCGASMSetTotalSubdomains - Sets the total number of subdomains to use in the generalized additive 889 Schwarz preconditioner. The number of subdomains is cumulative across all processors in pc's 890 communicator. Either all or no processors in the PC communicator must call this routine with 891 the same N. The subdomains will be created automatically during PCSetUp(). 892 893 Collective on PC 894 895 Input Parameters: 896 + pc - the preconditioner context 897 . N - the total number of subdomains cumulative across all processors 898 - create_local - whether the subdomains to be created are to be local 899 900 Options Database Key: 901 To set the total number of subdomains and let PCGASM autocreate them, rather than specify the index sets, use the following options: 902 + -pc_gasm_total_subdomains <n> - sets the total number of subdomains to be autocreated by PCGASM 903 - -pc_gasm_subdomains_create_local <true|false> - whether autocreated subdomains should be local or not (default is true) 904 905 By default the GASM preconditioner uses 1 subdomain per processor. 906 907 908 Use PCGASMSetSubdomains() to set subdomains explicitly or to set different numbers 909 of subdomains per processor. 910 911 Level: advanced 912 913 .keywords: PC, GASM, set, total, global, subdomains, additive Schwarz 914 915 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 916 PCGASMCreateSubdomains2D() 917 @*/ 918 PetscErrorCode PCGASMSetTotalSubdomains(PC pc,PetscInt N, PetscBool create_local) 919 { 920 PetscErrorCode ierr; 921 922 PetscFunctionBegin; 923 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 924 ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt,PetscBool),(pc,N,create_local));CHKERRQ(ierr); 925 PetscFunctionReturn(0); 926 } 927 928 #undef __FUNCT__ 929 #define __FUNCT__ "PCGASMSetOverlap" 930 /*@ 931 PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the 932 additive Schwarz preconditioner. Either all or no processors in the 933 PC communicator must call this routine. 934 935 Logically Collective on PC 936 937 Input Parameters: 938 + pc - the preconditioner context 939 - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1) 940 941 Options Database Key: 942 . -pc_gasm_overlap <overlap> - Sets overlap 943 944 Notes: 945 By default the GASM preconditioner uses 1 subdomain per processor. To use 946 multiple subdomain per perocessor, see PCGASMSetTotalSubdomains() or 947 PCGASMSetSubdomains() (and the option -pc_gasm_total_subdomains <n>). 948 949 The overlap defaults to 1, so if one desires that no additional 950 overlap be computed beyond what may have been set with a call to 951 PCGASMSetTotalSubdomains() or PCGASMSetSubdomains(), then ovl 952 must be set to be 0. In particular, if one does not explicitly set 953 the subdomains in application code, then all overlap would be computed 954 internally by PETSc, and using an overlap of 0 would result in an GASM 955 variant that is equivalent to the block Jacobi preconditioner. 956 957 Note that one can define initial index sets with any overlap via 958 PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows 959 PETSc to extend that overlap further, if desired. 960 961 Level: intermediate 962 963 .keywords: PC, GASM, set, overlap 964 965 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetSubdomains(), PCGASMGetSubKSP(), 966 PCGASMCreateSubdomains2D(), PCGASMGetSubdomains() 967 @*/ 968 PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl) 969 { 970 PetscErrorCode ierr; 971 972 PetscFunctionBegin; 973 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 974 PetscValidLogicalCollectiveInt(pc,ovl,2); 975 ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr); 976 PetscFunctionReturn(0); 977 } 978 979 #undef __FUNCT__ 980 #define __FUNCT__ "PCGASMSetType" 981 /*@ 982 PCGASMSetType - Sets the type of restriction and interpolation used 983 for local problems in the additive Schwarz method. 984 985 Logically Collective on PC 986 987 Input Parameters: 988 + pc - the preconditioner context 989 - type - variant of GASM, one of 990 .vb 991 PC_GASM_BASIC - full interpolation and restriction 992 PC_GASM_RESTRICT - full restriction, local processor interpolation 993 PC_GASM_INTERPOLATE - full interpolation, local processor restriction 994 PC_GASM_NONE - local processor restriction and interpolation 995 .ve 996 997 Options Database Key: 998 . -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type 999 1000 Level: intermediate 1001 1002 .keywords: PC, GASM, set, type 1003 1004 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetSubdomains(), PCGASMGetSubKSP(), 1005 PCGASMCreateSubdomains2D() 1006 @*/ 1007 PetscErrorCode PCGASMSetType(PC pc,PCGASMType type) 1008 { 1009 PetscErrorCode ierr; 1010 1011 PetscFunctionBegin; 1012 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1013 PetscValidLogicalCollectiveEnum(pc,type,2); 1014 ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr); 1015 PetscFunctionReturn(0); 1016 } 1017 1018 #undef __FUNCT__ 1019 #define __FUNCT__ "PCGASMSetSortIndices" 1020 /*@ 1021 PCGASMSetSortIndices - Determines whether subdomain indices are sorted. 1022 1023 Logically Collective on PC 1024 1025 Input Parameters: 1026 + pc - the preconditioner context 1027 - doSort - sort the subdomain indices 1028 1029 Level: intermediate 1030 1031 .keywords: PC, GASM, set, type 1032 1033 .seealso: PCGASMSetSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(), 1034 PCGASMCreateSubdomains2D() 1035 @*/ 1036 PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort) 1037 { 1038 PetscErrorCode ierr; 1039 1040 PetscFunctionBegin; 1041 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1042 PetscValidLogicalCollectiveBool(pc,doSort,2); 1043 ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr); 1044 PetscFunctionReturn(0); 1045 } 1046 1047 #undef __FUNCT__ 1048 #define __FUNCT__ "PCGASMGetSubKSP" 1049 /*@C 1050 PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on 1051 this processor. 1052 1053 Collective on PC iff first_local is requested 1054 1055 Input Parameter: 1056 . pc - the preconditioner context 1057 1058 Output Parameters: 1059 + n_local - the number of blocks on this processor or PETSC_NULL 1060 . first_local - the global number of the first block on this processor or PETSC_NULL, 1061 all processors must request or all must pass PETSC_NULL 1062 - ksp - the array of KSP contexts 1063 1064 Note: 1065 After PCGASMGetSubKSP() the array of KSPes is not to be freed 1066 1067 Currently for some matrix implementations only 1 block per processor 1068 is supported. 1069 1070 You must call KSPSetUp() before calling PCGASMGetSubKSP(). 1071 1072 Level: advanced 1073 1074 .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context 1075 1076 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap(), 1077 PCGASMCreateSubdomains2D(), 1078 @*/ 1079 PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[]) 1080 { 1081 PetscErrorCode ierr; 1082 1083 PetscFunctionBegin; 1084 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1085 ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 /* -------------------------------------------------------------------------------------*/ 1090 /*MC 1091 PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with 1092 its own KSP object. 1093 1094 Options Database Keys: 1095 + -pc_gasm_total_block_count <n> - Sets total number of local subdomains (known as blocks) to be distributed among processors 1096 . -pc_gasm_view_subdomains - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view 1097 . -pc_gasm_print_subdomains - activates the printing of subdomain indices in PCSetUp() 1098 . -pc_gasm_overlap <ovl> - Sets overlap by which to (automatically) extend local subdomains 1099 - -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type 1100 1101 IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you 1102 will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use 1103 -pc_gasm_type basic to use the standard GASM. 1104 1105 Notes: Each processor can have one or more blocks, but a block cannot be shared by more 1106 than one processor. Defaults to one block per processor. 1107 1108 To set options on the solvers for each block append -sub_ to all the KSP, and PC 1109 options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly 1110 1111 To set the options on the solvers separate for each block call PCGASMGetSubKSP() 1112 and set the options directly on the resulting KSP object (you can access its PC 1113 with KSPGetPC()) 1114 1115 1116 Level: beginner 1117 1118 Concepts: additive Schwarz method 1119 1120 References: 1121 An additive variant of the Schwarz alternating method for the case of many subregions 1122 M Dryja, OB Widlund - Courant Institute, New York University Technical report 1123 1124 Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations, 1125 Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X. 1126 1127 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 1128 PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetSubdomains(), 1129 PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType() 1130 1131 M*/ 1132 1133 EXTERN_C_BEGIN 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "PCCreate_GASM" 1136 PetscErrorCode PCCreate_GASM(PC pc) 1137 { 1138 PetscErrorCode ierr; 1139 PC_GASM *osm; 1140 1141 PetscFunctionBegin; 1142 ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr); 1143 osm->N = PETSC_DECIDE; 1144 osm->n = PETSC_DECIDE; 1145 osm->nmax = 0; 1146 osm->overlap = 1; 1147 osm->ksp = 0; 1148 osm->gorestriction = 0; 1149 osm->girestriction = 0; 1150 osm->gx = 0; 1151 osm->gy = 0; 1152 osm->x = 0; 1153 osm->y = 0; 1154 osm->ois = 0; 1155 osm->iis = 0; 1156 osm->pmat = 0; 1157 osm->type = PC_GASM_RESTRICT; 1158 osm->same_subdomain_solvers = PETSC_TRUE; 1159 osm->sort_indices = PETSC_TRUE; 1160 1161 pc->data = (void*)osm; 1162 pc->ops->apply = PCApply_GASM; 1163 pc->ops->applytranspose = PCApplyTranspose_GASM; 1164 pc->ops->setup = PCSetUp_GASM; 1165 pc->ops->reset = PCReset_GASM; 1166 pc->ops->destroy = PCDestroy_GASM; 1167 pc->ops->setfromoptions = PCSetFromOptions_GASM; 1168 pc->ops->setuponblocks = PCSetUpOnBlocks_GASM; 1169 pc->ops->view = PCView_GASM; 1170 pc->ops->applyrichardson = 0; 1171 1172 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSubdomains_C","PCGASMSetSubdomains_GASM", 1173 PCGASMSetSubdomains_GASM);CHKERRQ(ierr); 1174 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM", 1175 PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr); 1176 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM", 1177 PCGASMSetOverlap_GASM);CHKERRQ(ierr); 1178 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM", 1179 PCGASMSetType_GASM);CHKERRQ(ierr); 1180 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM", 1181 PCGASMSetSortIndices_GASM);CHKERRQ(ierr); 1182 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM", 1183 PCGASMGetSubKSP_GASM);CHKERRQ(ierr); 1184 PetscFunctionReturn(0); 1185 } 1186 EXTERN_C_END 1187 1188 1189 #undef __FUNCT__ 1190 #define __FUNCT__ "PCGASMCreateLocalSubdomains" 1191 /*@C 1192 PCGASMCreateLocalSubdomains - Creates n local index sets for the overlapping 1193 Schwarz preconditioner for a any problem based on its matrix. 1194 1195 Collective 1196 1197 Input Parameters: 1198 + A - The global matrix operator 1199 . overlap - amount of overlap in outer subdomains 1200 - n - the number of local subdomains 1201 1202 Output Parameters: 1203 + iis - the array of index sets defining the local inner subdomains (on which the correction is applied) 1204 - ois - the array of index sets defining the local outer subdomains (on which the residual is computed) 1205 1206 Level: advanced 1207 1208 Note: this generates n nonoverlapping local inner subdomains on PETSC_COMM_SELF; 1209 PCGASM will generate the overlap from these if you use them in PCGASMSetSubdomains() and set a 1210 nonzero overlap with PCGASMSetOverlap() 1211 1212 In the Fortran version you must provide the array outis[] already allocated of length n. 1213 1214 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid 1215 1216 .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains() 1217 @*/ 1218 PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt overlap, PetscInt n, IS* iis[], IS* ois[]) 1219 { 1220 MatPartitioning mpart; 1221 const char *prefix; 1222 PetscErrorCode (*f)(Mat,MatReuse,Mat*); 1223 PetscMPIInt size; 1224 PetscInt i,j,rstart,rend,bs; 1225 PetscBool isbaij = PETSC_FALSE,foundpart = PETSC_FALSE; 1226 Mat Ad = PETSC_NULL, adj; 1227 IS ispart,isnumb,*is; 1228 PetscErrorCode ierr; 1229 1230 PetscFunctionBegin; 1231 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1232 PetscValidPointer(iis,4); 1233 if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n); 1234 1235 /* Get prefix, row distribution, and block size */ 1236 ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr); 1237 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1238 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 1239 if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs); 1240 1241 /* Get diagonal block from matrix if possible */ 1242 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1243 ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr); 1244 if (f) { 1245 ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr); 1246 } else if (size == 1) { 1247 Ad = A; 1248 } 1249 if (Ad) { 1250 ierr = PetscObjectTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 1251 if (!isbaij) {ierr = PetscObjectTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);} 1252 } 1253 if (Ad && n > 1) { 1254 PetscBool match,done; 1255 /* Try to setup a good matrix partitioning if available */ 1256 ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr); 1257 ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr); 1258 ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr); 1259 ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr); 1260 if (!match) { 1261 ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr); 1262 } 1263 if (!match) { /* assume a "good" partitioner is available */ 1264 PetscInt na,*ia,*ja; 1265 ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1266 if (done) { 1267 /* Build adjacency matrix by hand. Unfortunately a call to 1268 MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will 1269 remove the block-aij structure and we cannot expect 1270 MatPartitioning to split vertices as we need */ 1271 PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0; 1272 nnz = 0; 1273 for (i=0; i<na; i++) { /* count number of nonzeros */ 1274 len = ia[i+1] - ia[i]; 1275 row = ja + ia[i]; 1276 for (j=0; j<len; j++) { 1277 if (row[j] == i) { /* don't count diagonal */ 1278 len--; break; 1279 } 1280 } 1281 nnz += len; 1282 } 1283 ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr); 1284 ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr); 1285 nnz = 0; 1286 iia[0] = 0; 1287 for (i=0; i<na; i++) { /* fill adjacency */ 1288 cnt = 0; 1289 len = ia[i+1] - ia[i]; 1290 row = ja + ia[i]; 1291 for (j=0; j<len; j++) { 1292 if (row[j] != i) { /* if not diagonal */ 1293 jja[nnz+cnt++] = row[j]; 1294 } 1295 } 1296 nnz += cnt; 1297 iia[i+1] = nnz; 1298 } 1299 /* Partitioning of the adjacency matrix */ 1300 ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr); 1301 ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr); 1302 ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr); 1303 ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr); 1304 ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr); 1305 ierr = MatDestroy(&adj);CHKERRQ(ierr); 1306 foundpart = PETSC_TRUE; 1307 } 1308 ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1309 } 1310 ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr); 1311 } 1312 ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr); 1313 if (!foundpart) { 1314 1315 /* Partitioning by contiguous chunks of rows */ 1316 1317 PetscInt mbs = (rend-rstart)/bs; 1318 PetscInt start = rstart; 1319 for (i=0; i<n; i++) { 1320 PetscInt count = (mbs/n + ((mbs % n) > i)) * bs; 1321 ierr = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr); 1322 start += count; 1323 } 1324 1325 } else { 1326 1327 /* Partitioning by adjacency of diagonal block */ 1328 1329 const PetscInt *numbering; 1330 PetscInt *count,nidx,*indices,*newidx,start=0; 1331 /* Get node count in each partition */ 1332 ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr); 1333 ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr); 1334 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1335 for (i=0; i<n; i++) count[i] *= bs; 1336 } 1337 /* Build indices from node numbering */ 1338 ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr); 1339 ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr); 1340 for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */ 1341 ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr); 1342 ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr); 1343 ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr); 1344 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1345 ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr); 1346 for (i=0; i<nidx; i++) 1347 for (j=0; j<bs; j++) 1348 newidx[i*bs+j] = indices[i]*bs + j; 1349 ierr = PetscFree(indices);CHKERRQ(ierr); 1350 nidx *= bs; 1351 indices = newidx; 1352 } 1353 /* Shift to get global indices */ 1354 for (i=0; i<nidx; i++) indices[i] += rstart; 1355 1356 /* Build the index sets for each block */ 1357 for (i=0; i<n; i++) { 1358 ierr = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr); 1359 ierr = ISSort(is[i]);CHKERRQ(ierr); 1360 start += count[i]; 1361 } 1362 1363 ierr = PetscFree(count); 1364 ierr = PetscFree(indices); 1365 ierr = ISDestroy(&isnumb);CHKERRQ(ierr); 1366 ierr = ISDestroy(&ispart);CHKERRQ(ierr); 1367 } 1368 *iis = is; 1369 if(!ois) PetscFunctionReturn(0); 1370 /* 1371 Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap 1372 has been requested, copy the inner subdomains over so they can be modified. 1373 */ 1374 ierr = PetscMalloc(n*sizeof(IS),ois);CHKERRQ(ierr); 1375 for (i=0; i<n; ++i) { 1376 if (overlap > 0) { /* With positive overlap, (*iis)[i] will be modified */ 1377 ierr = ISDuplicate((*iis)[i],(*ois)+i);CHKERRQ(ierr); 1378 ierr = ISCopy((*iis)[i],(*ois)[i]);CHKERRQ(ierr); 1379 } else { 1380 ierr = PetscObjectReference((PetscObject)(*iis)[i]);CHKERRQ(ierr); 1381 (*ois)[i] = (*iis)[i]; 1382 } 1383 } 1384 if (overlap > 0) { 1385 /* Extend the "overlapping" regions by a number of steps */ 1386 ierr = MatIncreaseOverlap(A,n,*ois,overlap);CHKERRQ(ierr); 1387 } 1388 PetscFunctionReturn(0); 1389 } 1390 1391 #undef __FUNCT__ 1392 #define __FUNCT__ "PCGASMDestroySubdomains" 1393 /*@C 1394 PCGASMDestroySubdomains - Destroys the index sets created with 1395 PCGASMCreateLocalSubdomains() or PCGASMCreateSubdomains2D. Should be 1396 called after setting subdomains with PCGASMSetSubdomains(). 1397 1398 Collective 1399 1400 Input Parameters: 1401 + n - the number of index sets 1402 . iis - the array of inner subdomains, 1403 - ois - the array of outer subdomains, can be PETSC_NULL 1404 1405 Level: intermediate 1406 1407 Notes: this is merely a convenience subroutine that walks each list, 1408 destroys each IS on the list, and then frees the list. 1409 1410 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid 1411 1412 .seealso: PCGASMCreateLocalSubdomains(), PCGASMSetSubdomains() 1413 @*/ 1414 PetscErrorCode PCGASMDestroySubdomains(PetscInt n, IS iis[], IS ois[]) 1415 { 1416 PetscInt i; 1417 PetscErrorCode ierr; 1418 PetscFunctionBegin; 1419 if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n); 1420 PetscValidPointer(iis,2); 1421 for (i=0; i<n; i++) { ierr = ISDestroy(&iis[i]);CHKERRQ(ierr); } 1422 ierr = PetscFree(iis);CHKERRQ(ierr); 1423 if (ois) { 1424 for (i=0; i<n; i++) { ierr = ISDestroy(&ois[i]);CHKERRQ(ierr); } 1425 ierr = PetscFree(ois);CHKERRQ(ierr); 1426 } 1427 PetscFunctionReturn(0); 1428 } 1429 1430 1431 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \ 1432 { \ 1433 PetscInt first_row = first/M, last_row = last/M+1; \ 1434 /* \ 1435 Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \ 1436 of the bounding box of the intersection of the subdomain with the local ownership range (local \ 1437 subdomain). \ 1438 Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \ 1439 of the intersection. \ 1440 */ \ 1441 /* ylow_loc is the grid row containing the first element of the local sumbdomain */ \ 1442 *ylow_loc = PetscMax(first_row,ylow); \ 1443 /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \ 1444 *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft; \ 1445 /* yhigh_loc is the grid row above the last local subdomain element */ \ 1446 *yhigh_loc = PetscMin(last_row,yhigh); \ 1447 /* xright is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \ 1448 *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright; \ 1449 /* Now compute the size of the local subdomain n. */ \ 1450 *n = 0; \ 1451 if(*ylow_loc < *yhigh_loc) { \ 1452 PetscInt width = xright-xleft; \ 1453 *n += width*(*yhigh_loc-*ylow_loc-1); \ 1454 *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \ 1455 *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \ 1456 }\ 1457 } 1458 1459 1460 1461 #undef __FUNCT__ 1462 #define __FUNCT__ "PCGASMCreateSubdomains2D" 1463 /*@ 1464 PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz 1465 preconditioner for a two-dimensional problem on a regular grid. 1466 1467 Collective 1468 1469 Input Parameters: 1470 + M, N - the global number of grid points in the x and y directions 1471 . Mdomains, Ndomains - the global number of subdomains in the x and y directions 1472 . dof - degrees of freedom per node 1473 - overlap - overlap in mesh lines 1474 1475 Output Parameters: 1476 + Nsub - the number of local subdomains created 1477 . iis - array of index sets defining inner (nonoverlapping) subdomains 1478 - ois - array of index sets defining outer (overlapping, if overlap > 0) subdomains 1479 1480 1481 Level: advanced 1482 1483 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid 1484 1485 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetSubdomains(), PCGASMGetSubKSP(), 1486 PCGASMSetOverlap() 1487 @*/ 1488 PetscErrorCode PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **iis,IS **ois) 1489 { 1490 PetscErrorCode ierr; 1491 PetscMPIInt size, rank; 1492 PetscInt i, j; 1493 PetscInt maxheight, maxwidth; 1494 PetscInt xstart, xleft, xright, xleft_loc, xright_loc; 1495 PetscInt ystart, ylow, yhigh, ylow_loc, yhigh_loc; 1496 PetscInt x[2][2], y[2][2], n[2]; 1497 PetscInt first, last; 1498 PetscInt nidx, *idx; 1499 PetscInt ii,jj,s,q,d; 1500 PetscInt k,kk; 1501 PetscMPIInt color; 1502 MPI_Comm comm, subcomm; 1503 IS **xis = 0, **is = ois, **is_local = iis; 1504 1505 PetscFunctionBegin; 1506 ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr); 1507 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1508 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1509 ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr); 1510 if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) " 1511 "does not respect the number of degrees of freedom per grid point %D", first, last, dof); 1512 1513 /* Determine the number of domains with nonzero intersections with the local ownership range. */ 1514 s = 0; 1515 ystart = 0; 1516 for (j=0; j<Ndomains; ++j) { 1517 maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */ 1518 if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N); 1519 /* Vertical domain limits with an overlap. */ 1520 ylow = PetscMax(ystart - overlap,0); 1521 yhigh = PetscMin(ystart + maxheight + overlap,N); 1522 xstart = 0; 1523 for (i=0; i<Mdomains; ++i) { 1524 maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */ 1525 if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M); 1526 /* Horizontal domain limits with an overlap. */ 1527 xleft = PetscMax(xstart - overlap,0); 1528 xright = PetscMin(xstart + maxwidth + overlap,M); 1529 /* 1530 Determine whether this subdomain intersects this processor's ownership range of pc->pmat. 1531 */ 1532 PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx)); 1533 if(nidx) { 1534 ++s; 1535 } 1536 xstart += maxwidth; 1537 }/* for(i = 0; i < Mdomains; ++i) */ 1538 ystart += maxheight; 1539 }/* for(j = 0; j < Ndomains; ++j) */ 1540 /* Now we can allocate the necessary number of ISs. */ 1541 *nsub = s; 1542 ierr = PetscMalloc((*nsub)*sizeof(IS*),is);CHKERRQ(ierr); 1543 ierr = PetscMalloc((*nsub)*sizeof(IS*),is_local);CHKERRQ(ierr); 1544 s = 0; 1545 ystart = 0; 1546 for (j=0; j<Ndomains; ++j) { 1547 maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */ 1548 if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N); 1549 /* Vertical domain limits with an overlap. */ 1550 y[0][0] = PetscMax(ystart - overlap,0); 1551 y[0][1] = PetscMin(ystart + maxheight + overlap,N); 1552 /* Vertical domain limits without an overlap. */ 1553 y[1][0] = ystart; 1554 y[1][1] = PetscMin(ystart + maxheight,N); 1555 xstart = 0; 1556 for (i=0; i<Mdomains; ++i) { 1557 maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */ 1558 if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M); 1559 /* Horizontal domain limits with an overlap. */ 1560 x[0][0] = PetscMax(xstart - overlap,0); 1561 x[0][1] = PetscMin(xstart + maxwidth + overlap,M); 1562 /* Horizontal domain limits without an overlap. */ 1563 x[1][0] = xstart; 1564 x[1][1] = PetscMin(xstart+maxwidth,M); 1565 /* 1566 Determine whether this domain intersects this processor's ownership range of pc->pmat. 1567 Do this twice: first for the domains with overlaps, and once without. 1568 During the first pass create the subcommunicators, and use them on the second pass as well. 1569 */ 1570 for(q = 0; q < 2; ++q) { 1571 /* 1572 domain limits, (xleft, xright) and (ylow, yheigh) are adjusted 1573 according to whether the domain with an overlap or without is considered. 1574 */ 1575 xleft = x[q][0]; xright = x[q][1]; 1576 ylow = y[q][0]; yhigh = y[q][1]; 1577 PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx)); 1578 nidx *= dof; 1579 n[q] = nidx; 1580 /* 1581 Based on the counted number of indices in the local domain *with an overlap*, 1582 construct a subcommunicator of all the processors supporting this domain. 1583 Observe that a domain with an overlap might have nontrivial local support, 1584 while the domain without an overlap might not. Hence, the decision to participate 1585 in the subcommunicator must be based on the domain with an overlap. 1586 */ 1587 if (q == 0) { 1588 if(nidx) { 1589 color = 1; 1590 } else { 1591 color = MPI_UNDEFINED; 1592 } 1593 ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr); 1594 } 1595 /* 1596 Proceed only if the number of local indices *with an overlap* is nonzero. 1597 */ 1598 if (n[0]) { 1599 if(q == 0) { 1600 xis = is; 1601 } 1602 if (q == 1) { 1603 /* 1604 The IS for the no-overlap subdomain shares a communicator with the overlapping domain. 1605 Moreover, if the overlap is zero, the two ISs are identical. 1606 */ 1607 if (overlap == 0) { 1608 (*is_local)[s] = (*is)[s]; 1609 ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr); 1610 continue; 1611 } else { 1612 xis = is_local; 1613 subcomm = ((PetscObject)(*is)[s])->comm; 1614 } 1615 }/* if(q == 1) */ 1616 idx = PETSC_NULL; 1617 ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr); 1618 if(nidx) { 1619 k = 0; 1620 for (jj=ylow_loc; jj<yhigh_loc; ++jj) { 1621 PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft; 1622 PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright; 1623 kk = dof*(M*jj + x0); 1624 for (ii=x0; ii<x1; ++ii) { 1625 for(d = 0; d < dof; ++d) { 1626 idx[k++] = kk++; 1627 } 1628 } 1629 } 1630 } 1631 ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);CHKERRQ(ierr); 1632 }/* if(n[0]) */ 1633 }/* for(q = 0; q < 2; ++q) */ 1634 if(n[0]) { 1635 ++s; 1636 } 1637 xstart += maxwidth; 1638 }/* for(i = 0; i < Mdomains; ++i) */ 1639 ystart += maxheight; 1640 }/* for(j = 0; j < Ndomains; ++j) */ 1641 PetscFunctionReturn(0); 1642 } 1643 1644 #undef __FUNCT__ 1645 #define __FUNCT__ "PCGASMGetSubdomains" 1646 /*@C 1647 PCGASMGetSubdomains - Gets the subdomains supported on this processor 1648 for the additive Schwarz preconditioner. 1649 1650 Not Collective 1651 1652 Input Parameter: 1653 . pc - the preconditioner context 1654 1655 Output Parameters: 1656 + n - the number of subdomains for this processor (default value = 1) 1657 . iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be PETSC_NULL) 1658 - ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be PETSC_NULL) 1659 1660 1661 Notes: 1662 The user is responsible for destroying the ISs and freeing the returned arrays. 1663 The IS numbering is in the parallel, global numbering of the vector. 1664 1665 Level: advanced 1666 1667 .keywords: PC, GASM, get, subdomains, additive Schwarz 1668 1669 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 1670 PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubmatrices() 1671 @*/ 1672 PetscErrorCode PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[]) 1673 { 1674 PC_GASM *osm; 1675 PetscErrorCode ierr; 1676 PetscBool match; 1677 PetscInt i; 1678 PetscFunctionBegin; 1679 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1680 ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr); 1681 if (!match) 1682 SETERRQ2(((PetscObject)pc)->comm, PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name); 1683 osm = (PC_GASM*)pc->data; 1684 if (n) *n = osm->n; 1685 if(iis) { 1686 ierr = PetscMalloc(osm->n*sizeof(IS), iis); CHKERRQ(ierr); 1687 } 1688 if(ois) { 1689 ierr = PetscMalloc(osm->n*sizeof(IS), ois); CHKERRQ(ierr); 1690 } 1691 if(iis || ois) { 1692 for(i = 0; i < osm->n; ++i) { 1693 if(iis) (*iis)[i] = osm->iis[i]; 1694 if(ois) (*ois)[i] = osm->ois[i]; 1695 } 1696 } 1697 PetscFunctionReturn(0); 1698 } 1699 1700 #undef __FUNCT__ 1701 #define __FUNCT__ "PCGASMGetSubmatrices" 1702 /*@C 1703 PCGASMGetSubmatrices - Gets the local submatrices (for this processor 1704 only) for the additive Schwarz preconditioner. 1705 1706 Not Collective 1707 1708 Input Parameter: 1709 . pc - the preconditioner context 1710 1711 Output Parameters: 1712 + n - the number of matrices for this processor (default value = 1) 1713 - mat - the matrices 1714 1715 Notes: matrices returned by this routine have the same communicators as the index sets (IS) 1716 used to define subdomains in PCGASMSetSubdomains(), or PETSC_COMM_SELF, if the 1717 subdomains were defined using PCGASMSetTotalSubdomains(). 1718 Level: advanced 1719 1720 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi 1721 1722 .seealso: PCGASMSetTotalSubdomain(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 1723 PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains() 1724 @*/ 1725 PetscErrorCode PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[]) 1726 { 1727 PC_GASM *osm; 1728 PetscErrorCode ierr; 1729 PetscBool match; 1730 1731 PetscFunctionBegin; 1732 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1733 PetscValidIntPointer(n,2); 1734 if (mat) PetscValidPointer(mat,3); 1735 if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp()."); 1736 ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr); 1737 if (!match) SETERRQ2(((PetscObject)pc)->comm, PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name); 1738 osm = (PC_GASM*)pc->data; 1739 if (n) *n = osm->n; 1740 if (mat) *mat = osm->pmat; 1741 1742 PetscFunctionReturn(0); 1743 } 1744