/*
  This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
  In this version each processor may have any number of subdomains and a subdomain may live on multiple
  processors.

       N    - total number of local subdomains on all processors (set in PCGASMSetTotalBlockCount() or calculated in PCSetUp_GASM())
       n    - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalBlockCount())
       nmax - maximum number of local subdomains per processor (calculated in PCGASMSetTotalBlockCount() or in PCSetUp_GASM())
*/
#include <petsc-private/pcimpl.h>     /*I "petscpc.h" I*/

typedef struct {
  PetscInt    N,n,nmax;
  PetscInt    overlap;              /* overlap requested by user */
  KSP         *ksp;                 /* linear solvers for each block */
  Vec         gx,gy;                /* Merged work vectors */
  Vec         *x,*y;                /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
  IS          gis, gis_local;       /* merged ISs */
  VecScatter  grestriction;         /* merged restriction */
  VecScatter  gprolongation;        /* merged prolongation */
  IS          *is;                  /* index set that defines each overlapping subdomain */
  IS          *is_local;            /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */
  Mat         *pmat;                /* subdomain block matrices */
  PCGASMType  type;                 /* use reduced interpolation, restriction or both */
  PetscBool   type_set;             /* if user set this value (so won't change it for symmetric problems) */
  PetscBool   same_local_solves;    /* flag indicating whether all local solvers are the same */
  PetscBool   sort_indices;         /* flag to sort subdomain indices */
} PC_GASM;
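/*
   Illustration (not part of the library code): the split work vectors x[i]/y[i] alias the storage of the
   merged vectors gx/gy, exactly the way PCSetUp_GASM() builds them further below.  A minimal sketch,
   assuming a merged vector of local length dd and a single subdomain of local length dn starting at
   offset off on communicator subcomm (dd, dn, off, comm and subcomm are placeholder names):

     Vec         g, sub;
     PetscScalar *garray;
     ierr = VecCreateMPI(comm,dd,PETSC_DECIDE,&g);CHKERRQ(ierr);
     ierr = VecGetArray(g,&garray);CHKERRQ(ierr);
     ierr = VecCreateMPIWithArray(subcomm,1,dn,PETSC_DECIDE,garray+off,&sub);CHKERRQ(ierr);
     ierr = VecRestoreArray(g,&garray);CHKERRQ(ierr);

   No data is copied: writes to sub are visible through g, which is why the per-subdomain solves in
   PCApply_GASM() can read from and write into the merged vectors directly.
*/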

#undef __FUNCT__
#define __FUNCT__ "PCGASMSubdomainView_Private"
static PetscErrorCode PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscInt       j,nidx;
  const PetscInt *idx;
  PetscViewer    sviewer;
  char           *cidx;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i < 0 || i >= osm->n) SETERRQ2(((PetscObject)viewer)->comm, PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must be nonnegative and less than %D", i, osm->n);
  ierr = ISGetLocalSize(osm->is[i], &nidx);CHKERRQ(ierr);
  /*
   No more than 15 characters per index plus a space.
   PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
   in case nidx == 0. That will take care of the space for the trailing '\0' as well.
   For nidx == 0, the whole string consists of 16 '\0' characters.
  */
  ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr);
  ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16*(nidx+1)+1, &sviewer);CHKERRQ(ierr);
  ierr = ISGetIndices(osm->is[i], &idx);CHKERRQ(ierr);
  for (j = 0; j < nidx; ++j) {
    ierr = PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);CHKERRQ(ierr);
  }
  ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
  ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap:\n");CHKERRQ(ierr);
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscFree(cidx);CHKERRQ(ierr);
  if (osm->is_local) {
    ierr = ISGetLocalSize(osm->is_local[i], &nidx);CHKERRQ(ierr);
    /*
     No more than 15 characters per index plus a space.
     PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
     in case nidx == 0. That will take care of the space for the trailing '\0' as well.
     For nidx == 0, the whole string consists of 16 '\0' characters.
    */
    ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr);
    ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16*(nidx+1)+1, &sviewer);CHKERRQ(ierr);
    ierr = ISGetIndices(osm->is_local[i], &idx);CHKERRQ(ierr);
    for (j = 0; j < nidx; ++j) {
      ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
    }
    ierr = ISRestoreIndices(osm->is_local[i],&idx);CHKERRQ(ierr);
    ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer, "Subdomain without overlap:\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer, "\n");CHKERRQ(ierr);
    ierr = PetscFree(cidx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMPrintSubdomains"
static PetscErrorCode PCGASMPrintSubdomains(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  char           fname[PETSC_MAX_PATH_LEN+1];
  PetscViewer    viewer;
  PetscInt       i;
  PetscBool      found;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr);
  if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }
  for (i=0; i<osm->n; ++i) {
    ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
    ierr = PCGASMSubdomainView_Private(pc, i, viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "PCView_GASM"
static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  PetscErrorCode ierr;
  PetscMPIInt    rank, size;
  PetscInt       i,bsz;
  PetscBool      iascii,view_subdomains=PETSC_FALSE;
  PetscViewer    sviewer;
  PetscInt       count, l, gcount, *numbering, *permutation;
  char           overlap[256]     = "no additional overlap requested";
  char           gsubdomains[256] = "unknown number of global subdomains";
  char           lsubdomains[256] = "unknown number of local subdomains";
  char           msubdomains[256] = "unknown max number of local subdomains";

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr);

  ierr = PetscMalloc2(osm->n, PetscInt, &permutation, osm->n, PetscInt, &numbering);CHKERRQ(ierr);
  for (i = 0; i < osm->n; ++i) permutation[i] = i;
  ierr = PetscObjectListGetGlobalNumbering(((PetscObject)pc)->comm, osm->n, (PetscObject*)osm->is, &gcount, numbering);CHKERRQ(ierr);
  ierr = PetscSortIntWithPermutation(osm->n, numbering, permutation);CHKERRQ(ierr);

  if (osm->overlap >= 0) {
    ierr = PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);CHKERRQ(ierr);
  }
  ierr = PetscSNPrintf(gsubdomains,sizeof(gsubdomains),"number of global subdomains = %D",gcount);CHKERRQ(ierr);
  if (osm->N > 0) {
    ierr = PetscSNPrintf(lsubdomains,sizeof(lsubdomains),"number of local subdomains = %D",osm->N);CHKERRQ(ierr);
  }
  if (osm->nmax > 0) {
    ierr = PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);
  }

  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(prefix,"-pc_gasm_view_subdomains",&view_subdomains,PETSC_NULL);CHKERRQ(ierr);

  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    /*
     Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
     the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
     collectively on the comm.
    */
    ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Generalized additive Schwarz:\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"%s\n",overlap);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"%s\n",gsubdomains);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"%s\n",lsubdomains);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"%s\n",msubdomains);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
    /* Cannot take advantage of osm->same_local_solves without a global numbering of subdomains. */
    ierr = PetscViewerASCIIPrintf(viewer,"Subdomain solver info is as follows:\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
    /* Make sure that everybody waits for the banner to be printed. */
    ierr = MPI_Barrier(((PetscObject)viewer)->comm);CHKERRQ(ierr);
    l = 0;
    for (count = 0; count < gcount; ++count) {
      /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
      PetscMPIInt srank, ssize;
      if (l < osm->n) {
        PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
        if (numbering[d] == count) {
          ierr = MPI_Comm_size(((PetscObject)osm->is[d])->comm, &ssize);CHKERRQ(ierr);
          ierr = MPI_Comm_rank(((PetscObject)osm->is[d])->comm, &srank);CHKERRQ(ierr);
          ierr = PetscViewerGetSubcomm(viewer,((PetscObject)osm->is[d])->comm, &sviewer);CHKERRQ(ierr);
          ierr = ISGetLocalSize(osm->is[d],&bsz);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_TRUE);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] (subcomm [%d:%d]) local subdomain number %D, local size = %D\n",(int)rank,(int)size,(int)srank,(int)ssize,d,bsz);CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          if (view_subdomains) {
            ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
          }
          ierr = KSPView(osm->ksp[d],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIIPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_FALSE);CHKERRQ(ierr);
          ierr = PetscViewerRestoreSubcomm(viewer,((PetscObject)osm->is[d])->comm, &sviewer);CHKERRQ(ierr);
          ++l;
        }
      }
      ierr = MPI_Barrier(((PetscObject)pc)->comm);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name);
  }
  ierr = PetscFree2(permutation,numbering);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}




#undef __FUNCT__
#define __FUNCT__ "PCSetUp_GASM"
static PetscErrorCode PCSetUp_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscBool      symset,flg;
  PetscInt       i;
  PetscMPIInt    rank, size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  PetscInt       dn;       /* Number of indices in a single subdomain assigned to this processor. */
  const PetscInt *didx;    /* Indices from a single subdomain assigned to this processor. */
  PetscInt       ddn;      /* Number of indices in all subdomains assigned to this processor. */
  PetscInt       *ddidx;   /* Indices of all subdomains assigned to this processor. */
  IS             gid;      /* Identity IS of the size of all subdomains assigned to this processor. */
  Vec            x,y;
  PetscScalar    *gxarray, *gyarray;
  PetscInt       gfirst, glast;
  DM             *domain_dm = PETSC_NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  if (!pc->setupcalled) {

    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) { osm->type = PC_GASM_BASIC; }
    }

    /*
     If subdomains have been set, then the local number of subdomains, osm->n, is NOT PETSC_DECIDE and is at least 1.
     The total number of subdomains, osm->N, is not necessarily set, might be PETSC_DECIDE, and then will have to be calculated from osm->n.
    */
    if (osm->n == PETSC_DECIDE) {
      /* no subdomains given */
      /* try pc->dm first */
      if (pc->dm) {
        char      ddm_name[1024];
        DM        ddm;
        PetscBool flg;
        PetscInt  num_domains, d;
        char      **domain_names;
        IS        *domain_is;
        /* Allow the user to request a decomposition DM by name */
        ierr = PetscStrncpy(ddm_name, "", 1024);CHKERRQ(ierr);
        ierr = PetscOptionsString("-pc_gasm_decomposition","Name of the DM defining the decomposition", "PCSetDM", ddm_name, ddm_name,1024,&flg);CHKERRQ(ierr);
        if (flg) {
          ierr = DMCreateDecompositionDM(pc->dm, ddm_name, &ddm);CHKERRQ(ierr);
          if (!ddm) {
            SETERRQ1(((PetscObject)pc)->comm, PETSC_ERR_ARG_WRONGSTATE, "Unknown DM decomposition name %s", ddm_name);
          }
          ierr = PetscInfo(pc,"Using decomposition DM defined using options database\n");CHKERRQ(ierr);
          ierr = PCSetDM(pc,ddm);CHKERRQ(ierr);
        }
        ierr = DMCreateDecomposition(pc->dm, &num_domains, &domain_names, &domain_is, &domain_dm);CHKERRQ(ierr);
        if (num_domains) {
          ierr = PCGASMSetSubdomains(pc, num_domains, domain_is, PETSC_NULL);CHKERRQ(ierr);
        }
        for (d = 0; d < num_domains; ++d) {
          ierr = PetscFree(domain_names[d]);CHKERRQ(ierr);
          ierr = ISDestroy(&domain_is[d]);CHKERRQ(ierr);
        }
        ierr = PetscFree(domain_names);CHKERRQ(ierr);
        ierr = PetscFree(domain_is);CHKERRQ(ierr);
      }
      if (osm->n == PETSC_DECIDE) { /* still no subdomains; use one per processor */
        osm->nmax = osm->n = 1;
        ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
        osm->N = size;
      }
    }
    if (osm->N == PETSC_DECIDE) {
      PetscInt inwork[2], outwork[2];
      /* determine global number of subdomains and the max number of local subdomains */
      inwork[0] = inwork[1] = osm->n;
      ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
      osm->nmax = outwork[0];
      osm->N    = outwork[1];
    }
    if (!osm->is) {
      /* The local number of subdomains was set in PCGASMSetTotalBlockCount() or PCGASMSetSubdomains(),
         but the actual subdomains have not been supplied (in PCGASMSetSubdomains()).
         We create the requisite number of subdomains on PETSC_COMM_SELF. */
      ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->is,&osm->is_local);CHKERRQ(ierr);
    }
    if (!osm->is_local) {
      /*
       This indicates that osm->is should define a nonoverlapping decomposition
       (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetSubdomains,
       but the assumption is that either the user does the right thing, or subdomains in osm->is have been created
       via PCGASMCreateLocalSubdomains(), which guarantees a nonoverlapping decomposition).
       Therefore, osm->is will be used to define osm->is_local.
       If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap,
       so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping.
       Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is.
      */
      ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
      for (i=0; i<osm->n; i++) {
        if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
          ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr);
          ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr);
        } else {
          ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr);
          osm->is_local[i] = osm->is[i];
        }
      }
    }
    /* Beyond this point osm->is_local is not null. */
    if (osm->overlap > 0) {
      /* Extend the "overlapping" regions by a number of steps */
      ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr);
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); }

    if (osm->sort_indices) {
      for (i=0; i<osm->n; i++) {
        ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
        ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
      }
    }
    /* Merge the ISs, create merged vectors and scatter contexts. */
    /* Restriction ISs. */
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ddn += dn;
    }
    ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx);CHKERRQ(ierr);
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ierr = ISGetIndices(osm->is[i],&didx);CHKERRQ(ierr);
      ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn);CHKERRQ(ierr);
      ierr = ISRestoreIndices(osm->is[i], &didx);CHKERRQ(ierr);
      ddn += dn;
    }
    ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis);CHKERRQ(ierr);
    ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr);
    ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx);CHKERRQ(ierr);
    ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
    ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid);CHKERRQ(ierr);
    ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction));CHKERRQ(ierr);
    ierr = ISDestroy(&gid);CHKERRQ(ierr);
    /* Prolongation ISs */
    {
      PetscInt       dn_local;     /* Number of indices in the local part of a single domain assigned to this processor. */
      const PetscInt *didx_local;  /* Global indices from the local part of a single domain assigned to this processor. */
      PetscInt       ddn_local;    /* Number of indices in the local part of the disjoint union of all domains assigned to this processor. */
      PetscInt       *ddidx_local; /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */

      ISLocalToGlobalMapping ltog;          /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */
      PetscInt               *ddidx_llocal; /* Mapped local indices of the disjoint union of local parts of subdomains. */
      PetscInt               ddn_llocal;    /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */
      IS                     gis_llocal;    /* IS with ddidx_llocal indices. */
      PetscInt               j;

      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr);
        ddn_local += dn_local;
      }
      ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local);CHKERRQ(ierr);
      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr);
        ierr = ISGetIndices(osm->is_local[i],&didx_local);CHKERRQ(ierr);
        ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local);CHKERRQ(ierr);
        ierr = ISRestoreIndices(osm->is_local[i], &didx_local);CHKERRQ(ierr);
        ddn_local += dn_local;
      }
      ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal);CHKERRQ(ierr);
      ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local));CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingCreateIS(osm->gis,&ltog);CHKERRQ(ierr);
      ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
      if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local);
      /* Now convert these localized indices into the global indices into the merged output vector. */
      ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast);CHKERRQ(ierr);
      for (j=0; j < ddn_llocal; ++j) {
        ddidx_llocal[j] += gfirst;
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal);CHKERRQ(ierr);
      ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);CHKERRQ(ierr);
      ierr = ISDestroy(&gis_llocal);CHKERRQ(ierr);
    }
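    /*
       To summarize the setup so far: osm->grestriction scatters the entries of a global vector listed
       in the merged overlapping IS osm->gis into the merged work vector osm->gx (one contiguous block
       of entries per subdomain), while osm->gprolongation connects the entries of the merged vector
       osm->gy that correspond to the nonoverlapping parts (osm->gis_local) with their positions in a
       global vector; PCApply_GASM() applies it in SCATTER_REVERSE mode with ADD_VALUES to assemble the
       global result.
    */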
    /* Create the subdomain work vectors. */
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr);
    for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) {
      PetscInt dN;
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,1,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,1,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr);
    }
    ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr);
    ierr = VecDestroy(&x);CHKERRQ(ierr);
    ierr = VecDestroy(&y);CHKERRQ(ierr);
    /* Create the local solvers */
    ierr = PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) { /* KSPs are local */
      ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    scall = MAT_INITIAL_MATRIX;

  } else {
    /*
     Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
  }

  /*
   Extract out the submatrices.
  */
  if (size > 1) {
    ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  }
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
   Loop over submatrices putting them into local ksp
  */
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetUpOnBlocks_GASM"
static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCApply_GASM"
static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
   Support for limiting the restriction or interpolation to only local
   subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_GASM_RESTRICT)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
  }
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  /* do the subdomain solves */
  for (i=0; i<osm->n; ++i) {
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
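/*
   For reference (descriptive comment only, not part of the algorithm above): one application of
   PCApply_GASM() computes, in operator form,

       y = sum_i R~_i^T A_i^{-1} R_i x

   where R_i restricts a global vector to the i-th overlapping subdomain (osm->grestriction),
   A_i = osm->pmat[i] is the subdomain matrix extracted on osm->is[i], and R~_i^T adds the subdomain
   solution back through the nonoverlapping index sets (osm->gprolongation).  The PC_GASM_RESTRICT and
   PC_GASM_INTERPOLATE bits of osm->type only switch the corresponding scatters between their global
   and local-only (SCATTER_*_LOCAL) variants.
*/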

#undef __FUNCT__
#define __FUNCT__ "PCApplyTranspose_GASM"
static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
   Support for limiting the restriction or interpolation to only local
   subdomain values (leaving the other values 0).

   Note: these are reversed from the PCApply_GASM() because we are applying the
   transpose of the three terms
  */
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
  }
  if (!(osm->type & PC_GASM_RESTRICT)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCReset_GASM"
static PetscErrorCode PCReset_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (osm->ksp) {
    for (i=0; i<osm->n; i++) {
      ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
    }
  }
  if (osm->pmat) {
    if (osm->n > 0) {
      ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
    }
  }
  if (osm->x) {
    for (i=0; i<osm->n; i++) {
      ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
    }
  }
  ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
  ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);

  ierr = VecScatterDestroy(&osm->grestriction);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&osm->gprolongation);CHKERRQ(ierr);
  if (osm->is) {
    ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
    osm->is       = 0;
    osm->is_local = 0;
  }
  ierr = ISDestroy(&osm->gis);CHKERRQ(ierr);
  ierr = ISDestroy(&osm->gis_local);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCDestroy_GASM"
static PetscErrorCode PCDestroy_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PCReset_GASM(pc);CHKERRQ(ierr);
  if (osm->ksp) {
    for (i=0; i<osm->n; i++) {
      ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
  }
  ierr = PetscFree(osm->x);CHKERRQ(ierr);
  ierr = PetscFree(osm->y);CHKERRQ(ierr);
  ierr = PetscFree(pc->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetFromOptions_GASM"
static PetscErrorCode PCSetFromOptions_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       blocks,ovl;
  PetscBool      symset,flg;
  PCGASMType     gasmtype;

  PetscFunctionBegin;
  /* set the type to symmetric if matrix is symmetric */
  if (!osm->type_set && pc->pmat) {
    ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
    if (symset && flg) { osm->type = PC_GASM_BASIC; }
  }
  ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-pc_gasm_total_block_count","Total number of local subdomains across communicator","PCGASMSetTotalBlockCount",osm->n,&blocks,&flg);CHKERRQ(ierr);
  if (flg) { ierr = PCGASMSetTotalBlockCount(pc,blocks);CHKERRQ(ierr); }
  ierr = PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
  if (flg) { ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
  flg  = PETSC_FALSE;
  ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
  if (flg) { ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetSubdomains_GASM"
PetscErrorCode PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, n = %D",n);
  if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");

  if (!pc->setupcalled) {
    osm->n        = n;
    osm->is       = 0;
    osm->is_local = 0;
    if (is) {
      for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
    }
    if (is_local) {
      for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);}
    }
    if (osm->is) {
      ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
    }
    if (is) {
      ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr);
      for (i=0; i<n; i++) { osm->is[i] = is[i]; }
      /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */
      osm->overlap = -1;
    }
    if (is_local) {
      ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
      for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
    }
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetTotalBlockCount_GASM"
PetscErrorCode PCGASMSetTotalBlockCount_GASM(PC pc,PetscInt N)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       n;

  PetscFunctionBegin;
  if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be > 0, N = %D",N);

  /*
   Split the subdomains equally among all processors
  */
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  n = N/size + ((N % size) > rank);
  if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
  if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalBlockCount() should be called before PCSetUp().");
  if (!pc->setupcalled) {
    if (osm->is) {
      ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
    }
    osm->N        = N;
    osm->n        = n;
    osm->nmax     = N/size + ((N%size)?1:0);
    osm->is       = 0;
    osm->is_local = 0;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetOverlap_GASM"
PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
  if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
  if (!pc->setupcalled) {
    osm->overlap = ovl;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetType_GASM"
PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->type     = type;
  osm->type_set = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetSortIndices_GASM"
PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->sort_indices = doSort;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMGetSubKSP_GASM"
/*
   FIX: This routine might need to be modified once multiple ranks per subdomain are allowed.
        In particular, it would upset the global subdomain number calculation.
*/
PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");

  if (n) {
    *n = osm->n;
  }
  if (first) {
    ierr    = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
    *first -= osm->n;
  }
  if (ksp) {
    /* Assume that local solves are now different; not necessarily
       true, though!  This flag is used only for PCView_GASM() */
    *ksp                   = osm->ksp;
    osm->same_local_solves = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
} /* PCGASMGetSubKSP_GASM() */
EXTERN_C_END


#undef __FUNCT__
#define __FUNCT__ "PCGASMSetSubdomains"
/*@C
    PCGASMSetSubdomains - Sets the subdomains for this processor
    for the additive Schwarz preconditioner.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
.   n  - the number of subdomains for this processor
.   is - the index sets that define this processor's part of the subdomains (on which the correction is obtained)
         (or PETSC_NULL for PETSc to determine subdomains)
-   is_local - the index sets that define the local part of this processor's part of the subdomains (on which the correction is applied)
         (or PETSC_NULL to use the same as is)

    Notes:
    The IS indices use the parallel, global numbering of the vector entries.
    By default the GASM preconditioner uses 1 subdomain per processor.
    Use PCGASMSetTotalBlockCount() to set the total number of subdomains across all processors,
    allowing PETSc to assign the subdomains to processors and to compute them from that count.

    Level: advanced

.keywords: PC, GASM, set, subdomains, additive Schwarz

.seealso: PCGASMSetTotalBlockCount(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
@*/
PetscErrorCode PCGASMSetSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
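/*
   Usage sketch (illustrative only, not part of the library): supplying subdomains explicitly.
   Here pc, A and nsub are placeholders for an existing PC of type PCGASM, the matrix it
   preconditions, and the desired number of local subdomains.  The index sets are created with
   PCGASMCreateLocalSubdomains() defined later in this file and released, after they have been set,
   with PCGASMDestroySubdomains() as its documentation recommends.

     IS *is,*is_local;
     ierr = PCGASMCreateLocalSubdomains(A,nsub,&is,&is_local);CHKERRQ(ierr);
     ierr = PCGASMSetSubdomains(pc,nsub,is,is_local);CHKERRQ(ierr);
     ierr = PCGASMSetOverlap(pc,1);CHKERRQ(ierr);
     ierr = PCGASMDestroySubdomains(nsub,is,is_local);CHKERRQ(ierr);

   The call to PCGASMSetOverlap() lets PETSc grow each nonoverlapping subdomain by one layer during
   PCSetUp_GASM().
*/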

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetTotalBlockCount"
/*@C
    PCGASMSetTotalBlockCount - Sets the total number of subdomains
    cumulative across all processors across the communicator of a generalized
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine, with the same number.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
-   N  - the total number of subdomains cumulative across all processors

    Options Database Key:
    To set the total number of subdomain blocks rather than specify the
    index sets, use the option
.    -pc_gasm_total_block_count <n> - Sets the total number of local subdomains (known as blocks) to be distributed among the processors

    By default the GASM preconditioner uses 1 subdomain per processor.

    Use PCGASMSetSubdomains() to set the local subdomains explicitly.

    Level: advanced

.keywords: PC, GASM, set, total, global, subdomains, additive Schwarz

.seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode PCGASMSetTotalBlockCount(PC pc,PetscInt N)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetTotalBlockCount_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetOverlap"
/*@
    PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)

    Options Database Key:
.   -pc_gasm_overlap <overlap> - Sets overlap

    Notes:
    By default the GASM preconditioner uses 1 subdomain per processor.  To use
    multiple subdomains per processor, see PCGASMSetTotalBlockCount() or
    PCGASMSetSubdomains() (and the option -pc_gasm_total_block_count <n>).

    The overlap defaults to 1, so if one desires that no additional
    overlap be computed beyond what may have been set with a call to
    PCGASMSetTotalBlockCount() or PCGASMSetSubdomains(), then ovl
    must be set to 0.  In particular, if one does not explicitly set
    the subdomains in application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in a GASM
    variant that is equivalent to the block Jacobi preconditioner.

    Note that one can define initial index sets with any overlap via
    PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
    PETSc to extend that overlap further, if desired.

    Level: intermediate

.keywords: PC, GASM, set, overlap

.seealso: PCGASMSetTotalBlockCount(), PCGASMSetSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
@*/
PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveInt(pc,ovl,2);
  ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
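/*
   Usage sketch (illustrative only, not part of the library): letting PETSc create the subdomains and
   controlling only their number and overlap.  pc is a placeholder for a PC obtained from KSPGetPC().
   As noted in the PCGASMSetOverlap() documentation above, internally generated subdomains combined
   with an overlap of 0 reduce to a block Jacobi-like method.

     ierr = PCSetType(pc,PCGASM);CHKERRQ(ierr);
     ierr = PCGASMSetTotalBlockCount(pc,16);CHKERRQ(ierr);
     ierr = PCGASMSetOverlap(pc,0);CHKERRQ(ierr);
*/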

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetType"
/*@
    PCGASMSetType - Sets the type of restriction and interpolation used
    for local problems in the additive Schwarz method.

    Logically Collective on PC

    Input Parameters:
+   pc   - the preconditioner context
-   type - variant of GASM, one of
.vb
      PC_GASM_BASIC       - full interpolation and restriction
      PC_GASM_RESTRICT    - full restriction, local processor interpolation
      PC_GASM_INTERPOLATE - full interpolation, local processor restriction
      PC_GASM_NONE        - local processor restriction and interpolation
.ve

    Options Database Key:
.   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

    Level: intermediate

.keywords: PC, GASM, set, type

.seealso: PCGASMSetTotalBlockCount(), PCGASMSetSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode PCGASMSetType(PC pc,PCGASMType type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveEnum(pc,type,2);
  ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetSortIndices"
/*@
    PCGASMSetSortIndices - Determines whether subdomain indices are sorted.

    Logically Collective on PC

    Input Parameters:
+   pc     - the preconditioner context
-   doSort - sort the subdomain indices

    Level: intermediate

.keywords: PC, GASM, set, type

.seealso: PCGASMSetSubdomains(), PCGASMSetTotalBlockCount(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,doSort,2);
  ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMGetSubKSP"
/*@C
   PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
   this processor.

   Collective on PC iff first_local is requested

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n_local     - the number of blocks on this processor or PETSC_NULL
.  first_local - the global number of the first block on this processor or PETSC_NULL,
                 all processors must request or all must pass PETSC_NULL
-  ksp         - the array of KSP contexts

   Notes:
   After PCGASMGetSubKSP() the array of KSPs is not to be freed.

   Currently for some matrix implementations only 1 block per processor
   is supported.

   You must call KSPSetUp() before calling PCGASMGetSubKSP().

   Level: advanced

.keywords: PC, GASM, additive Schwarz, get, sub, KSP, context

.seealso: PCGASMSetTotalBlockCount(), PCGASMSetSubdomains(), PCGASMSetOverlap(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
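/*
   Usage sketch (illustrative only, not part of the library): customizing the block solvers in code
   rather than through the -sub_ options prefix.  ksp is a placeholder for the outer KSP, already set
   up to use PCGASM; KSPSetUp() must be called first, as required by PCGASMGetSubKSP() above.

     PC       pc,subpc;
     KSP      *subksp;
     PetscInt i,nlocal,first;
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = KSPSetUp(ksp);CHKERRQ(ierr);
     ierr = PCGASMGetSubKSP(pc,&nlocal,&first,&subksp);CHKERRQ(ierr);
     for (i=0; i<nlocal; i++) {
       ierr = KSPGetPC(subksp[i],&subpc);CHKERRQ(ierr);
       ierr = PCSetType(subpc,PCILU);CHKERRQ(ierr);
     }

   The returned array subksp is owned by the preconditioner and must not be freed by the caller.
*/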

/* -------------------------------------------------------------------------------------*/
/*MC
   PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
           its own KSP object.

   Options Database Keys:
+  -pc_gasm_total_block_count <n> - Sets total number of local subdomains (known as blocks) to be distributed among processors
.  -pc_gasm_view_subdomains - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
.  -pc_gasm_print_subdomains - activates the printing of subdomain indices in PCSetUp()
.  -pc_gasm_overlap <ovl> - Sets overlap by which to (automatically) extend local subdomains
-  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

   IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
   will get a different convergence rate due to the default option of -pc_gasm_type restrict.  Use
   -pc_gasm_type basic to use the standard GASM.

   Notes: Each processor can have one or more blocks, but a block cannot be shared by more
   than one processor.  Defaults to one block per processor.

   To set options on the solvers for each block, append -sub_ to all the KSP and PC
   options database keys.  For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

   To set the options on the solvers separately for each block, call PCGASMGetSubKSP()
   and set the options directly on the resulting KSP object (you can access its PC
   with KSPGetPC()).

   Level: beginner

   Concepts: additive Schwarz method

   References:
   An additive variant of the Schwarz alternating method for the case of many subregions
   M Dryja, OB Widlund - Courant Institute, New York University Technical report

   Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
   Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.

.seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
          PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetSubdomains(),
          PCGASMSetTotalBlockCount(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()

M*/
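/*
   Usage sketch for the PCGASM type as a whole (illustrative only, not part of the library): a solve in
   which all GASM parameters are left to the options database.  A, b and x are placeholders for an
   assembled matrix and vectors on PETSC_COMM_WORLD.

     KSP ksp;
     PC  pc;
     ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCGASM);CHKERRQ(ierr);
     ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
     ierr = KSPDestroy(&ksp);CHKERRQ(ierr);

   Run, for example, with the block-solver options mentioned in the manual page above:

     -pc_gasm_overlap 2 -sub_ksp_type preonly -sub_pc_type ilu -sub_pc_factor_levels 1 -ksp_view
*/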

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_GASM"
PetscErrorCode PCCreate_GASM(PC pc)
{
  PetscErrorCode ierr;
  PC_GASM        *osm;

  PetscFunctionBegin;
  ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr);
  osm->N                 = PETSC_DECIDE;
  osm->n                 = PETSC_DECIDE;
  osm->nmax              = 0;
  osm->overlap           = 1;
  osm->ksp               = 0;
  osm->grestriction      = 0;
  osm->gprolongation     = 0;
  osm->gx                = 0;
  osm->gy                = 0;
  osm->x                 = 0;
  osm->y                 = 0;
  osm->is                = 0;
  osm->is_local          = 0;
  osm->pmat              = 0;
  osm->type              = PC_GASM_RESTRICT;
  osm->same_local_solves = PETSC_TRUE;
  osm->sort_indices      = PETSC_TRUE;

  pc->data                 = (void*)osm;
  pc->ops->apply           = PCApply_GASM;
  pc->ops->applytranspose  = PCApplyTranspose_GASM;
  pc->ops->setup           = PCSetUp_GASM;
  pc->ops->reset           = PCReset_GASM;
  pc->ops->destroy         = PCDestroy_GASM;
  pc->ops->setfromoptions  = PCSetFromOptions_GASM;
  pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
  pc->ops->view            = PCView_GASM;
  pc->ops->applyrichardson = 0;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSubdomains_C","PCGASMSetSubdomains_GASM",
                                           PCGASMSetSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalBlockCount_C","PCGASMSetTotalBlockCount_GASM",
                                           PCGASMSetTotalBlockCount_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM",
                                           PCGASMSetOverlap_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM",
                                           PCGASMSetType_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM",
                                           PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM",
                                           PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END


#undef __FUNCT__
#define __FUNCT__ "PCGASMCreateLocalSubdomains"
/*@C
   PCGASMCreateLocalSubdomains - Creates n local index sets for the overlapping
   Schwarz preconditioner for any problem based on its matrix.

   Collective

   Input Parameters:
+  A - The global matrix operator
-  n - the number of local subdomains

   Output Parameters:
+  outis       - the array of index sets defining the local subdomains (on which the correction is computed)
-  outis_local - the array of index sets defining the local portions of the subdomains (on which the correction is applied)

   Level: advanced

   Note: this generates n nonoverlapping subdomains on PETSC_COMM_SELF;
         PCGASM will generate the overlap from these if you use them
         in PCGASMSetSubdomains() and set a nonzero overlap with
         PCGASMSetOverlap()

         In the Fortran version you must provide the array outis[] already allocated of length n.

.keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid

.seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
@*/
PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt n, IS *outis[], IS *outis_local[])
{
  MatPartitioning mpart;
  const char      *prefix;
  PetscErrorCode  (*f)(Mat,MatReuse,Mat*);
  PetscMPIInt     size;
  PetscInt        i,j,rstart,rend,bs;
  PetscBool       isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
  Mat             Ad = PETSC_NULL, adj;
  IS              ispart,isnumb,*is;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(outis,3);
  if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);

  /* Get prefix, row distribution, and block size */
  ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);

  /* Get diagonal block from matrix if possible */
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
  } else if (size == 1) {
    Ad = A;
  }
  if (Ad) {
    ierr = PetscObjectTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
    if (!isbaij) {ierr = PetscObjectTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
  }
  if (Ad && n > 1) {
    PetscBool match,done;
    /* Try to setup a good matrix partitioning if available */
    ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
    ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
    ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
    if (!match) {
      ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
    }
    if (!match) { /* assume a "good" partitioner is available */
      PetscInt na,*ia,*ja;
      ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
      if (done) {
        /* Build adjacency matrix by hand.  Unfortunately a call to
           MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
           remove the block-aij structure and we cannot expect
           MatPartitioning to split vertices as we need */
        PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
        nnz = 0;
        for (i=0; i<na; i++) { /* count number of nonzeros */
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] == i) { /* don't count diagonal */
              len--; break;
            }
          }
          nnz += len;
        }
        ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
        ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
        nnz    = 0;
        iia[0] = 0;
        for (i=0; i<na; i++) { /* fill adjacency */
          cnt = 0;
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] != i) { /* if not diagonal */
              jja[nnz+cnt++] = row[j];
            }
          }
          nnz += cnt;
          iia[i+1] = nnz;
        }
        /* Partitioning of the adjacency matrix */
        ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
        ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
        ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
        ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
        ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
        ierr = MatDestroy(&adj);CHKERRQ(ierr);
        foundpart = PETSC_TRUE;
      }
      ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
    }
    ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
  }

  ierr   = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
  *outis = is;

  if (!foundpart) {

    /* Partitioning by contiguous chunks of rows */

    PetscInt mbs   = (rend-rstart)/bs;
    PetscInt start = rstart;
    for (i=0; i<n; i++) {
      PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
      ierr = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
      start += count;
    }

  } else {

    /* Partitioning by adjacency of diagonal block */

    const PetscInt *numbering;
    PetscInt       *count,nidx,*indices,*newidx,start=0;
    /* Get node count in each partition */
    ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
    ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      for (i=0; i<n; i++) count[i] *= bs;
    }
    /* Build indices from node numbering */
    ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
    ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
    for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
    ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
    ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
    ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
      for (i=0; i<nidx; i++)
        for (j=0; j<bs; j++)
          newidx[i*bs+j] = indices[i]*bs + j;
      ierr = PetscFree(indices);CHKERRQ(ierr);
      nidx *= bs;
      indices = newidx;
    }
    /* Shift to get global indices */
    for (i=0; i<nidx; i++) indices[i] += rstart;

    /* Build the index sets for each block */
    for (i=0; i<n; i++) {
      ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
      ierr   = ISSort(is[i]);CHKERRQ(ierr);
      start += count[i];
    }

    ierr = PetscFree(count);CHKERRQ(ierr);
    ierr = PetscFree(indices);CHKERRQ(ierr);
    ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
    ierr = ISDestroy(&ispart);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMDestroySubdomains"
/*@C
   PCGASMDestroySubdomains - Destroys the index sets created with
   PCGASMCreateLocalSubdomains() or PCGASMCreateSubdomains2D().  Should be
   called after setting subdomains with PCGASMSetSubdomains().

   Collective

   Input Parameters:
+  n        - the number of index sets
.  is       - the array of index sets
-  is_local - the array of local index sets, can be PETSC_NULL

   Level: advanced

.keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid

.seealso: PCGASMCreateLocalSubdomains(), PCGASMSetSubdomains()
@*/
PetscErrorCode PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
{
  PetscInt       i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
  PetscValidPointer(is,2);
  for (i=0; i<n; i++) { ierr = ISDestroy(&is[i]);CHKERRQ(ierr); }
  ierr = PetscFree(is);CHKERRQ(ierr);
  if (is_local) {
    PetscValidPointer(is_local,3);
    for (i=0; i<n; i++) { ierr = ISDestroy(&is_local[i]);CHKERRQ(ierr); }
    ierr = PetscFree(is_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


#define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
{ \
  PetscInt first_row = first/M, last_row = last/M+1; \
  /* \
   Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \
   of the bounding box of the intersection of the subdomain with the local ownership range (local \
   subdomain). \
   Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \
   of the intersection. \
  */ \
  /* ylow_loc is the grid row containing the first element of the local subdomain */ \
  *ylow_loc = PetscMax(first_row,ylow); \
  /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft; \
  /* yhigh_loc is the grid row above the last local subdomain element */ \
  *yhigh_loc = PetscMin(last_row,yhigh); \
  /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright; \
  /* Now compute the size of the local subdomain n. */ \
#define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
{                                                                                                       \
  PetscInt first_row = first/M, last_row = last/M+1;                                                    \
  /*                                                                                                    \
   Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
   of the bounding box of the intersection of the subdomain with the local ownership range (local      \
   subdomain).                                                                                          \
   Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows      \
   of the intersection.                                                                                 \
  */                                                                                                    \
  /* ylow_loc is the grid row containing the first element of the local subdomain */                    \
  *ylow_loc = PetscMax(first_row,ylow);                                                                  \
  /* xleft_loc is the offset of the first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft;                                   \
  /* yhigh_loc is the grid row above the last local subdomain element */                                 \
  *yhigh_loc = PetscMin(last_row,yhigh);                                                                 \
  /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright;                                 \
  /* Now compute the size of the local subdomain n. */                                                   \
  *n = 0;                                                                                                \
  if (*ylow_loc < *yhigh_loc) {                                                                          \
    PetscInt width = xright-xleft;                                                                       \
    *n += width*(*yhigh_loc-*ylow_loc-1);                                                                \
    *n += PetscMin(PetscMax(*xright_loc-xleft,0),width);                                                 \
    *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width);                                                 \
  }                                                                                                      \
}
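
/*
   Worked example (added commentary, with made-up numbers): on a grid of width M = 10, consider a
   subdomain with xleft = 2, xright = 7, ylow = 1, yhigh = 4 and a local ownership range
   [first,last) = [25,60).  Then first_row = 2 and last_row = 7, so the macro produces
     ylow_loc  = max(2,1) = 2,   xleft_loc  = max(25%10,2) = 5  (the first owned grid row is only partly inside),
     yhigh_loc = min(7,4) = 4,   xright_loc = 7,
   and the local subdomain size is
     n = 5*(4-2-1) + min(7-2,5) - min(5-2,5) = 5 + 5 - 3 = 7,
   i.e. two owned points in grid row 2 (columns 5 and 6) plus five in grid row 3 (columns 2..6).
*/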
#undef __FUNCT__
#define __FUNCT__ "PCGASMCreateSubdomains2D"
/*@
   PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
   preconditioner for a two-dimensional problem on a regular grid.

   Collective

   Input Parameters:
+  pc - the preconditioner context
.  M, N - the global number of grid points in the x and y directions
.  Mdomains, Ndomains - the global number of subdomains in the x and y directions
.  dof - degrees of freedom per node
-  overlap - overlap in mesh lines

   Output Parameters:
+  nsub - the number of local subdomains created
.  is - array of index sets defining overlapping (if overlap > 0) subdomains
-  is_local - array of index sets defining non-overlapping subdomains

   Level: advanced

.keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid

.seealso: PCGASMSetTotalBlockCount(), PCGASMSetSubdomains(), PCGASMGetSubKSP(),
          PCGASMSetOverlap()
@*/
PetscErrorCode PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
{
  PetscErrorCode ierr;
  PetscMPIInt    size, rank;
  PetscInt       i, j;
  PetscInt       maxheight, maxwidth;
  PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
  PetscInt       ystart, ylow, yhigh, ylow_loc, yhigh_loc;
  PetscInt       x[2][2], y[2][2], n[2];
  PetscInt       first, last;
  PetscInt       nidx, *idx;
  PetscInt       ii,jj,s,q,d;
  PetscInt       k,kk;
  PetscMPIInt    color;
  MPI_Comm       comm, subcomm;
  IS             **iis = 0;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
  if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
                                      "does not respect the number of degrees of freedom per grid point %D", first, last, dof);

  /* Determine the number of domains with nonzero intersections with the local ownership range. */
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    ylow   = PetscMax(ystart - overlap,0);
    yhigh  = PetscMin(ystart + maxheight + overlap,N);
    xstart = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      xleft  = PetscMax(xstart - overlap,0);
      xright = PetscMin(xstart + maxwidth + overlap,M);
      /*
        Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
      */
      PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
      if (nidx) {
        ++s;
      }
      xstart += maxwidth;
    } /* for (i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  } /* for (j = 0; j < Ndomains; ++j) */
  /* Now we can allocate the necessary number of ISs. */
  *nsub  = s;
  ierr   = PetscMalloc((*nsub)*sizeof(IS),is);CHKERRQ(ierr);
  ierr   = PetscMalloc((*nsub)*sizeof(IS),is_local);CHKERRQ(ierr);
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    y[0][0] = PetscMax(ystart - overlap,0);
    y[0][1] = PetscMin(ystart + maxheight + overlap,N);
    /* Vertical domain limits without an overlap. */
    y[1][0] = ystart;
    y[1][1] = PetscMin(ystart + maxheight,N);
    xstart  = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      x[0][0] = PetscMax(xstart - overlap,0);
      x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
      /* Horizontal domain limits without an overlap. */
      x[1][0] = xstart;
      x[1][1] = PetscMin(xstart+maxwidth,M);
      /*
        Determine whether this domain intersects this processor's ownership range of pc->pmat.
        Do this twice: first for the domains with an overlap, then for the domains without.
        During the first pass create the subcommunicators, and use them on the second pass as well.
      */
      for (q = 0; q < 2; ++q) {
        /*
          The domain limits (xleft, xright) and (ylow, yhigh) are adjusted
          according to whether the domain with an overlap or without is considered.
        */
        xleft = x[q][0]; xright = x[q][1];
        ylow  = y[q][0]; yhigh  = y[q][1];
        PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
        nidx *= dof;
        n[q]  = nidx;
        /*
          Based on the counted number of indices in the local domain *with an overlap*,
          construct a subcommunicator of all the processors supporting this domain.
          Observe that a domain with an overlap might have nontrivial local support,
          while the domain without an overlap might not.  Hence, the decision to participate
          in the subcommunicator must be based on the domain with an overlap.
        */
        if (q == 0) {
          if (nidx) {
            color = 1;
          } else {
            color = MPI_UNDEFINED;
          }
          ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
        }
        /*
          Proceed only if the number of local indices *with an overlap* is nonzero.
        */
        if (n[0]) {
          if (q == 0) {
            iis = is;
          }
          if (q == 1) {
            /*
              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
              Moreover, if the overlap is zero, the two ISs are identical.
            */
            if (overlap == 0) {
              (*is_local)[s] = (*is)[s];
              ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
              continue;
            } else {
              iis     = is_local;
              subcomm = ((PetscObject)(*is)[s])->comm;
            }
          } /* if (q == 1) */
          idx  = PETSC_NULL;
          ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
          if (nidx) {
            k = 0;
            for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
              PetscInt x0 = (jj==ylow_loc)     ? xleft_loc  : xleft;
              PetscInt x1 = (jj==yhigh_loc-1)  ? xright_loc : xright;
              kk = dof*(M*jj + x0);
              for (ii=x0; ii<x1; ++ii) {
                for (d = 0; d < dof; ++d) {
                  idx[k++] = kk++;
                }
              }
            }
          }
          ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr);
        } /* if (n[0]) */
      } /* for (q = 0; q < 2; ++q) */
      if (n[0]) {
        ++s;
      }
      xstart += maxwidth;
    } /* for (i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  } /* for (j = 0; j < Ndomains; ++j) */
  PetscFunctionReturn(0);
}
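
/*
   Usage sketch (illustrative only; the grid and decomposition sizes are made up): a 4x4
   decomposition of a 128x128 grid with one degree of freedom per node and an overlap of two
   mesh lines, handed to the preconditioner and destroyed once the solver is done with it.
   PCGASMSetSubdomains(pc,nsub,is,is_local) is assumed to have the calling sequence referenced
   throughout this file.

     PetscInt nsub;
     IS       *is, *is_local;
     ierr = PCGASMCreateSubdomains2D(pc,128,128,4,4,1,2,&nsub,&is,&is_local);CHKERRQ(ierr);
     ierr = PCGASMSetSubdomains(pc,nsub,is,is_local);CHKERRQ(ierr);
     ...  KSPSetUp()/KSPSolve()  ...
     ierr = PCGASMDestroySubdomains(nsub,is,is_local);CHKERRQ(ierr);
*/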
#undef __FUNCT__
#define __FUNCT__ "PCGASMGetSubdomains"
/*@C
   PCGASMGetSubdomains - Gets the subdomains supported on this processor
   for the additive Schwarz preconditioner.

   Not Collective

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n - the number of subdomains for this processor (default value = 1)
.  is - the index sets that define the subdomains supported on this processor
-  is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)

   Notes:
   The IS numbering is in the parallel, global numbering of the vector.

   Level: advanced

.keywords: PC, GASM, get, subdomains, additive Schwarz

.seealso: PCGASMSetTotalBlockCount(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubmatrices()
@*/
PetscErrorCode PCGASMGetSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
{
  PC_GASM        *osm;
  PetscErrorCode ierr;
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidIntPointer(n,2);
  if (is) PetscValidPointer(is,3);
  ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  if (!match) {
    if (n)        *n        = 0;
    if (is)       *is       = PETSC_NULL;
    if (is_local) *is_local = PETSC_NULL;
  } else {
    osm = (PC_GASM*)pc->data;
    if (n)        *n        = osm->n;
    if (is)       *is       = osm->is;
    if (is_local) *is_local = osm->is_local;
  }
  PetscFunctionReturn(0);
}
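
/*
   Usage sketch (illustrative only): querying the subdomains currently held by the preconditioner,
   e.g. to report their local sizes.  The arrays returned by PCGASMGetSubdomains() point at the
   preconditioner's internal data (see the code above) and must not be freed by the caller.

     PetscInt n, i, nidx;
     IS       *is, *is_local;
     ierr = PCGASMGetSubdomains(pc,&n,&is,&is_local);CHKERRQ(ierr);
     for (i=0; i<n; i++) {
       ierr = ISGetLocalSize(is[i],&nidx);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_SELF,"subdomain %D: %D local indices\n",i,nidx);CHKERRQ(ierr);
     }
*/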
#undef __FUNCT__
#define __FUNCT__ "PCGASMGetSubmatrices"
/*@C
   PCGASMGetSubmatrices - Gets the local submatrices (for this processor
   only) for the additive Schwarz preconditioner.

   Not Collective

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n - the number of matrices for this processor (default value = 1)
-  mat - the matrices

   Notes: matrices returned by this routine have the same communicators as the index sets (IS)
   used to define the subdomains in PCGASMSetSubdomains(), or PETSC_COMM_SELF, if the
   subdomains were defined using PCGASMSetTotalBlockCount().

   Level: advanced

.keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi

.seealso: PCGASMSetTotalBlockCount(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
@*/
PetscErrorCode PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
{
  PC_GASM        *osm;
  PetscErrorCode ierr;
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidIntPointer(n,2);
  if (mat) PetscValidPointer(mat,3);
  if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
  ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  if (!match) SETERRQ2(((PetscObject)pc)->comm, PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
  osm = (PC_GASM*)pc->data;
  if (n)   *n   = osm->n;
  if (mat) *mat = osm->pmat;
  PetscFunctionReturn(0);
}
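
/*
   Usage sketch (illustrative only): retrieving the subdomain matrices once the preconditioner has
   been set up; as noted above, this must come after KSPSetUp() or PCSetUp().

     PetscInt n, i, m;
     Mat      *submat;
     ierr = PCGASMGetSubmatrices(pc,&n,&submat);CHKERRQ(ierr);
     for (i=0; i<n; i++) {
       ierr = MatGetSize(submat[i],&m,PETSC_NULL);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_SELF,"subdomain matrix %D: %D rows\n",i,m);CHKERRQ(ierr);
     }
*/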