/*
  This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
  In this version each processor may have any number of subdomains and a subdomain may live on multiple
  processors.

       N    - total number of subdomains on all processors
       n    - actual number of subdomains on this processor
       nmax - maximum number of subdomains per processor
*/
#include <petsc-private/pcimpl.h>     /*I "petscpc.h" I*/

typedef struct {
  PetscInt   N,n,nmax;
  PetscInt   overlap;             /* overlap requested by user */
  KSP        *ksp;                /* linear solvers for each block */
  Vec        gx,gy;               /* Merged work vectors */
  Vec        *x,*y;               /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
  IS         gis, gis_local;      /* merged ISs */
  VecScatter grestriction;        /* merged restriction */
  VecScatter gprolongation;       /* merged prolongation */
  IS         *is;                 /* index set that defines each overlapping subdomain */
  IS         *is_local;           /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */
  Mat        *pmat;               /* subdomain block matrices */
  PCGASMType type;                /* use reduced interpolation, restriction or both */
  PetscBool  type_set;            /* if user set this value (so won't change it for symmetric problems) */
  PetscBool  same_local_solves;   /* flag indicating whether all local solvers are the same */
  PetscBool  sort_indices;        /* flag to sort subdomain indices */
} PC_GASM;

#undef __FUNCT__
#define __FUNCT__ "PCGASMPrintSubdomains"
static PetscErrorCode PCGASMPrintSubdomains(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  char           fname[PETSC_MAX_PATH_LEN+1];
  PetscViewer    viewer, sviewer;
  PetscInt       i,j,nidx;
  const PetscInt *idx;
  char           *cidx;
  PetscBool      found;
  PetscMPIInt    size, rank;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr);
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr);
  if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
  for (i=0; i<osm->n; ++i) {
    ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
    ierr = ISGetLocalSize(osm->is[i], &nidx);CHKERRQ(ierr);
    /*
       No more than 15 characters per index plus a space.
       PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
       in case nidx == 0.  That will take care of the space for the trailing '\0' as well.
       For nidx == 0, the whole string consists of 16 '\0' characters.
    */
    ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr);
    ierr = ISGetIndices(osm->is[i], &idx);CHKERRQ(ierr);
    ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr);
    for (j = 0; j < nidx; ++j) {
      ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
    }
    ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
    ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr);

    ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
    ierr = PetscFree(cidx);CHKERRQ(ierr);
    ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
    if (osm->is_local) {
      ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
      ierr = ISGetLocalSize(osm->is_local[i], &nidx);CHKERRQ(ierr);
      /*
         No more than 15 characters per index plus a space.
         PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
         in case nidx == 0.  That will take care of the space for the trailing '\0' as well.
         For nidx == 0, the whole string consists of 16 '\0' characters.
      */
      ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr);
      ierr = ISGetIndices(osm->is_local[i], &idx);CHKERRQ(ierr);
      ierr = PetscViewerStringOpen(((PetscObject)(osm->is_local[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr);
      for (j = 0; j < nidx; ++j) {
        ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
      }
      ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
      ierr = ISRestoreIndices(osm->is_local[i],&idx);CHKERRQ(ierr);

      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
      ierr = PetscFree(cidx);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "PCView_GASM"
static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  PetscErrorCode ierr;
  PetscMPIInt    rank, size;
  PetscInt       i,bsz;
  PetscBool      iascii,isstring, print_subdomains=PETSC_FALSE;
  PetscViewer    sviewer;


  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr);
  if (iascii) {
    char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set";
    if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);}
    if (osm->nmax > 0)     {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);}
    ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
    if (osm->same_local_solves) {
      if (osm->ksp) {
        ierr = PetscViewerASCIIPrintf(viewer," Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr);
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (!rank) {
          ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
    } else {
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," Local solve info for each subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
      for (i=0; i<osm->nmax; i++) {
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (i < osm->n) {
          ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_TRUE);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    }
  } else if (isstring) {
    ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name);
  }
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  if (print_subdomains) {
    ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}




#undef __FUNCT__
#define __FUNCT__ "PCSetUp_GASM"
static PetscErrorCode PCSetUp_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscBool      symset,flg;
  PetscInt       i;
  PetscMPIInt    rank, size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  PetscInt       dn;       /* Number of indices in a single subdomain assigned to this processor. */
  const PetscInt *didx;    /* Indices from a single subdomain assigned to this processor. */
  PetscInt       ddn;      /* Number of indices in all subdomains assigned to this processor. */
  PetscInt       *ddidx;   /* Indices of all subdomains assigned to this processor. */
  IS             gid;      /* Identity IS of the size of all subdomains assigned to this processor. */
  Vec            x,y;
  PetscScalar    *gxarray, *gyarray;
  PetscInt       gfirst, glast;
  DM             *domain_dm = PETSC_NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  if (!pc->setupcalled) {

    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) { osm->type = PC_GASM_BASIC; }
    }

    if (osm->N == PETSC_DECIDE && osm->n < 1) {
      /* no subdomains given, try pc->dm */
      if (pc->dm) {
        char      ddm_name[1024];
        DM        ddm;
        PetscBool flg;
        PetscInt  num_domains, d;
        char      **domain_names;
        IS        *domain_is;
        /* Allow the user to request a decomposition DM by name */
        ierr = PetscStrncpy(ddm_name, "", 1024);CHKERRQ(ierr);
        ierr = PetscOptionsString("-pc_asm_decomposition_dm", "Name of the DM defining the decomposition", "PCSetDM", ddm_name, ddm_name,1024,&flg);CHKERRQ(ierr);
        if (flg) {
          ierr = DMCreateDecompositionDM(pc->dm, ddm_name, &ddm);CHKERRQ(ierr);
          if (!ddm) {
            SETERRQ1(((PetscObject)pc)->comm, PETSC_ERR_ARG_WRONGSTATE, "Unknown DM decomposition name %s", ddm_name);
          }
          ierr = PetscInfo(pc,"Using decomposition DM defined using options database\n");CHKERRQ(ierr);
          ierr = PCSetDM(pc,ddm);CHKERRQ(ierr);
        }
        ierr = DMCreateDecomposition(pc->dm, &num_domains, &domain_names, &domain_is, &domain_dm);CHKERRQ(ierr);
        ierr = PCGASMSetLocalSubdomains(pc, num_domains, domain_is, PETSC_NULL);CHKERRQ(ierr);
        for (d = 0; d < num_domains; ++d) {
          ierr = PetscFree(domain_names[d]);CHKERRQ(ierr);
          ierr = ISDestroy(&domain_is[d]);CHKERRQ(ierr);
        }
        ierr = PetscFree(domain_names);CHKERRQ(ierr);
        ierr = PetscFree(domain_is);CHKERRQ(ierr);
      } else { /* no dm, use one subdomain per processor */
        osm->nmax = osm->n = 1;
        ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
        osm->N = size;
      }
    } else if (osm->N == PETSC_DECIDE) {
      PetscInt inwork[2], outwork[2];
      /* determine global number of subdomains and the max number of local subdomains */
      inwork[0] = inwork[1] = osm->n;
      ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
      osm->nmax = outwork[0];
      osm->N    = outwork[1];
    }
    if (!osm->is) { /* create the index sets */
      ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr);
    }
    if (!osm->is_local) {
      /*
         This indicates that osm->is should define a nonoverlapping decomposition
         (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetLocalSubdomains,
         but the assumption is that either the user does the right thing, or subdomains in osm->is have been created
         via PCGASMCreateSubdomains, which
         guarantees a nonoverlapping decomposition).
         Therefore, osm->is will be used to define osm->is_local.
         If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap,
         so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping.
         Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is.
      */
      ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
      for (i=0; i<osm->n; i++) {
        if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
          ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr);
          ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr);
        } else {
          ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr);
          osm->is_local[i] = osm->is[i];
        }
      }
    }
    /* Beyond this point osm->is_local is not null. */
    if (osm->overlap > 0) {
      /* Extend the "overlapping" regions by a number of steps */
      ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr);
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); }

    if (osm->sort_indices) {
      for (i=0; i<osm->n; i++) {
        ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
        ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
      }
    }
    /* Merge the ISs, create merged vectors and scatter contexts. */
    /* Restriction ISs. */
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ddn += dn;
    }
    ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx);CHKERRQ(ierr);
    ddn  = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ierr = ISGetIndices(osm->is[i],&didx);CHKERRQ(ierr);
      ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn);CHKERRQ(ierr);
      ierr = ISRestoreIndices(osm->is[i], &didx);CHKERRQ(ierr);
      ddn += dn;
    }
    ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis);CHKERRQ(ierr);
    ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr);
    ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx);CHKERRQ(ierr);
    ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
    ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid);CHKERRQ(ierr);
    ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction));CHKERRQ(ierr);
    ierr = ISDestroy(&gid);CHKERRQ(ierr);
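    /*
       At this point osm->gis is the concatenation of the (possibly overlapping) subdomain index sets
       owned by this processor, osm->gx and osm->gy are parallel work vectors whose local length ddn
       equals the total number of those indices, and osm->grestriction scatters the entries of a
       global vector selected by osm->gis into the contiguous slots [gfirst,gfirst+ddn) of osm->gx.
    */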
    /* Prolongation ISs */
    {
      PetscInt               dn_local;      /* Number of indices in the local part of a single domain assigned to this processor. */
      const PetscInt         *didx_local;   /* Global indices from the local part of a single domain assigned to this processor. */
      PetscInt               ddn_local;     /* Number of indices in the local part of the disjoint union of all domains assigned to this processor. */
      PetscInt               *ddidx_local;  /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */
      ISLocalToGlobalMapping ltog;          /* Map from global to local indices on the disjoint union of subdomains: "local" indices run from 0 to ddn-1. */
      PetscInt               *ddidx_llocal; /* Mapped local indices of the disjoint union of local parts of subdomains. */
      PetscInt               ddn_llocal;    /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */
      IS                     gis_llocal;    /* IS with ddidx_llocal indices. */
      PetscInt               j;
      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr);
        ddn_local += dn_local;
      }
      ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local);CHKERRQ(ierr);
      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr);
        ierr = ISGetIndices(osm->is_local[i],&didx_local);CHKERRQ(ierr);
        ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local);CHKERRQ(ierr);
        ierr = ISRestoreIndices(osm->is_local[i], &didx_local);CHKERRQ(ierr);
        ddn_local += dn_local;
      }
      ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal);CHKERRQ(ierr);
      ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local));CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingCreateIS(osm->gis,&ltog);CHKERRQ(ierr);
      ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
      if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local);
      /* Now convert these localized indices into the global indices into the merged output vector. */
      ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast);CHKERRQ(ierr);
      for (j=0; j < ddn_llocal; ++j) {
        ddidx_llocal[j] += gfirst;
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal);CHKERRQ(ierr);
      ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);CHKERRQ(ierr);
      ierr = ISDestroy(&gis_llocal);CHKERRQ(ierr);
    }
    /* Create the subdomain work vectors. */
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr);
    for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) {
      PetscInt dN;
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr);
    }
    ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr);
    ierr = VecDestroy(&x);CHKERRQ(ierr);
    ierr = VecDestroy(&y);CHKERRQ(ierr);
    /* Create the local solvers */
    ierr = PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) { /* KSPs are local */
      ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    scall = MAT_INITIAL_MATRIX;

  } else {
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
  }

  /*
     Extract out the submatrices.
  */
  if (size > 1) {
    ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  }
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over the submatrices putting them into the local ksp
  */
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetUpOnBlocks_GASM"
static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCApply_GASM"
static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_GASM_RESTRICT)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
  }
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  /* do the subdomain solves */
  for (i=0; i<osm->n; ++i) {
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCApplyTranspose_GASM"
static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).

     Note: these are reversed from the PCApply_GASM() because we are applying the
     transpose of the three terms
  */
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
  }
  if (!(osm->type & PC_GASM_RESTRICT)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCReset_GASM"
static PetscErrorCode PCReset_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (osm->ksp) {
    for (i=0; i<osm->n; i++) {
      ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
    }
  }
  if (osm->pmat) {
    if (osm->n > 0) {
      ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
    }
  }
  if (osm->x) {
    for (i=0; i<osm->n; i++) {
      ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
    }
  }
  ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
  ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);

  ierr = VecScatterDestroy(&osm->grestriction);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&osm->gprolongation);CHKERRQ(ierr);
  if (osm->is) {ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); osm->is = 0;}
  ierr = ISDestroy(&osm->gis);CHKERRQ(ierr);
  ierr = ISDestroy(&osm->gis_local);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCDestroy_GASM"
static PetscErrorCode PCDestroy_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PCReset_GASM(pc);CHKERRQ(ierr);
  if (osm->ksp) {
    for (i=0; i<osm->n; i++) {
      ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
  }
  ierr = PetscFree(osm->x);CHKERRQ(ierr);
  ierr = PetscFree(osm->y);CHKERRQ(ierr);
  ierr = PetscFree(pc->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetFromOptions_GASM"
static PetscErrorCode PCSetFromOptions_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       blocks,ovl;
  PetscBool      symset,flg;
  PCGASMType     gasmtype;

  PetscFunctionBegin;
  /* set the type to symmetric if matrix is symmetric */
  if (!osm->type_set && pc->pmat) {
    ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
    if (symset && flg) { osm->type = PC_GASM_BASIC; }
  }
  ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); }
  ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
  flg  = PETSC_FALSE;
  ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetLocalSubdomains_GASM"
PetscErrorCode PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
  if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp().");

  if (!pc->setupcalled) {
    osm->n        = n;
    osm->is       = 0;
    osm->is_local = 0;
    if (is) {
      for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
    }
    if (is_local) {
      for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);}
    }
    if (osm->is) {
      ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
    }
    if (is) {
      ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr);
      for (i=0; i<n; i++) { osm->is[i] = is[i]; }
      /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */
      osm->overlap = -1;
    }
    if (is_local) {
      ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
      for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
    }
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetTotalSubdomains_GASM"
PetscErrorCode PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       n;

  PetscFunctionBegin;
  if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);

  /*
     Split the subdomains equally among all processors
  */
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  n = N/size + ((N % size) > rank);
  if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
  if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp().");
  if (!pc->setupcalled) {
    if (osm->is) {
      ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
    }
    osm->N        = N;
    osm->n        = n;
    osm->is       = 0;
    osm->is_local = 0;
  }
  PetscFunctionReturn(0);
}/* PCGASMSetTotalSubdomains_GASM() */
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetOverlap_GASM"
PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
  if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
  if (!pc->setupcalled) {
    osm->overlap = ovl;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetType_GASM"
PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->type     = type;
  osm->type_set = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetSortIndices_GASM"
PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->sort_indices = doSort;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMGetSubKSP_GASM"
/*
   FIX: This routine might need to be modified once multiple ranks per subdomain are allowed.
        In particular, it would upset the global subdomain number calculation.
*/
PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");

  if (n) {
    *n = osm->n;
  }
  if (first) {
    ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
    *first -= osm->n;
  }
  if (ksp) {
    /* Assume that local solves are now different; not necessarily
       true, though!  This flag is used only for PCView_GASM() */
    *ksp                   = osm->ksp;
    osm->same_local_solves = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
}/* PCGASMGetSubKSP_GASM() */
EXTERN_C_END


#undef __FUNCT__
#define __FUNCT__ "PCGASMSetLocalSubdomains"
/*@C
    PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor
    only) for the additive Schwarz preconditioner.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
.   n - the number of subdomains for this processor (default value = 1)
.   is - the index sets that define the subdomains for this processor
         (or PETSC_NULL for PETSc to determine subdomains)
-   is_local - the index sets that define the local part of the subdomains for this processor
         (or PETSC_NULL to use the default of 1 subdomain per process)

    Notes:
    The IS numbering is in the parallel, global numbering of the vector.

    By default the GASM preconditioner uses 1 block per processor.

    Use PCGASMSetTotalSubdomains() to set the subdomains for all processors.

    Level: advanced

.keywords: PC, GASM, set, local, subdomains, additive Schwarz

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
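
/*
   A minimal usage sketch (illustrative only, not part of the library; the index arrays idx0/idx1,
   their lengths n0/n1, and the outer KSP object ksp are placeholders):
   a caller that wants two subdomains on this process might do

      IS subs[2];
      PC pc;
      ierr = ISCreateGeneral(PETSC_COMM_SELF,n0,idx0,PETSC_COPY_VALUES,&subs[0]);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,n1,idx1,PETSC_COPY_VALUES,&subs[1]);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCSetType(pc,PCGASM);CHKERRQ(ierr);
      ierr = PCGASMSetLocalSubdomains(pc,2,subs,PETSC_NULL);CHKERRQ(ierr);

   The subdomain index sets use the parallel, global numbering of the vector, as noted above.
*/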

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetTotalSubdomains"
/*@C
    PCGASMSetTotalSubdomains - Sets the total number of subdomains, over all processors, for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
-   N  - the total number of subdomains for all processors

    Options Database Key:
    To set the total number of subdomain blocks rather than specify the
    index sets, use the option
.    -pc_gasm_blocks <blks> - Sets total blocks

    Notes:
    Currently you cannot use this routine to set the actual subdomain index sets;
    it only sets the total number of subdomains.

    By default the GASM preconditioner uses 1 block per processor.

    These index sets cannot be destroyed until after completion of the
    linear solves for which the GASM preconditioner is being used.

    Use PCGASMSetLocalSubdomains() to set local subdomains.

    Level: advanced

.keywords: PC, GASM, set, total, global, subdomains, additive Schwarz

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode PCGASMSetTotalSubdomains(PC pc,PetscInt N)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetOverlap"
/*@
    PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)

    Options Database Key:
.   -pc_gasm_overlap <ovl> - Sets overlap

    Notes:
    By default the GASM preconditioner uses 1 block per processor.  To use
    multiple blocks per processor, see PCGASMSetTotalSubdomains() and
    PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>).

    The overlap defaults to 1, so if one desires that no additional
    overlap be computed beyond what may have been set with a call to
    PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl
    must be set to be 0.  In particular, if one does not explicitly set
    the subdomains in an application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in a GASM
    variant that is equivalent to the block Jacobi preconditioner.

    Note that one can define initial index sets with any overlap via
    PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine
    PCGASMSetOverlap() merely allows PETSc to extend that overlap further
    if desired.

    Level: intermediate

.keywords: PC, GASM, set, overlap

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveInt(pc,ovl,2);
  ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
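
/*
   A minimal usage sketch (illustrative only; the outer KSP object ksp is assumed to exist):
   split the problem into 16 subdomains across all processes and let PETSc grow each one by
   2 layers of overlap, either from the command line,

      -pc_type gasm -pc_gasm_blocks 16 -pc_gasm_overlap 2

   or programmatically,

      PC pc;
      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCSetType(pc,PCGASM);CHKERRQ(ierr);
      ierr = PCGASMSetTotalSubdomains(pc,16);CHKERRQ(ierr);
      ierr = PCGASMSetOverlap(pc,2);CHKERRQ(ierr);
*/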

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetType"
/*@
    PCGASMSetType - Sets the type of restriction and interpolation used
    for local problems in the additive Schwarz method.

    Logically Collective on PC

    Input Parameters:
+   pc   - the preconditioner context
-   type - variant of GASM, one of
.vb
      PC_GASM_BASIC       - full interpolation and restriction
      PC_GASM_RESTRICT    - full restriction, local processor interpolation
      PC_GASM_INTERPOLATE - full interpolation, local processor restriction
      PC_GASM_NONE        - local processor restriction and interpolation
.ve

    Options Database Key:
.   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

    Level: intermediate

.keywords: PC, GASM, set, type

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode PCGASMSetType(PC pc,PCGASMType type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveEnum(pc,type,2);
  ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetSortIndices"
/*@
    PCGASMSetSortIndices - Determines whether subdomain indices are sorted.

    Logically Collective on PC

    Input Parameters:
+   pc     - the preconditioner context
-   doSort - sort the subdomain indices

    Level: intermediate

.keywords: PC, GASM, set, type

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,doSort,2);
  ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMGetSubKSP"
/*@C
   PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
   this processor.

   Collective on PC iff first_local is requested

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n_local - the number of blocks on this processor or PETSC_NULL
.  first_local - the global number of the first block on this processor or PETSC_NULL,
                 all processors must request or all must pass PETSC_NULL
-  ksp - the array of KSP contexts

   Note:
   After PCGASMGetSubKSP() the array of KSPs is not to be freed.

   Currently for some matrix implementations only 1 block per processor
   is supported.

   You must call KSPSetUp() before calling PCGASMGetSubKSP().

   Level: advanced

.keywords: PC, GASM, additive Schwarz, get, sub, KSP, context

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMSetOverlap(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
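
/*
   A minimal usage sketch (illustrative only; pc is assumed to be a PCGASM that has already been
   set up, e.g. by KSPSetUp() on the outer solver): retrieve the block solvers and make every
   local block use an ILU preconditioner.

      PetscInt nlocal,first,i;
      KSP      *subksp;
      PC       subpc;
      ierr = PCGASMGetSubKSP(pc,&nlocal,&first,&subksp);CHKERRQ(ierr);
      for (i=0; i<nlocal; i++) {
        ierr = KSPGetPC(subksp[i],&subpc);CHKERRQ(ierr);
        ierr = PCSetType(subpc,PCILU);CHKERRQ(ierr);
      }

   The same effect can usually be obtained more simply with the -sub_pc_type ilu option described
   in the PCGASM manual page below.
*/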

/* -------------------------------------------------------------------------------------*/
/*MC
   PCGASM - Use the (restricted) additive Schwarz method, where each block is (approximately) solved with
           its own KSP object.

   Options Database Keys:
+  -pc_gasm_truelocal - Activates PCGASMSetUseTrueLocal()
.  -pc_gasm_blocks <blks> - Sets total blocks
.  -pc_gasm_overlap <ovl> - Sets overlap
-  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

     IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
     will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
     -pc_gasm_type basic to use the standard GASM.

   Notes: Each processor can have one or more blocks, but a block cannot be shared by more
     than one processor. Defaults to one block per processor.

     To set options on the solvers for each block append -sub_ to all the KSP and PC
     options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

     To set the options on the solvers separately for each block call PCGASMGetSubKSP()
     and set the options directly on the resulting KSP object (you can access its PC
     with KSPGetPC())


   Level: beginner

   Concepts: additive Schwarz method

    References:
    An additive variant of the Schwarz alternating method for the case of many subregions
    M Dryja, OB Widlund - Courant Institute, New York University Technical report

    Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
    Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
           PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(),
           PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()

M*/
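
/*
   A typical command-line invocation (illustrative only; the executable name and the choice of
   outer Krylov method are placeholders):

      mpiexec -n 4 ./myapp -ksp_type gmres -pc_type gasm \
              -pc_gasm_blocks 8 -pc_gasm_overlap 1 -pc_gasm_type restrict \
              -sub_ksp_type preonly -sub_pc_type ilu

   This requests 8 subdomains in total, one layer of overlap, the default restricted variant, and an
   ILU factorization as the approximate solver on every block (configured through the -sub_ prefix
   described in the notes above).
*/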

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_GASM"
PetscErrorCode PCCreate_GASM(PC pc)
{
  PetscErrorCode ierr;
  PC_GASM        *osm;

  PetscFunctionBegin;
  ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr);
  osm->N                 = PETSC_DECIDE;
  osm->n                 = 0;
  osm->nmax              = 0;
  osm->overlap           = 1;
  osm->ksp               = 0;
  osm->grestriction      = 0;
  osm->gprolongation     = 0;
  osm->gx                = 0;
  osm->gy                = 0;
  osm->x                 = 0;
  osm->y                 = 0;
  osm->is                = 0;
  osm->is_local          = 0;
  osm->pmat              = 0;
  osm->type              = PC_GASM_RESTRICT;
  osm->same_local_solves = PETSC_TRUE;
  osm->sort_indices      = PETSC_TRUE;

  pc->data                 = (void*)osm;
  pc->ops->apply           = PCApply_GASM;
  pc->ops->applytranspose  = PCApplyTranspose_GASM;
  pc->ops->setup           = PCSetUp_GASM;
  pc->ops->reset           = PCReset_GASM;
  pc->ops->destroy         = PCDestroy_GASM;
  pc->ops->setfromoptions  = PCSetFromOptions_GASM;
  pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
  pc->ops->view            = PCView_GASM;
  pc->ops->applyrichardson = 0;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM",
                                           PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM",
                                           PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM",
                                           PCGASMSetOverlap_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM",
                                           PCGASMSetType_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM",
                                           PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM",
                                           PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END


#undef __FUNCT__
#define __FUNCT__ "PCGASMCreateSubdomains"
/*@C
   PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
   preconditioner for any problem on a general grid.

   Collective

   Input Parameters:
+  A - The global matrix operator
-  n - the number of local blocks

   Output Parameters:
.  outis - the array of index sets defining the subdomains

   Level: advanced

   Note: this generates nonoverlapping subdomains; the PCGASM will generate the overlap
    from these if you use PCGASMSetLocalSubdomains()

    In the Fortran version you must provide the array outis[] already allocated of length n.

.keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid

.seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains()
@*/
PetscErrorCode PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
{
  MatPartitioning mpart;
  const char      *prefix;
  PetscErrorCode  (*f)(Mat,MatReuse,Mat*);
  PetscMPIInt     size;
  PetscInt        i,j,rstart,rend,bs;
  PetscBool       isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
  Mat             Ad = PETSC_NULL, adj;
  IS              ispart,isnumb,*is;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(outis,3);
  if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);

  /* Get prefix, row distribution, and block size */
  ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);

  /* Get diagonal block from matrix if possible */
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
  } else if (size == 1) {
    Ad = A;
  }
  if (Ad) {
    ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
    if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
  }
  if (Ad && n > 1) {
    PetscBool match,done;
    /* Try to setup a good matrix partitioning if available */
    ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
    ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
    ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
    ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
    if (!match) {
      ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
    }
    if (!match) { /* assume a "good" partitioner is available */
      PetscInt na,*ia,*ja;
      ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
      if (done) {
        /* Build adjacency matrix by hand.  Unfortunately a call to
           MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
           remove the block-aij structure and we cannot expect
           MatPartitioning to split vertices as we need */
        PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
        nnz = 0;
        for (i=0; i<na; i++) { /* count number of nonzeros */
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] == i) { /* don't count diagonal */
              len--; break;
            }
          }
          nnz += len;
        }
        ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
        ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
        nnz    = 0;
        iia[0] = 0;
        for (i=0; i<na; i++) { /* fill adjacency */
          cnt = 0;
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] != i) { /* if not diagonal */
              jja[nnz+cnt++] = row[j];
            }
          }
          nnz += cnt;
          iia[i+1] = nnz;
        }
        /* Partitioning of the adjacency matrix */
        ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
        ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
        ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
        ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
        ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
        ierr = MatDestroy(&adj);CHKERRQ(ierr);
        foundpart = PETSC_TRUE;
      }
      ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
    }
    ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
  }

  ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
  *outis = is;

  if (!foundpart) {

    /* Partitioning by contiguous chunks of rows */

    PetscInt mbs   = (rend-rstart)/bs;
    PetscInt start = rstart;
    for (i=0; i<n; i++) {
      PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
      ierr = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
      start += count;
    }

  } else {

    /* Partitioning by adjacency of diagonal block */

    const PetscInt *numbering;
    PetscInt       *count,nidx,*indices,*newidx,start=0;
    /* Get node count in each partition */
    ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
    ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      for (i=0; i<n; i++) count[i] *= bs;
    }
    /* Build indices from node numbering */
    ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
    ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
    for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
    ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
    ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
    ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
      for (i=0; i<nidx; i++)
        for (j=0; j<bs; j++)
          newidx[i*bs+j] = indices[i]*bs + j;
      ierr = PetscFree(indices);CHKERRQ(ierr);
      nidx   *= bs;
      indices = newidx;
    }
    /* Shift to get global indices */
    for (i=0; i<nidx; i++) indices[i] += rstart;

    /* Build the index sets for each block */
    for (i=0; i<n; i++) {
      ierr = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
      ierr = ISSort(is[i]);CHKERRQ(ierr);
      start += count[i];
    }

    ierr = PetscFree(count);CHKERRQ(ierr);
    ierr = PetscFree(indices);CHKERRQ(ierr);
    ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
    ierr = ISDestroy(&ispart);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMDestroySubdomains"
/*@C
   PCGASMDestroySubdomains - Destroys the index sets created with
   PCGASMCreateSubdomains(). Should be called after setting subdomains
   with PCGASMSetLocalSubdomains().

   Collective

   Input Parameters:
+  n - the number of index sets
.  is - the array of index sets
-  is_local - the array of local index sets, can be PETSC_NULL

   Level: advanced

.keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid

.seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains()
@*/
PetscErrorCode PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
{
  PetscInt       i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
  PetscValidPointer(is,2);
  for (i=0; i<n; i++) { ierr = ISDestroy(&is[i]);CHKERRQ(ierr); }
  ierr = PetscFree(is);CHKERRQ(ierr);
  if (is_local) {
    PetscValidPointer(is_local,3);
    for (i=0; i<n; i++) { ierr = ISDestroy(&is_local[i]);CHKERRQ(ierr); }
    ierr = PetscFree(is_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
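
/*
   A minimal usage sketch (illustrative only; the matrix A, the PCGASM object pc, and the choice of
   4 blocks are placeholders): let PETSc partition the locally owned rows of A into 4 nonoverlapping
   subdomains, hand them to PCGASM, and clean up after the solves are complete.

      IS *subs;
      ierr = PCGASMCreateSubdomains(A,4,&subs);CHKERRQ(ierr);
      ierr = PCGASMSetLocalSubdomains(pc,4,subs,PETSC_NULL);CHKERRQ(ierr);
      ierr = PCGASMSetOverlap(pc,1);CHKERRQ(ierr);
      ...  solve with the outer KSP  ...
      ierr = PCGASMDestroySubdomains(4,subs,PETSC_NULL);CHKERRQ(ierr);
*/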


#define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
{ \
  PetscInt first_row = first/M, last_row = last/M+1; \
  /* \
   Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \
   of the bounding box of the intersection of the subdomain with the local ownership range (local \
   subdomain). \
   Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \
   of the intersection. \
  */ \
  /* ylow_loc is the grid row containing the first element of the local subdomain */ \
  *ylow_loc = PetscMax(first_row,ylow); \
  /* xleft_loc is the offset of the first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft; \
  /* yhigh_loc is the grid row above the last local subdomain element */ \
  *yhigh_loc = PetscMin(last_row,yhigh); \
  /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright; \
  /* Now compute the size of the local subdomain n. */ \
  *n = 0; \
  if (*ylow_loc < *yhigh_loc) { \
    PetscInt width = xright-xleft; \
    *n += width*(*yhigh_loc-*ylow_loc-1); \
    *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
    *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
  } \
}



#undef __FUNCT__
#define __FUNCT__ "PCGASMCreateSubdomains2D"
/*@
   PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
   preconditioner for a two-dimensional problem on a regular grid.

   Collective

   Input Parameters:
+  M, N - the global number of mesh points in the x and y directions
.  Mdomains, Ndomains - the global number of subdomains in the x and y directions
.  dof - degrees of freedom per node
-  overlap - overlap in mesh lines

   Output Parameters:
+  Nsub - the number of local subdomains created
.  is - array of index sets defining overlapping (if overlap > 0) subdomains
-  is_local - array of index sets defining non-overlapping subdomains

   Note:
   Presently PCGASMCreateSubdomains2D() is valid only for sequential
   preconditioners.  More general related routines are
   PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains().

   Level: advanced

.keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMSetOverlap()
@*/
PetscErrorCode PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
{
  PetscErrorCode ierr;
  PetscMPIInt    size, rank;
  PetscInt       i, j;
  PetscInt       maxheight, maxwidth;
  PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
  PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
  PetscInt       x[2][2], y[2][2], n[2];
  PetscInt       first, last;
  PetscInt       nidx, *idx;
  PetscInt       ii,jj,s,q,d;
  PetscInt       k,kk;
  PetscMPIInt    color;
  MPI_Comm       comm, subcomm;
  IS             **iis = 0;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
  if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
                                      "does not respect the number of degrees of freedom per grid point %D", first, last, dof);

  /* Determine the number of domains with nonzero intersections with the local ownership range. */
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    ylow   = PetscMax(ystart - overlap,0);
    yhigh  = PetscMin(ystart + maxheight + overlap,N);
    xstart = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      xleft  = PetscMax(xstart - overlap,0);
      xright = PetscMin(xstart + maxwidth + overlap,M);
      /*
         Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
      */
      PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
      if (nidx) {
        ++s;
      }
      xstart += maxwidth;
    } /* for (i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  } /* for (j = 0; j < Ndomains; ++j) */
  /* Now we can allocate the necessary number of ISs. */
#undef __FUNCT__
#define __FUNCT__ "PCGASMCreateSubdomains2D"
/*@
   PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
   preconditioner for a two-dimensional problem on a regular grid.

   Collective

   Input Parameters:
+  M, N               - the global number of mesh points in the x and y directions
.  Mdomains, Ndomains - the global number of subdomains in the x and y directions
.  dof                - degrees of freedom per node
-  overlap            - overlap in mesh lines

   Output Parameters:
+  nsub     - the number of local subdomains created
.  is       - array of index sets defining overlapping (if overlap > 0) subdomains
-  is_local - array of index sets defining non-overlapping subdomains

   Note:
   Presently PCGASMCreateSubdomains2D() is valid only for sequential
   preconditioners.  More general related routines are
   PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains().

   Level: advanced

.keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMSetOverlap()
@*/
PetscErrorCode PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
{
  PetscErrorCode ierr;
  PetscMPIInt    size, rank;
  PetscInt       i, j;
  PetscInt       maxheight, maxwidth;
  PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
  PetscInt       ystart, ylow, yhigh, ylow_loc, yhigh_loc;
  PetscInt       x[2][2], y[2][2], n[2];
  PetscInt       first, last;
  PetscInt       nidx, *idx;
  PetscInt       ii,jj,s,q,d;
  PetscInt       k,kk;
  PetscMPIInt    color;
  MPI_Comm       comm, subcomm;
  IS             **iis = 0;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
  if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
                                      "does not respect the number of degrees of freedom per grid point %D", first, last, dof);

  /* Determine the number of domains with nonzero intersections with the local ownership range. */
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    ylow   = PetscMax(ystart - overlap,0);
    yhigh  = PetscMin(ystart + maxheight + overlap,N);
    xstart = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      xleft  = PetscMax(xstart - overlap,0);
      xright = PetscMin(xstart + maxwidth + overlap,M);
      /* Determine whether this subdomain intersects this processor's ownership range of pc->pmat. */
      PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
      if (nidx) ++s;
      xstart += maxwidth;
    } /* for (i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  } /* for (j = 0; j < Ndomains; ++j) */

  /* Now we can allocate the necessary number of ISs. */
  *nsub = s;
  ierr  = PetscMalloc((*nsub)*sizeof(IS*),is);CHKERRQ(ierr);
  ierr  = PetscMalloc((*nsub)*sizeof(IS*),is_local);CHKERRQ(ierr);

  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    y[0][0] = PetscMax(ystart - overlap,0);
    y[0][1] = PetscMin(ystart + maxheight + overlap,N);
    /* Vertical domain limits without an overlap. */
    y[1][0] = ystart;
    y[1][1] = PetscMin(ystart + maxheight,N);
    xstart  = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      x[0][0] = PetscMax(xstart - overlap,0);
      x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
      /* Horizontal domain limits without an overlap. */
      x[1][0] = xstart;
      x[1][1] = PetscMin(xstart+maxwidth,M);
      /*
         Determine whether this domain intersects this processor's ownership range of pc->pmat.
         Do this twice: first for the domains with an overlap, and then for the domains without.
         During the first pass create the subcommunicators, and use them on the second pass as well.
      */
      for (q = 0; q < 2; ++q) {
        /*
           The domain limits (xleft, xright) and (ylow, yhigh) are set according to
           whether the domain with an overlap or without is considered.
        */
        xleft = x[q][0]; xright = x[q][1];
        ylow  = y[q][0]; yhigh  = y[q][1];
        PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
        nidx *= dof;
        n[q]  = nidx;
        /*
           Based on the counted number of indices in the local domain *with an overlap*,
           construct a subcommunicator of all the processors supporting this domain.
           Observe that a domain with an overlap might have nontrivial local support,
           while the domain without an overlap might not.  Hence, the decision to participate
           in the subcommunicator must be based on the domain with an overlap.
        */
        if (q == 0) {
          if (nidx) color = 1;
          else      color = MPI_UNDEFINED;
          ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
        }
        /*
           Proceed only if the number of local indices *with an overlap* is nonzero.
        */
        if (n[0]) {
          if (q == 0) iis = is;
          if (q == 1) {
            /*
               The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
               Moreover, if the overlap is zero, the two ISs are identical.
            */
            if (overlap == 0) {
              (*is_local)[s] = (*is)[s];
              ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
              continue;
            } else {
              iis     = is_local;
              subcomm = ((PetscObject)(*is)[s])->comm;
            }
          } /* if (q == 1) */
          idx  = PETSC_NULL;
          ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
          if (nidx) {
            k = 0;
            for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
              PetscInt x0 = (jj==ylow_loc)    ? xleft_loc  : xleft;
              PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
              kk = dof*(M*jj + x0);
              for (ii=x0; ii<x1; ++ii) {
                for (d = 0; d < dof; ++d) {
                  idx[k++] = kk++;
                }
              }
            }
          }
          ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr);
        } /* if (n[0]) */
      } /* for (q = 0; q < 2; ++q) */
      if (n[0]) ++s;
      xstart += maxwidth;
    } /* for (i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  } /* for (j = 0; j < Ndomains; ++j) */
  PetscFunctionReturn(0);
}
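
/*
   A minimal usage sketch for PCGASMCreateSubdomains2D() (illustrative only; the KSP "ksp"
   operating on a 64 x 64 grid, the 2 x 2 subdomain layout, dof = 1 and overlap = 1 are all
   assumptions).  Because the routine calls MatGetOwnershipRange() on pc->pmat, the
   operators must already have been set, e.g. with KSPSetOperators():

      PC       pc;
      PetscInt nsub;
      IS       *is,*is_local;

      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCSetType(pc,PCGASM);CHKERRQ(ierr);
      ierr = PCGASMCreateSubdomains2D(pc,64,64,2,2,1,1,&nsub,&is,&is_local);CHKERRQ(ierr);
      ierr = PCGASMSetLocalSubdomains(pc,nsub,is,is_local);CHKERRQ(ierr);
      ierr = KSPSetUp(ksp);CHKERRQ(ierr);
      ierr = PCGASMDestroySubdomains(nsub,is,is_local);CHKERRQ(ierr);

   Per the PCGASMDestroySubdomains() man page, the clean-up call may be made any time
   after the subdomains have been passed to PCGASMSetLocalSubdomains().
*/
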
#undef __FUNCT__
#define __FUNCT__ "PCGASMGetLocalSubdomains"
/*@C
   PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor
   only) for the additive Schwarz preconditioner.

   Not Collective

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n        - the number of subdomains for this processor (default value = 1)
.  is       - the index sets that define the subdomains for this processor
-  is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)

   Notes:
   The IS numbering is in the parallel, global numbering of the vector.

   Level: advanced

.keywords: PC, GASM, set, local, subdomains, additive Schwarz

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices()
@*/
PetscErrorCode PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
{
  PC_GASM        *osm;
  PetscErrorCode ierr;
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidIntPointer(n,2);
  if (is) PetscValidPointer(is,3);
  ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  if (!match) {
    if (n)  *n  = 0;
    if (is) *is = PETSC_NULL;
  } else {
    osm = (PC_GASM*)pc->data;
    if (n)        *n        = osm->n;
    if (is)       *is       = osm->is;
    if (is_local) *is_local = osm->is_local;
  }
  PetscFunctionReturn(0);
}
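
/*
   A minimal usage sketch for PCGASMGetLocalSubdomains() (illustrative only; "pc" is
   assumed to be a PCGASM preconditioner whose subdomains have already been set or
   created during setup):

      PetscInt n,i,len;
      IS       *is,*is_local;

      ierr = PCGASMGetLocalSubdomains(pc,&n,&is,&is_local);CHKERRQ(ierr);
      for (i=0; i<n; i++) {
        ierr = ISGetLocalSize(is[i],&len);CHKERRQ(ierr);
        ierr = PetscPrintf(PETSC_COMM_SELF,"subdomain %D has %D local indices\n",i,len);CHKERRQ(ierr);
      }

   The arrays returned here point at the preconditioner's internal storage and therefore
   must not be freed or destroyed by the caller.
*/
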
#undef __FUNCT__
#define __FUNCT__ "PCGASMGetLocalSubmatrices"
/*@C
   PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor
   only) for the additive Schwarz preconditioner.

   Not Collective

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n   - the number of matrices for this processor (default value = 1)
-  mat - the matrices

   Level: advanced

.keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
{
  PC_GASM        *osm;
  PetscErrorCode ierr;
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidIntPointer(n,2);
  if (mat) PetscValidPointer(mat,3);
  if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
  ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  if (!match) {
    if (n)   *n   = 0;
    if (mat) *mat = PETSC_NULL;
  } else {
    osm = (PC_GASM*)pc->data;
    if (n)   *n   = osm->n;
    if (mat) *mat = osm->pmat;
  }
  PetscFunctionReturn(0);
}
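
/*
   A minimal usage sketch for PCGASMGetLocalSubmatrices() (illustrative only; "ksp" is an
   assumed KSP whose preconditioner is of type PCGASM).  The submatrices exist only after
   setup, hence the explicit KSPSetUp() call:

      PC       pc;
      PetscInt n,i,m,nn;
      Mat      *mats;

      ierr = KSPSetUp(ksp);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCGASMGetLocalSubmatrices(pc,&n,&mats);CHKERRQ(ierr);
      for (i=0; i<n; i++) {
        ierr = MatGetSize(mats[i],&m,&nn);CHKERRQ(ierr);
        ierr = PetscPrintf(PETSC_COMM_SELF,"submatrix %D is %D x %D\n",i,m,nn);CHKERRQ(ierr);
      }

   As with PCGASMGetLocalSubdomains(), the returned array belongs to the preconditioner
   and should not be freed by the caller.
*/
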