1 2 /* 3 This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation. 4 In this version each processor may have any number of subdomains and a subdomain may live on multiple 5 processors. 6 7 N - total number of subdomains on all processors 8 n - actual number of subdomains on this processor 9 nmax - maximum number of subdomains per processor 10 */ 11 #include <private/pcimpl.h> /*I "petscpc.h" I*/ 12 13 typedef struct { 14 PetscInt N,n,nmax; 15 PetscInt overlap; /* overlap requested by user */ 16 KSP *ksp; /* linear solvers for each block */ 17 Vec gx,gy; /* Merged work vectors */ 18 Vec *x,*y; /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */ 19 IS gis, gis_local; /* merged ISs */ 20 VecScatter grestriction; /* merged restriction */ 21 VecScatter gprolongation; /* merged prolongation */ 22 IS *is; /* index set that defines each overlapping subdomain */ 23 IS *is_local; /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */ 24 Mat *pmat; /* subdomain block matrices */ 25 PCGASMType type; /* use reduced interpolation, restriction or both */ 26 PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */ 27 PetscBool same_local_solves; /* flag indicating whether all local solvers are same */ 28 PetscBool sort_indices; /* flag to sort subdomain indices */ 29 } PC_GASM; 30 31 #undef __FUNCT__ 32 #define __FUNCT__ "PCGASMPrintSubdomains" 33 static PetscErrorCode PCGASMPrintSubdomains(PC pc) 34 { 35 PC_GASM *osm = (PC_GASM*)pc->data; 36 const char *prefix; 37 char fname[PETSC_MAX_PATH_LEN+1]; 38 PetscViewer viewer, sviewer; 39 PetscInt i,j,nidx; 40 const PetscInt *idx; 41 char *cidx; 42 PetscBool found; 43 PetscMPIInt size, rank; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size); CHKERRQ(ierr); 48 ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank); CHKERRQ(ierr); 49 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 50 ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr); 51 if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }; 52 for (i=0;i<osm->n;++i) { 53 ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr); 54 ierr = ISGetLocalSize(osm->is[i], &nidx); CHKERRQ(ierr); 55 /* 56 No more than 15 characters per index plus a space. 57 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 58 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 59 For nidx == 0, the whole string 16 '\0'. 60 */ 61 ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx); CHKERRQ(ierr); 62 ierr = ISGetIndices(osm->is[i], &idx); CHKERRQ(ierr); 63 ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*(nidx+1), &sviewer); CHKERRQ(ierr); 64 for(j = 0; j < nidx; ++j) { 65 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]); CHKERRQ(ierr); 66 } 67 ierr = PetscViewerDestroy(sviewer); CHKERRQ(ierr); 68 ierr = ISRestoreIndices(osm->is[i],&idx); CHKERRQ(ierr); 69 70 ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n"); CHKERRQ(ierr); 71 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx); CHKERRQ(ierr); 72 ierr = PetscViewerFlush(viewer); CHKERRQ(ierr); 73 ierr = PetscViewerASCIIPrintf(viewer, "\n"); CHKERRQ(ierr); 74 ierr = PetscFree(cidx); CHKERRQ(ierr); 75 ierr = PetscViewerDestroy(viewer); CHKERRQ(ierr); 76 if (osm->is_local) { 77 ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr); 78 ierr = ISGetLocalSize(osm->is_local[i], &nidx); CHKERRQ(ierr); 79 /* 80 No more than 15 characters per index plus a space. 81 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 82 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 83 For nidx == 0, the whole string 16 '\0'. 84 */ 85 ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx); CHKERRQ(ierr); 86 ierr = ISGetIndices(osm->is_local[i], &idx); CHKERRQ(ierr); 87 ierr = PetscViewerStringOpen(((PetscObject)(osm->is_local[i]))->comm, cidx, 16*(nidx+1), &sviewer); CHKERRQ(ierr); 88 for(j = 0; j < nidx; ++j) { 89 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]); CHKERRQ(ierr); 90 } 91 ierr = PetscViewerDestroy(sviewer); CHKERRQ(ierr); 92 ierr = ISRestoreIndices(osm->is_local[i],&idx); CHKERRQ(ierr); 93 94 ierr = PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n"); CHKERRQ(ierr); 95 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx); CHKERRQ(ierr); 96 ierr = PetscViewerFlush(viewer); CHKERRQ(ierr); 97 ierr = PetscViewerASCIIPrintf(viewer, "\n"); CHKERRQ(ierr); 98 ierr = PetscFree(cidx); CHKERRQ(ierr); 99 ierr = PetscViewerDestroy(viewer); CHKERRQ(ierr); 100 } 101 } 102 PetscFunctionReturn(0); 103 } 104 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "PCView_GASM" 108 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer) 109 { 110 PC_GASM *osm = (PC_GASM*)pc->data; 111 const char *prefix; 112 PetscErrorCode ierr; 113 PetscMPIInt rank, size; 114 PetscInt i,bsz; 115 PetscBool iascii,isstring, print_subdomains; 116 PetscViewer sviewer; 117 118 119 PetscFunctionBegin; 120 ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size); CHKERRQ(ierr); 121 ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank); CHKERRQ(ierr); 122 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 123 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr); 124 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 125 ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr); 126 if (iascii) { 127 char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set"; 128 if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);} 129 if (osm->nmax > 0) {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);} 130 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr); 131 ierr = PetscViewerFlush(viewer); CHKERRQ(ierr); 132 ierr = PetscViewerASCIIPrintf(viewer," Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr); 133 ierr = PetscViewerASCIIPrintf(viewer," Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr); 134 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 135 if (osm->same_local_solves) { 136 if (osm->ksp) { 137 ierr = PetscViewerASCIIPrintf(viewer," Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr); 138 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 139 if (!rank) { 140 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 141 ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr); 142 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 143 } 144 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 145 } 146 } else { 147 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 148 ierr = PetscViewerASCIIPrintf(viewer," Local solve info for each subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr); 149 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 150 ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 151 for (i=0; i<osm->nmax; i++) { 152 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 153 if (i < osm->n) { 154 ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr); 155 ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr); 156 ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr); 157 ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 158 } 159 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 160 } 161 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 162 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 163 } 164 } else if (isstring) { 165 ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr); 166 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 167 if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);} 168 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 169 } else { 170 SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name); 171 } 172 ierr = PetscViewerFlush(viewer); CHKERRQ(ierr); 173 if(print_subdomains) { 174 ierr = PCGASMPrintSubdomains(pc); CHKERRQ(ierr); 175 } 176 PetscFunctionReturn(0); 177 } 178 179 180 181 182 183 #undef __FUNCT__ 184 #define __FUNCT__ "PCSetUp_GASM" 185 static PetscErrorCode PCSetUp_GASM(PC pc) 186 { 187 PC_GASM *osm = (PC_GASM*)pc->data; 188 PetscErrorCode ierr; 189 PetscBool symset,flg; 190 PetscInt i; 191 PetscMPIInt rank, size; 192 MatReuse scall = MAT_REUSE_MATRIX; 193 KSP ksp; 194 PC subpc; 195 const char *prefix,*pprefix; 196 PetscInt dn; /* Number of indices in a single subdomain assigned to this processor. */ 197 const PetscInt *didx; /* Indices from a single subdomain assigned to this processor. */ 198 PetscInt ddn; /* Number of indices in all subdomains assigned to this processor. */ 199 PetscInt *ddidx; /* Indices of all subdomains assigned to this processor. */ 200 IS gid; /* Identity IS of the size of all subdomains assigned to this processor. */ 201 Vec x,y; 202 PetscScalar *gxarray, *gyarray; 203 PetscInt gfirst, glast; 204 205 PetscFunctionBegin; 206 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 207 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 208 if (!pc->setupcalled) { 209 210 if (!osm->type_set) { 211 ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr); 212 if (symset && flg) { osm->type = PC_GASM_BASIC; } 213 } 214 215 if (osm->N == PETSC_DECIDE && osm->n < 1) { 216 /* no subdomains given, use one per processor */ 217 osm->nmax = osm->n = 1; 218 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 219 osm->N = size; 220 } else if (osm->N == PETSC_DECIDE) { 221 PetscInt inwork[2], outwork[2]; 222 /* determine global number of subdomains and the max number of local subdomains */ 223 inwork[0] = inwork[1] = osm->n; 224 ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr); 225 osm->nmax = outwork[0]; 226 osm->N = outwork[1]; 227 } 228 if (!osm->is){ /* create the index sets */ 229 ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr); 230 } 231 if (!osm->is_local) { 232 /* 233 This indicates that osm->is should define a nonoverlapping decomposition 234 (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetLocalSubdomains, 235 but the assumption is that either the user does the right thing, or subdomains in ossm->is have been created 236 via PCGASMCreateSubdomains, which guarantees a nonoverlapping decomposition). 237 Therefore, osm->is will be used to define osm->is_local. 238 If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap, 239 so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping. 240 Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is. 241 */ 242 ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr); 243 for (i=0; i<osm->n; i++) { 244 if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */ 245 ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr); 246 ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr); 247 } else { 248 ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr); 249 osm->is_local[i] = osm->is[i]; 250 } 251 } 252 } 253 /* Beyond this point osm->is_local is not null. */ 254 if (osm->overlap > 0) { 255 /* Extend the "overlapping" regions by a number of steps */ 256 ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr); 257 } 258 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 259 flg = PETSC_FALSE; 260 ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr); 261 if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); } 262 263 if (osm->sort_indices) { 264 for (i=0; i<osm->n; i++) { 265 ierr = ISSort(osm->is[i]);CHKERRQ(ierr); 266 ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr); 267 } 268 } 269 /* Merge the ISs, create merged vectors and scatter contexts. */ 270 /* Restriction ISs. */ 271 ddn = 0; 272 for (i=0; i<osm->n; i++) { 273 ierr = ISGetLocalSize(osm->is[i],&dn); CHKERRQ(ierr); 274 ddn += dn; 275 } 276 ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx); CHKERRQ(ierr); 277 ddn = 0; 278 for (i=0; i<osm->n; i++) { 279 ierr = ISGetLocalSize(osm->is[i],&dn); CHKERRQ(ierr); 280 ierr = ISGetIndices(osm->is[i],&didx); CHKERRQ(ierr); 281 ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn); CHKERRQ(ierr); 282 ierr = ISRestoreIndices(osm->is[i], &didx); CHKERRQ(ierr); 283 ddn += dn; 284 } 285 ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis); CHKERRQ(ierr); 286 ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr); 287 ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx); CHKERRQ(ierr); 288 ierr = VecDuplicate(osm->gx,&osm->gy); CHKERRQ(ierr); 289 ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast); CHKERRQ(ierr); 290 ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid); CHKERRQ(ierr); 291 ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction)); CHKERRQ(ierr); 292 ierr = ISDestroy(gid); CHKERRQ(ierr); 293 /* Prolongation ISs */ 294 { PetscInt dn_local; /* Number of indices in the local part of a single domain assigned to this processor. */ 295 const PetscInt *didx_local; /* Global indices from the local part of a single domain assigned to this processor. */ 296 PetscInt ddn_local; /* Number of indices in the local part of the disjoint union all domains assigned to this processor. */ 297 PetscInt *ddidx_local; /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */ 298 /**/ 299 ISLocalToGlobalMapping ltog; /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */ 300 PetscInt *ddidx_llocal; /* Mapped local indices of the disjoint union of local parts of subdomains. */ 301 PetscInt ddn_llocal; /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */ 302 IS gis_llocal; /* IS with ddidx_llocal indices. */ 303 PetscInt j; 304 ddn_local = 0; 305 for (i=0; i<osm->n; i++) { 306 ierr = ISGetLocalSize(osm->is_local[i],&dn_local); CHKERRQ(ierr); 307 ddn_local += dn_local; 308 } 309 ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local); CHKERRQ(ierr); 310 ddn_local = 0; 311 for (i=0; i<osm->n; i++) { 312 ierr = ISGetLocalSize(osm->is_local[i],&dn_local); CHKERRQ(ierr); 313 ierr = ISGetIndices(osm->is_local[i],&didx_local); CHKERRQ(ierr); 314 ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local); CHKERRQ(ierr); 315 ierr = ISRestoreIndices(osm->is_local[i], &didx_local); CHKERRQ(ierr); 316 ddn_local += dn_local; 317 } 318 ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal); CHKERRQ(ierr); 319 ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local)); CHKERRQ(ierr); 320 ierr = ISLocalToGlobalMappingCreateIS(osm->gis,<og);CHKERRQ(ierr); 321 ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr); 322 ierr = ISLocalToGlobalMappingDestroy(ltog);CHKERRQ(ierr); 323 if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local); 324 /* Now convert these localized indices into the global indices into the merged output vector. */ 325 ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast); CHKERRQ(ierr); 326 for(j=0; j < ddn_llocal; ++j) { 327 ddidx_llocal[j] += gfirst; 328 } 329 ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal); CHKERRQ(ierr); 330 ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation); CHKERRQ(ierr); 331 ierr = ISDestroy(gis_llocal);CHKERRQ(ierr); 332 } 333 /* Create the subdomain work vectors. */ 334 ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr); 335 ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr); 336 ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr); 337 ierr = VecGetArray(osm->gx, &gxarray); CHKERRQ(ierr); 338 ierr = VecGetArray(osm->gy, &gyarray); CHKERRQ(ierr); 339 for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) { 340 PetscInt dN; 341 ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr); 342 ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr); 343 ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr); 344 ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr); 345 } 346 ierr = VecRestoreArray(osm->gx, &gxarray); CHKERRQ(ierr); 347 ierr = VecRestoreArray(osm->gy, &gyarray); CHKERRQ(ierr); 348 ierr = VecDestroy(x); CHKERRQ(ierr); 349 ierr = VecDestroy(y); CHKERRQ(ierr); 350 /* Create the local solvers */ 351 ierr = PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr); 352 for (i=0; i<osm->n; i++) { /* KSPs are local */ 353 ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr); 354 ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr); 355 ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr); 356 ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr); 357 ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); 358 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 359 ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr); 360 ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr); 361 osm->ksp[i] = ksp; 362 } 363 scall = MAT_INITIAL_MATRIX; 364 365 } else { 366 /* 367 Destroy the blocks from the previous iteration 368 */ 369 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 370 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 371 scall = MAT_INITIAL_MATRIX; 372 } 373 } 374 375 /* 376 Extract out the submatrices. 377 */ 378 if(size > 1) { 379 ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr); 380 } 381 else { 382 ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr); 383 } 384 if (scall == MAT_INITIAL_MATRIX) { 385 ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr); 386 for (i=0; i<osm->n; i++) { 387 ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr); 388 ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr); 389 } 390 } 391 392 /* Return control to the user so that the submatrices can be modified (e.g., to apply 393 different boundary conditions for the submatrices than for the global problem) */ 394 ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr); 395 396 /* 397 Loop over submatrices putting them into local ksp 398 */ 399 for (i=0; i<osm->n; i++) { 400 ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr); 401 if (!pc->setupcalled) { 402 ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr); 403 } 404 } 405 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "PCSetUpOnBlocks_GASM" 411 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc) 412 { 413 PC_GASM *osm = (PC_GASM*)pc->data; 414 PetscErrorCode ierr; 415 PetscInt i; 416 417 PetscFunctionBegin; 418 for (i=0; i<osm->n; i++) { 419 ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr); 420 } 421 PetscFunctionReturn(0); 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "PCApply_GASM" 426 static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y) 427 { 428 PC_GASM *osm = (PC_GASM*)pc->data; 429 PetscErrorCode ierr; 430 PetscInt i; 431 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 432 433 PetscFunctionBegin; 434 /* 435 Support for limiting the restriction or interpolation to only local 436 subdomain values (leaving the other values 0). 437 */ 438 if (!(osm->type & PC_GASM_RESTRICT)) { 439 forward = SCATTER_FORWARD_LOCAL; 440 /* have to zero the work RHS since scatter may leave some slots empty */ 441 ierr = VecZeroEntries(osm->gx); CHKERRQ(ierr); 442 } 443 if (!(osm->type & PC_GASM_INTERPOLATE)) { 444 reverse = SCATTER_REVERSE_LOCAL; 445 } 446 447 ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 448 ierr = VecZeroEntries(y);CHKERRQ(ierr); 449 ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 450 /* do the subdomain solves */ 451 for (i=0; i<osm->n; ++i) { 452 ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 453 } 454 ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 455 ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 456 PetscFunctionReturn(0); 457 } 458 459 #undef __FUNCT__ 460 #define __FUNCT__ "PCApplyTranspose_GASM" 461 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y) 462 { 463 PC_GASM *osm = (PC_GASM*)pc->data; 464 PetscErrorCode ierr; 465 PetscInt i; 466 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 467 468 PetscFunctionBegin; 469 /* 470 Support for limiting the restriction or interpolation to only local 471 subdomain values (leaving the other values 0). 472 473 Note: these are reversed from the PCApply_GASM() because we are applying the 474 transpose of the three terms 475 */ 476 if (!(osm->type & PC_GASM_INTERPOLATE)) { 477 forward = SCATTER_FORWARD_LOCAL; 478 /* have to zero the work RHS since scatter may leave some slots empty */ 479 ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr); 480 } 481 if (!(osm->type & PC_GASM_RESTRICT)) { 482 reverse = SCATTER_REVERSE_LOCAL; 483 } 484 485 ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 486 ierr = VecZeroEntries(y);CHKERRQ(ierr); 487 ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 488 /* do the local solves */ 489 for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */ 490 ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 491 } 492 ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 493 ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 494 PetscFunctionReturn(0); 495 } 496 497 #undef __FUNCT__ 498 #define __FUNCT__ "PCReset_GASM" 499 static PetscErrorCode PCReset_GASM(PC pc) 500 { 501 PC_GASM *osm = (PC_GASM*)pc->data; 502 PetscErrorCode ierr; 503 PetscInt i; 504 505 PetscFunctionBegin; 506 if (osm->ksp) { 507 for (i=0; i<osm->n; i++) { 508 ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr); 509 } 510 } 511 if (osm->pmat) { 512 if (osm->n > 0) { 513 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 514 } 515 } 516 for (i=0; i<osm->n; i++) { 517 ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr); 518 ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr); 519 } 520 ierr = VecDestroy(osm->gx); CHKERRQ(ierr); 521 ierr = VecDestroy(osm->gy); CHKERRQ(ierr); 522 523 ierr = VecScatterDestroy(osm->grestriction); CHKERRQ(ierr); 524 ierr = VecScatterDestroy(osm->gprolongation); CHKERRQ(ierr); 525 526 if (osm->is) { 527 ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); 528 } 529 530 if (osm->gis) { 531 ierr = ISDestroy(osm->gis); CHKERRQ(ierr); 532 } 533 if (osm->gis_local) { 534 ierr = ISDestroy(osm->gis_local); CHKERRQ(ierr); 535 } 536 PetscFunctionReturn(0); 537 } 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "PCDestroy_GASM" 541 static PetscErrorCode PCDestroy_GASM(PC pc) 542 { 543 PC_GASM *osm = (PC_GASM*)pc->data; 544 PetscErrorCode ierr; 545 PetscInt i; 546 547 PetscFunctionBegin; 548 ierr = PCReset_GASM(pc);CHKERRQ(ierr); 549 if (osm->ksp) { 550 for (i=0; i<osm->n; i++) { 551 ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr); 552 } 553 ierr = PetscFree(osm->ksp);CHKERRQ(ierr); 554 } 555 ierr = PetscFree(osm->x);CHKERRQ(ierr); 556 ierr = PetscFree(osm->y);CHKERRQ(ierr); 557 ierr = PetscFree(pc->data);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "PCSetFromOptions_GASM" 563 static PetscErrorCode PCSetFromOptions_GASM(PC pc) { 564 PC_GASM *osm = (PC_GASM*)pc->data; 565 PetscErrorCode ierr; 566 PetscInt blocks,ovl; 567 PetscBool symset,flg; 568 PCGASMType gasmtype; 569 570 PetscFunctionBegin; 571 /* set the type to symmetric if matrix is symmetric */ 572 if (!osm->type_set && pc->pmat) { 573 ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr); 574 if (symset && flg) { osm->type = PC_GASM_BASIC; } 575 } 576 ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr); 577 ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr); 578 if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); } 579 ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr); 580 if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); } 581 flg = PETSC_FALSE; 582 ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr); 583 if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); } 584 ierr = PetscOptionsTail();CHKERRQ(ierr); 585 PetscFunctionReturn(0); 586 } 587 588 /*------------------------------------------------------------------------------------*/ 589 590 EXTERN_C_BEGIN 591 #undef __FUNCT__ 592 #define __FUNCT__ "PCGASMSetLocalSubdomains_GASM" 593 PetscErrorCode PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[]) 594 { 595 PC_GASM *osm = (PC_GASM*)pc->data; 596 PetscErrorCode ierr; 597 PetscInt i; 598 599 PetscFunctionBegin; 600 if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n); 601 if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp()."); 602 603 if (!pc->setupcalled) { 604 osm->n = n; 605 osm->is = 0; 606 osm->is_local = 0; 607 if (is) { 608 for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);} 609 } 610 if (is_local) { 611 for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);} 612 } 613 if (osm->is) { 614 ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); 615 } 616 if (is) { 617 ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr); 618 for (i=0; i<n; i++) { osm->is[i] = is[i]; } 619 /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */ 620 osm->overlap = -1; 621 } 622 if (is_local) { 623 ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr); 624 for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; } 625 } 626 } 627 PetscFunctionReturn(0); 628 } 629 EXTERN_C_END 630 631 EXTERN_C_BEGIN 632 #undef __FUNCT__ 633 #define __FUNCT__ "PCGASMSetTotalSubdomains_GASM" 634 PetscErrorCode PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N) { 635 PC_GASM *osm = (PC_GASM*)pc->data; 636 PetscErrorCode ierr; 637 PetscMPIInt rank,size; 638 PetscInt n; 639 640 PetscFunctionBegin; 641 if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N); 642 643 /* 644 Split the subdomains equally among all processors 645 */ 646 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 647 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 648 n = N/size + ((N % size) > rank); 649 if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N); 650 if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp()."); 651 if (!pc->setupcalled) { 652 if (osm->is) { 653 ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); 654 } 655 osm->N = N; 656 osm->n = n; 657 osm->is = 0; 658 osm->is_local = 0; 659 } 660 PetscFunctionReturn(0); 661 }/* PCGASMSetTotalSubdomains_GASM() */ 662 EXTERN_C_END 663 664 EXTERN_C_BEGIN 665 #undef __FUNCT__ 666 #define __FUNCT__ "PCGASMSetOverlap_GASM" 667 PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl) 668 { 669 PC_GASM *osm = (PC_GASM*)pc->data; 670 671 PetscFunctionBegin; 672 if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested"); 673 if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp()."); 674 if (!pc->setupcalled) { 675 osm->overlap = ovl; 676 } 677 PetscFunctionReturn(0); 678 } 679 EXTERN_C_END 680 681 EXTERN_C_BEGIN 682 #undef __FUNCT__ 683 #define __FUNCT__ "PCGASMSetType_GASM" 684 PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type) 685 { 686 PC_GASM *osm = (PC_GASM*)pc->data; 687 688 PetscFunctionBegin; 689 osm->type = type; 690 osm->type_set = PETSC_TRUE; 691 PetscFunctionReturn(0); 692 } 693 EXTERN_C_END 694 695 EXTERN_C_BEGIN 696 #undef __FUNCT__ 697 #define __FUNCT__ "PCGASMSetSortIndices_GASM" 698 PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort) 699 { 700 PC_GASM *osm = (PC_GASM*)pc->data; 701 702 PetscFunctionBegin; 703 osm->sort_indices = doSort; 704 PetscFunctionReturn(0); 705 } 706 EXTERN_C_END 707 708 EXTERN_C_BEGIN 709 #undef __FUNCT__ 710 #define __FUNCT__ "PCGASMGetSubKSP_GASM" 711 /* 712 FIX: This routine might need to be modified once multiple ranks per subdomain are allowed. 713 In particular, it would upset the global subdomain number calculation. 714 */ 715 PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp) 716 { 717 PC_GASM *osm = (PC_GASM*)pc->data; 718 PetscErrorCode ierr; 719 720 PetscFunctionBegin; 721 if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here"); 722 723 if (n) { 724 *n = osm->n; 725 } 726 if (first) { 727 ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 728 *first -= osm->n; 729 } 730 if (ksp) { 731 /* Assume that local solves are now different; not necessarily 732 true though! This flag is used only for PCView_GASM() */ 733 *ksp = osm->ksp; 734 osm->same_local_solves = PETSC_FALSE; 735 } 736 PetscFunctionReturn(0); 737 }/* PCGASMGetSubKSP_GASM() */ 738 EXTERN_C_END 739 740 741 #undef __FUNCT__ 742 #define __FUNCT__ "PCGASMSetLocalSubdomains" 743 /*@C 744 PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor 745 only) for the additive Schwarz preconditioner. 746 747 Collective on PC 748 749 Input Parameters: 750 + pc - the preconditioner context 751 . n - the number of subdomains for this processor (default value = 1) 752 . is - the index set that defines the subdomains for this processor 753 (or PETSC_NULL for PETSc to determine subdomains) 754 - is_local - the index sets that define the local part of the subdomains for this processor 755 (or PETSC_NULL to use the default of 1 subdomain per process) 756 757 Notes: 758 The IS numbering is in the parallel, global numbering of the vector. 759 760 By default the GASM preconditioner uses 1 block per processor. 761 762 Use PCGASMSetTotalSubdomains() to set the subdomains for all processors. 763 764 Level: advanced 765 766 .keywords: PC, GASM, set, local, subdomains, additive Schwarz 767 768 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 769 PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains() 770 @*/ 771 PetscErrorCode PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[]) 772 { 773 PetscErrorCode ierr; 774 775 PetscFunctionBegin; 776 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 777 ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr); 778 PetscFunctionReturn(0); 779 } 780 781 #undef __FUNCT__ 782 #define __FUNCT__ "PCGASMSetTotalSubdomains" 783 /*@C 784 PCGASMSetTotalSubdomains - Sets the subdomains for all processor for the 785 additive Schwarz preconditioner. Either all or no processors in the 786 PC communicator must call this routine, with the same index sets. 787 788 Collective on PC 789 790 Input Parameters: 791 + pc - the preconditioner context 792 . n - the number of subdomains for all processors 793 . is - the index sets that define the subdomains for all processor 794 (or PETSC_NULL for PETSc to determine subdomains) 795 - is_local - the index sets that define the local part of the subdomains for this processor 796 (or PETSC_NULL to use the default of 1 subdomain per process) 797 798 Options Database Key: 799 To set the total number of subdomain blocks rather than specify the 800 index sets, use the option 801 . -pc_gasm_blocks <blks> - Sets total blocks 802 803 Notes: 804 Currently you cannot use this to set the actual subdomains with the argument is. 805 806 By default the GASM preconditioner uses 1 block per processor. 807 808 These index sets cannot be destroyed until after completion of the 809 linear solves for which the GASM preconditioner is being used. 810 811 Use PCGASMSetLocalSubdomains() to set local subdomains. 812 813 Level: advanced 814 815 .keywords: PC, GASM, set, total, global, subdomains, additive Schwarz 816 817 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 818 PCGASMCreateSubdomains2D() 819 @*/ 820 PetscErrorCode PCGASMSetTotalSubdomains(PC pc,PetscInt N) 821 { 822 PetscErrorCode ierr; 823 824 PetscFunctionBegin; 825 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 826 ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } 829 830 #undef __FUNCT__ 831 #define __FUNCT__ "PCGASMSetOverlap" 832 /*@ 833 PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the 834 additive Schwarz preconditioner. Either all or no processors in the 835 PC communicator must call this routine. 836 837 Logically Collective on PC 838 839 Input Parameters: 840 + pc - the preconditioner context 841 - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1) 842 843 Options Database Key: 844 . -pc_gasm_overlap <ovl> - Sets overlap 845 846 Notes: 847 By default the GASM preconditioner uses 1 block per processor. To use 848 multiple blocks per perocessor, see PCGASMSetTotalSubdomains() and 849 PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>). 850 851 The overlap defaults to 1, so if one desires that no additional 852 overlap be computed beyond what may have been set with a call to 853 PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl 854 must be set to be 0. In particular, if one does not explicitly set 855 the subdomains an application code, then all overlap would be computed 856 internally by PETSc, and using an overlap of 0 would result in an GASM 857 variant that is equivalent to the block Jacobi preconditioner. 858 859 Note that one can define initial index sets with any overlap via 860 PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine 861 PCGASMSetOverlap() merely allows PETSc to extend that overlap further 862 if desired. 863 864 Level: intermediate 865 866 .keywords: PC, GASM, set, overlap 867 868 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(), 869 PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains() 870 @*/ 871 PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl) 872 { 873 PetscErrorCode ierr; 874 875 PetscFunctionBegin; 876 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 877 PetscValidLogicalCollectiveInt(pc,ovl,2); 878 ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr); 879 PetscFunctionReturn(0); 880 } 881 882 #undef __FUNCT__ 883 #define __FUNCT__ "PCGASMSetType" 884 /*@ 885 PCGASMSetType - Sets the type of restriction and interpolation used 886 for local problems in the additive Schwarz method. 887 888 Logically Collective on PC 889 890 Input Parameters: 891 + pc - the preconditioner context 892 - type - variant of GASM, one of 893 .vb 894 PC_GASM_BASIC - full interpolation and restriction 895 PC_GASM_RESTRICT - full restriction, local processor interpolation 896 PC_GASM_INTERPOLATE - full interpolation, local processor restriction 897 PC_GASM_NONE - local processor restriction and interpolation 898 .ve 899 900 Options Database Key: 901 . -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type 902 903 Level: intermediate 904 905 .keywords: PC, GASM, set, type 906 907 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(), 908 PCGASMCreateSubdomains2D() 909 @*/ 910 PetscErrorCode PCGASMSetType(PC pc,PCGASMType type) 911 { 912 PetscErrorCode ierr; 913 914 PetscFunctionBegin; 915 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 916 PetscValidLogicalCollectiveEnum(pc,type,2); 917 ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr); 918 PetscFunctionReturn(0); 919 } 920 921 #undef __FUNCT__ 922 #define __FUNCT__ "PCGASMSetSortIndices" 923 /*@ 924 PCGASMSetSortIndices - Determines whether subdomain indices are sorted. 925 926 Logically Collective on PC 927 928 Input Parameters: 929 + pc - the preconditioner context 930 - doSort - sort the subdomain indices 931 932 Level: intermediate 933 934 .keywords: PC, GASM, set, type 935 936 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(), 937 PCGASMCreateSubdomains2D() 938 @*/ 939 PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort) 940 { 941 PetscErrorCode ierr; 942 943 PetscFunctionBegin; 944 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 945 PetscValidLogicalCollectiveBool(pc,doSort,2); 946 ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr); 947 PetscFunctionReturn(0); 948 } 949 950 #undef __FUNCT__ 951 #define __FUNCT__ "PCGASMGetSubKSP" 952 /*@C 953 PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on 954 this processor. 955 956 Collective on PC iff first_local is requested 957 958 Input Parameter: 959 . pc - the preconditioner context 960 961 Output Parameters: 962 + n_local - the number of blocks on this processor or PETSC_NULL 963 . first_local - the global number of the first block on this processor or PETSC_NULL, 964 all processors must request or all must pass PETSC_NULL 965 - ksp - the array of KSP contexts 966 967 Note: 968 After PCGASMGetSubKSP() the array of KSPes is not to be freed 969 970 Currently for some matrix implementations only 1 block per processor 971 is supported. 972 973 You must call KSPSetUp() before calling PCGASMGetSubKSP(). 974 975 Level: advanced 976 977 .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context 978 979 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), 980 PCGASMCreateSubdomains2D(), 981 @*/ 982 PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[]) 983 { 984 PetscErrorCode ierr; 985 986 PetscFunctionBegin; 987 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 988 ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr); 989 PetscFunctionReturn(0); 990 } 991 992 /* -------------------------------------------------------------------------------------*/ 993 /*MC 994 PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with 995 its own KSP object. 996 997 Options Database Keys: 998 + -pc_gasm_truelocal - Activates PCGGASMSetUseTrueLocal() 999 . -pc_gasm_blocks <blks> - Sets total blocks 1000 . -pc_gasm_overlap <ovl> - Sets overlap 1001 - -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type 1002 1003 IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you 1004 will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use 1005 -pc_gasm_type basic to use the standard GASM. 1006 1007 Notes: Each processor can have one or more blocks, but a block cannot be shared by more 1008 than one processor. Defaults to one block per processor. 1009 1010 To set options on the solvers for each block append -sub_ to all the KSP, and PC 1011 options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly 1012 1013 To set the options on the solvers separate for each block call PCGASMGetSubKSP() 1014 and set the options directly on the resulting KSP object (you can access its PC 1015 with KSPGetPC()) 1016 1017 1018 Level: beginner 1019 1020 Concepts: additive Schwarz method 1021 1022 References: 1023 An additive variant of the Schwarz alternating method for the case of many subregions 1024 M Dryja, OB Widlund - Courant Institute, New York University Technical report 1025 1026 Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations, 1027 Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X. 1028 1029 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 1030 PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(), 1031 PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType() 1032 1033 M*/ 1034 1035 EXTERN_C_BEGIN 1036 #undef __FUNCT__ 1037 #define __FUNCT__ "PCCreate_GASM" 1038 PetscErrorCode PCCreate_GASM(PC pc) 1039 { 1040 PetscErrorCode ierr; 1041 PC_GASM *osm; 1042 1043 PetscFunctionBegin; 1044 ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr); 1045 osm->N = PETSC_DECIDE; 1046 osm->n = 0; 1047 osm->nmax = 0; 1048 osm->overlap = 1; 1049 osm->ksp = 0; 1050 osm->grestriction = 0; 1051 osm->gprolongation = 0; 1052 osm->gx = 0; 1053 osm->gy = 0; 1054 osm->x = 0; 1055 osm->y = 0; 1056 osm->is = 0; 1057 osm->is_local = 0; 1058 osm->pmat = 0; 1059 osm->type = PC_GASM_RESTRICT; 1060 osm->same_local_solves = PETSC_TRUE; 1061 osm->sort_indices = PETSC_TRUE; 1062 1063 pc->data = (void*)osm; 1064 pc->ops->apply = PCApply_GASM; 1065 pc->ops->applytranspose = PCApplyTranspose_GASM; 1066 pc->ops->setup = PCSetUp_GASM; 1067 pc->ops->reset = PCReset_GASM; 1068 pc->ops->destroy = PCDestroy_GASM; 1069 pc->ops->setfromoptions = PCSetFromOptions_GASM; 1070 pc->ops->setuponblocks = PCSetUpOnBlocks_GASM; 1071 pc->ops->view = PCView_GASM; 1072 pc->ops->applyrichardson = 0; 1073 1074 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM", 1075 PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr); 1076 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM", 1077 PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr); 1078 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM", 1079 PCGASMSetOverlap_GASM);CHKERRQ(ierr); 1080 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM", 1081 PCGASMSetType_GASM);CHKERRQ(ierr); 1082 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM", 1083 PCGASMSetSortIndices_GASM);CHKERRQ(ierr); 1084 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM", 1085 PCGASMGetSubKSP_GASM);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 EXTERN_C_END 1089 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "PCGASMCreateSubdomains" 1093 /*@C 1094 PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz 1095 preconditioner for a any problem on a general grid. 1096 1097 Collective 1098 1099 Input Parameters: 1100 + A - The global matrix operator 1101 - n - the number of local blocks 1102 1103 Output Parameters: 1104 . outis - the array of index sets defining the subdomains 1105 1106 Level: advanced 1107 1108 Note: this generates nonoverlapping subdomains; the PCGASM will generate the overlap 1109 from these if you use PCGASMSetLocalSubdomains() 1110 1111 In the Fortran version you must provide the array outis[] already allocated of length n. 1112 1113 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid 1114 1115 .seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains() 1116 @*/ 1117 PetscErrorCode PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[]) 1118 { 1119 MatPartitioning mpart; 1120 const char *prefix; 1121 PetscErrorCode (*f)(Mat,PetscBool *,MatReuse,Mat*); 1122 PetscMPIInt size; 1123 PetscInt i,j,rstart,rend,bs; 1124 PetscBool iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE; 1125 Mat Ad = PETSC_NULL, adj; 1126 IS ispart,isnumb,*is; 1127 PetscErrorCode ierr; 1128 1129 PetscFunctionBegin; 1130 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1131 PetscValidPointer(outis,3); 1132 if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n); 1133 1134 /* Get prefix, row distribution, and block size */ 1135 ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr); 1136 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1137 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 1138 if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs); 1139 1140 /* Get diagonal block from matrix if possible */ 1141 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1142 ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr); 1143 if (f) { 1144 ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr); 1145 } else if (size == 1) { 1146 iscopy = PETSC_FALSE; Ad = A; 1147 } else { 1148 iscopy = PETSC_FALSE; Ad = PETSC_NULL; 1149 } 1150 if (Ad) { 1151 ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 1152 if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);} 1153 } 1154 if (Ad && n > 1) { 1155 PetscBool match,done; 1156 /* Try to setup a good matrix partitioning if available */ 1157 ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr); 1158 ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr); 1159 ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr); 1160 ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr); 1161 if (!match) { 1162 ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr); 1163 } 1164 if (!match) { /* assume a "good" partitioner is available */ 1165 PetscInt na,*ia,*ja; 1166 ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1167 if (done) { 1168 /* Build adjacency matrix by hand. Unfortunately a call to 1169 MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will 1170 remove the block-aij structure and we cannot expect 1171 MatPartitioning to split vertices as we need */ 1172 PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0; 1173 nnz = 0; 1174 for (i=0; i<na; i++) { /* count number of nonzeros */ 1175 len = ia[i+1] - ia[i]; 1176 row = ja + ia[i]; 1177 for (j=0; j<len; j++) { 1178 if (row[j] == i) { /* don't count diagonal */ 1179 len--; break; 1180 } 1181 } 1182 nnz += len; 1183 } 1184 ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr); 1185 ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr); 1186 nnz = 0; 1187 iia[0] = 0; 1188 for (i=0; i<na; i++) { /* fill adjacency */ 1189 cnt = 0; 1190 len = ia[i+1] - ia[i]; 1191 row = ja + ia[i]; 1192 for (j=0; j<len; j++) { 1193 if (row[j] != i) { /* if not diagonal */ 1194 jja[nnz+cnt++] = row[j]; 1195 } 1196 } 1197 nnz += cnt; 1198 iia[i+1] = nnz; 1199 } 1200 /* Partitioning of the adjacency matrix */ 1201 ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr); 1202 ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr); 1203 ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr); 1204 ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr); 1205 ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr); 1206 ierr = MatDestroy(adj);CHKERRQ(ierr); 1207 foundpart = PETSC_TRUE; 1208 } 1209 ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1210 } 1211 ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr); 1212 } 1213 if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);} 1214 1215 ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr); 1216 *outis = is; 1217 1218 if (!foundpart) { 1219 1220 /* Partitioning by contiguous chunks of rows */ 1221 1222 PetscInt mbs = (rend-rstart)/bs; 1223 PetscInt start = rstart; 1224 for (i=0; i<n; i++) { 1225 PetscInt count = (mbs/n + ((mbs % n) > i)) * bs; 1226 ierr = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr); 1227 start += count; 1228 } 1229 1230 } else { 1231 1232 /* Partitioning by adjacency of diagonal block */ 1233 1234 const PetscInt *numbering; 1235 PetscInt *count,nidx,*indices,*newidx,start=0; 1236 /* Get node count in each partition */ 1237 ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr); 1238 ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr); 1239 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1240 for (i=0; i<n; i++) count[i] *= bs; 1241 } 1242 /* Build indices from node numbering */ 1243 ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr); 1244 ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr); 1245 for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */ 1246 ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr); 1247 ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr); 1248 ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr); 1249 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1250 ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr); 1251 for (i=0; i<nidx; i++) 1252 for (j=0; j<bs; j++) 1253 newidx[i*bs+j] = indices[i]*bs + j; 1254 ierr = PetscFree(indices);CHKERRQ(ierr); 1255 nidx *= bs; 1256 indices = newidx; 1257 } 1258 /* Shift to get global indices */ 1259 for (i=0; i<nidx; i++) indices[i] += rstart; 1260 1261 /* Build the index sets for each block */ 1262 for (i=0; i<n; i++) { 1263 ierr = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr); 1264 ierr = ISSort(is[i]);CHKERRQ(ierr); 1265 start += count[i]; 1266 } 1267 1268 ierr = PetscFree(count); 1269 ierr = PetscFree(indices); 1270 ierr = ISDestroy(isnumb);CHKERRQ(ierr); 1271 ierr = ISDestroy(ispart);CHKERRQ(ierr); 1272 } 1273 1274 PetscFunctionReturn(0); 1275 } 1276 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "PCGASMDestroySubdomains" 1279 /*@C 1280 PCGASMDestroySubdomains - Destroys the index sets created with 1281 PCGASMCreateSubdomains(). Should be called after setting subdomains 1282 with PCGASMSetLocalSubdomains(). 1283 1284 Collective 1285 1286 Input Parameters: 1287 + n - the number of index sets 1288 . is - the array of index sets 1289 - is_local - the array of local index sets, can be PETSC_NULL 1290 1291 Level: advanced 1292 1293 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid 1294 1295 .seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains() 1296 @*/ 1297 PetscErrorCode PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[]) 1298 { 1299 PetscInt i; 1300 PetscErrorCode ierr; 1301 PetscFunctionBegin; 1302 if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n); 1303 PetscValidPointer(is,2); 1304 for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); } 1305 ierr = PetscFree(is);CHKERRQ(ierr); 1306 if (is_local) { 1307 PetscValidPointer(is_local,3); 1308 for (i=0; i<n; i++) { ierr = ISDestroy(is_local[i]);CHKERRQ(ierr); } 1309 ierr = PetscFree(is_local);CHKERRQ(ierr); 1310 } 1311 PetscFunctionReturn(0); 1312 } 1313 1314 1315 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \ 1316 { \ 1317 PetscInt first_row = first/M, last_row = last/M+1; \ 1318 /* \ 1319 Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \ 1320 of the bounding box of the intersection of the subdomain with the local ownership range (local \ 1321 subdomain). \ 1322 Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \ 1323 of the intersection. \ 1324 */ \ 1325 /* ylow_loc is the grid row containing the first element of the local sumbdomain */ \ 1326 *ylow_loc = PetscMax(first_row,ylow); \ 1327 /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \ 1328 *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft; \ 1329 /* yhigh_loc is the grid row above the last local subdomain element */ \ 1330 *yhigh_loc = PetscMin(last_row,yhigh); \ 1331 /* xright is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \ 1332 *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright; \ 1333 /* Now compute the size of the local subdomain n. */ \ 1334 *n = 0; \ 1335 if(*ylow_loc < *yhigh_loc) { \ 1336 PetscInt width = xright-xleft; \ 1337 *n += width*(*yhigh_loc-*ylow_loc-1); \ 1338 *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \ 1339 *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \ 1340 }\ 1341 } 1342 1343 1344 1345 #undef __FUNCT__ 1346 #define __FUNCT__ "PCGASMCreateSubdomains2D" 1347 /*@ 1348 PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz 1349 preconditioner for a two-dimensional problem on a regular grid. 1350 1351 Collective 1352 1353 Input Parameters: 1354 + M, N - the global number of mesh points in the x and y directions 1355 . Mdomains, Ndomains - the global number of subdomains in the x and y directions 1356 . dof - degrees of freedom per node 1357 - overlap - overlap in mesh lines 1358 1359 Output Parameters: 1360 + Nsub - the number of local subdomains created 1361 . is - array of index sets defining overlapping (if overlap > 0) subdomains 1362 - is_local - array of index sets defining non-overlapping subdomains 1363 1364 Note: 1365 Presently PCAMSCreateSubdomains2d() is valid only for sequential 1366 preconditioners. More general related routines are 1367 PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains(). 1368 1369 Level: advanced 1370 1371 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid 1372 1373 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(), 1374 PCGASMSetOverlap() 1375 @*/ 1376 PetscErrorCode PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local) 1377 { 1378 PetscErrorCode ierr; 1379 PetscMPIInt size, rank; 1380 PetscInt i, j; 1381 PetscInt maxheight, maxwidth; 1382 PetscInt xstart, xleft, xright, xleft_loc, xright_loc; 1383 PetscInt ystart, ylow, yhigh, ylow_loc, yhigh_loc; 1384 PetscInt x[2][2], y[2][2], n[2]; 1385 PetscInt first, last; 1386 PetscInt nidx, *idx; 1387 PetscInt ii,jj,s,q,d; 1388 PetscInt k,kk; 1389 PetscMPIInt color; 1390 MPI_Comm comm, subcomm; 1391 1392 IS **iis; 1393 PetscFunctionBegin; 1394 ierr = PetscObjectGetComm((PetscObject)pc, &comm); CHKERRQ(ierr); 1395 ierr = MPI_Comm_size(comm, &size); CHKERRQ(ierr); 1396 ierr = MPI_Comm_rank(comm, &rank); CHKERRQ(ierr); 1397 ierr = MatGetOwnershipRange(pc->pmat, &first, &last); CHKERRQ(ierr); 1398 if(first%dof || last%dof) { 1399 SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) " 1400 "does not respect the number of degrees of freedom per grid point %D", first, last, dof); 1401 } 1402 /* Determine the number of domains with nonzero intersections with the local ownership range. */ 1403 s = 0; 1404 ystart = 0; 1405 for (j=0; j<Ndomains; ++j) { 1406 maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */ 1407 if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N); 1408 /* Vertical domain limits with an overlap. */ 1409 ylow = PetscMax(ystart - overlap,0); 1410 yhigh = PetscMin(ystart + maxheight + overlap,N); 1411 xstart = 0; 1412 for (i=0; i<Mdomains; ++i) { 1413 maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */ 1414 if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M); 1415 /* Horizontal domain limits with an overlap. */ 1416 xleft = PetscMax(xstart - overlap,0); 1417 xright = PetscMin(xstart + maxwidth + overlap,M); 1418 /* 1419 Determine whether this subdomain intersects this processor's ownership range of pc->pmat. 1420 */ 1421 PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx)); 1422 if(nidx) { 1423 ++s; 1424 } 1425 xstart += maxwidth; 1426 }/* for(i = 0; i < Mdomains; ++i) */ 1427 ystart += maxheight; 1428 }/* for(j = 0; j < Ndomains; ++j) */ 1429 /* Now we can allocate the necessary number of ISs. */ 1430 *nsub = s; 1431 ierr = PetscMalloc((*nsub)*sizeof(IS*),is); CHKERRQ(ierr); 1432 ierr = PetscMalloc((*nsub)*sizeof(IS*),is_local); CHKERRQ(ierr); 1433 s = 0; 1434 ystart = 0; 1435 for (j=0; j<Ndomains; ++j) { 1436 maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */ 1437 if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N); 1438 /* Vertical domain limits with an overlap. */ 1439 y[0][0] = PetscMax(ystart - overlap,0); 1440 y[0][1] = PetscMin(ystart + maxheight + overlap,N); 1441 /* Vertical domain limits without an overlap. */ 1442 y[1][0] = ystart; 1443 y[1][1] = PetscMin(ystart + maxheight,N); 1444 xstart = 0; 1445 for (i=0; i<Mdomains; ++i) { 1446 maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */ 1447 if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M); 1448 /* Horizontal domain limits with an overlap. */ 1449 x[0][0] = PetscMax(xstart - overlap,0); 1450 x[0][1] = PetscMin(xstart + maxwidth + overlap,M); 1451 /* Horizontal domain limits without an overlap. */ 1452 x[1][0] = xstart; 1453 x[1][1] = PetscMin(xstart+maxwidth,M); 1454 /* 1455 Determine whether this domain intersects this processor's ownership range of pc->pmat. 1456 Do this twice: first for the domains with overlaps, and once without. 1457 During the first pass create the subcommunicators, and use them on the second pass as well. 1458 */ 1459 for(q = 0; q < 2; ++q) { 1460 /* 1461 domain limits, (xleft, xright) and (ylow, yheigh) are adjusted 1462 according to whether the domain with an overlap or without is considered. 1463 */ 1464 xleft = x[q][0]; xright = x[q][1]; 1465 ylow = y[q][0]; yhigh = y[q][1]; 1466 PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx)); 1467 nidx *= dof; 1468 n[q] = nidx; 1469 /* 1470 Based on the counted number of indices in the local domain *with an overlap*, 1471 construct a subcommunicator of all the processors supporting this domain. 1472 Observe that a domain with an overlap might have nontrivial local support, 1473 while the domain without an overlap might not. Hence, the decision to participate 1474 in the subcommunicator must be based on the domain with an overlap. 1475 */ 1476 if(q == 0) { 1477 if(nidx) { 1478 color = 1; 1479 } 1480 else { 1481 color = MPI_UNDEFINED; 1482 } 1483 ierr = MPI_Comm_split(comm, color, rank, &subcomm); CHKERRQ(ierr); 1484 } 1485 /* 1486 Proceed only if the number of local indices *with an overlap* is nonzero. 1487 */ 1488 if(n[0]) { 1489 if(q == 0) { 1490 iis = is; 1491 } 1492 if(q == 1) { 1493 /* 1494 The IS for the no-overlap subdomain shares a communicator with the overlapping domain. 1495 Moreover, if the overlap is zero, the two ISs are identical. 1496 */ 1497 if(overlap == 0) { 1498 (*is_local)[s] = (*is)[s]; 1499 ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr); 1500 continue; 1501 } 1502 else { 1503 iis = is_local; 1504 subcomm = ((PetscObject)(*is)[s])->comm; 1505 } 1506 }/* if(q == 1) */ 1507 idx = PETSC_NULL; 1508 ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr); 1509 if(nidx) { 1510 k = 0; 1511 for (jj=ylow_loc; jj<yhigh_loc; ++jj) { 1512 PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft; 1513 PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright; 1514 kk = dof*(M*jj + x0); 1515 for (ii=x0; ii<x1; ++ii) { 1516 for(d = 0; d < dof; ++d) { 1517 idx[k++] = kk++; 1518 } 1519 } 1520 } 1521 } 1522 ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr); 1523 }/* if(n[0]) */ 1524 }/* for(q = 0; q < 2; ++q) */ 1525 if(n[0]) { 1526 ++s; 1527 } 1528 xstart += maxwidth; 1529 }/* for(i = 0; i < Mdomains; ++i) */ 1530 ystart += maxheight; 1531 }/* for(j = 0; j < Ndomains; ++j) */ 1532 PetscFunctionReturn(0); 1533 } 1534 1535 #undef __FUNCT__ 1536 #define __FUNCT__ "PCGASMGetLocalSubdomains" 1537 /*@C 1538 PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor 1539 only) for the additive Schwarz preconditioner. 1540 1541 Not Collective 1542 1543 Input Parameter: 1544 . pc - the preconditioner context 1545 1546 Output Parameters: 1547 + n - the number of subdomains for this processor (default value = 1) 1548 . is - the index sets that define the subdomains for this processor 1549 - is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL) 1550 1551 1552 Notes: 1553 The IS numbering is in the parallel, global numbering of the vector. 1554 1555 Level: advanced 1556 1557 .keywords: PC, GASM, set, local, subdomains, additive Schwarz 1558 1559 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 1560 PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices() 1561 @*/ 1562 PetscErrorCode PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[]) 1563 { 1564 PC_GASM *osm; 1565 PetscErrorCode ierr; 1566 PetscBool match; 1567 1568 PetscFunctionBegin; 1569 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1570 PetscValidIntPointer(n,2); 1571 if (is) PetscValidPointer(is,3); 1572 ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr); 1573 if (!match) { 1574 if (n) *n = 0; 1575 if (is) *is = PETSC_NULL; 1576 } else { 1577 osm = (PC_GASM*)pc->data; 1578 if (n) *n = osm->n; 1579 if (is) *is = osm->is; 1580 if (is_local) *is_local = osm->is_local; 1581 } 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "PCGASMGetLocalSubmatrices" 1587 /*@C 1588 PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor 1589 only) for the additive Schwarz preconditioner. 1590 1591 Not Collective 1592 1593 Input Parameter: 1594 . pc - the preconditioner context 1595 1596 Output Parameters: 1597 + n - the number of matrices for this processor (default value = 1) 1598 - mat - the matrices 1599 1600 1601 Level: advanced 1602 1603 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi 1604 1605 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 1606 PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains() 1607 @*/ 1608 PetscErrorCode PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[]) 1609 { 1610 PC_GASM *osm; 1611 PetscErrorCode ierr; 1612 PetscBool match; 1613 1614 PetscFunctionBegin; 1615 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1616 PetscValidIntPointer(n,2); 1617 if (mat) PetscValidPointer(mat,3); 1618 if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp()."); 1619 ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr); 1620 if (!match) { 1621 if (n) *n = 0; 1622 if (mat) *mat = PETSC_NULL; 1623 } else { 1624 osm = (PC_GASM*)pc->data; 1625 if (n) *n = osm->n; 1626 if (mat) *mat = osm->pmat; 1627 } 1628 PetscFunctionReturn(0); 1629 } 1630