1 2 /* 3 This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation. 4 In this version each processor may have any number of subdomains and a subdomain may live on multiple 5 processors. 6 7 N - total number of subdomains on all processors 8 n - actual number of subdomains on this processor 9 nmax - maximum number of subdomains per processor 10 */ 11 #include <private/pcimpl.h> /*I "petscpc.h" I*/ 12 13 typedef struct { 14 PetscInt N,n,nmax; 15 PetscInt overlap; /* overlap requested by user */ 16 KSP *ksp; /* linear solvers for each block */ 17 Vec gx,gy; /* Merged work vectors */ 18 Vec *x,*y; /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */ 19 IS gis, gis_local; /* merged ISs */ 20 VecScatter grestriction; /* merged restriction */ 21 VecScatter gprolongation; /* merged prolongation */ 22 IS *is; /* index set that defines each overlapping subdomain */ 23 IS *is_local; /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */ 24 Mat *pmat; /* subdomain block matrices */ 25 PCGASMType type; /* use reduced interpolation, restriction or both */ 26 PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */ 27 PetscBool same_local_solves; /* flag indicating whether all local solvers are same */ 28 PetscBool sort_indices; /* flag to sort subdomain indices */ 29 } PC_GASM; 30 31 #undef __FUNCT__ 32 #define __FUNCT__ "PCGASMPrintSubdomains" 33 static PetscErrorCode PCGASMPrintSubdomains(PC pc) 34 { 35 PC_GASM *osm = (PC_GASM*)pc->data; 36 const char *prefix; 37 char fname[PETSC_MAX_PATH_LEN+1]; 38 PetscViewer viewer, sviewer; 39 PetscInt i,j,nidx; 40 const PetscInt *idx; 41 char *cidx; 42 PetscBool found; 43 PetscMPIInt size, rank; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr); 48 ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr); 49 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 50 ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr); 51 if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }; 52 for (i=0;i<osm->n;++i) { 53 ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr); 54 ierr = ISGetLocalSize(osm->is[i], &nidx);CHKERRQ(ierr); 55 /* 56 No more than 15 characters per index plus a space. 57 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 58 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 59 For nidx == 0, the whole string 16 '\0'. 60 */ 61 ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr); 62 ierr = ISGetIndices(osm->is[i], &idx);CHKERRQ(ierr); 63 ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr); 64 for(j = 0; j < nidx; ++j) { 65 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr); 66 } 67 ierr = PetscViewerDestroy(sviewer);CHKERRQ(ierr); 68 ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr); 69 70 ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n");CHKERRQ(ierr); 71 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr); 72 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 73 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr); 74 ierr = PetscFree(cidx);CHKERRQ(ierr); 75 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 76 if (osm->is_local) { 77 ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr); 78 ierr = ISGetLocalSize(osm->is_local[i], &nidx);CHKERRQ(ierr); 79 /* 80 No more than 15 characters per index plus a space. 81 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 82 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 83 For nidx == 0, the whole string 16 '\0'. 84 */ 85 ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr); 86 ierr = ISGetIndices(osm->is_local[i], &idx);CHKERRQ(ierr); 87 ierr = PetscViewerStringOpen(((PetscObject)(osm->is_local[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr); 88 for(j = 0; j < nidx; ++j) { 89 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr); 90 } 91 ierr = PetscViewerDestroy(sviewer);CHKERRQ(ierr); 92 ierr = ISRestoreIndices(osm->is_local[i],&idx);CHKERRQ(ierr); 93 94 ierr = PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n");CHKERRQ(ierr); 95 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr); 96 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 97 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr); 98 ierr = PetscFree(cidx);CHKERRQ(ierr); 99 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 100 } 101 } 102 PetscFunctionReturn(0); 103 } 104 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "PCView_GASM" 108 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer) 109 { 110 PC_GASM *osm = (PC_GASM*)pc->data; 111 const char *prefix; 112 PetscErrorCode ierr; 113 PetscMPIInt rank, size; 114 PetscInt i,bsz; 115 PetscBool iascii,isstring, print_subdomains; 116 PetscViewer sviewer; 117 118 119 PetscFunctionBegin; 120 ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr); 121 ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr); 122 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 123 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr); 124 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 125 ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr); 126 if (iascii) { 127 char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set"; 128 if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);} 129 if (osm->nmax > 0) {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);} 130 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr); 131 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 132 ierr = PetscViewerASCIIPrintf(viewer," Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr); 133 ierr = PetscViewerASCIIPrintf(viewer," Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr); 134 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 135 if (osm->same_local_solves) { 136 if (osm->ksp) { 137 ierr = PetscViewerASCIIPrintf(viewer," Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr); 138 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 139 if (!rank) { 140 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 141 ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr); 142 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 143 } 144 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 145 } 146 } else { 147 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 148 ierr = PetscViewerASCIIPrintf(viewer," Local solve info for each subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr); 149 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 150 ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 151 for (i=0; i<osm->nmax; i++) { 152 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 153 if (i < osm->n) { 154 ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr); 155 ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr); 156 ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr); 157 ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 158 } 159 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 160 } 161 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 162 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 163 } 164 } else if (isstring) { 165 ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr); 166 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 167 if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);} 168 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 169 } else { 170 SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name); 171 } 172 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 173 if(print_subdomains) { 174 ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); 175 } 176 PetscFunctionReturn(0); 177 } 178 179 180 181 182 183 #undef __FUNCT__ 184 #define __FUNCT__ "PCSetUp_GASM" 185 static PetscErrorCode PCSetUp_GASM(PC pc) 186 { 187 PC_GASM *osm = (PC_GASM*)pc->data; 188 PetscErrorCode ierr; 189 PetscBool symset,flg; 190 PetscInt i; 191 PetscMPIInt rank, size; 192 MatReuse scall = MAT_REUSE_MATRIX; 193 KSP ksp; 194 PC subpc; 195 const char *prefix,*pprefix; 196 PetscInt dn; /* Number of indices in a single subdomain assigned to this processor. */ 197 const PetscInt *didx; /* Indices from a single subdomain assigned to this processor. */ 198 PetscInt ddn; /* Number of indices in all subdomains assigned to this processor. */ 199 PetscInt *ddidx; /* Indices of all subdomains assigned to this processor. */ 200 IS gid; /* Identity IS of the size of all subdomains assigned to this processor. */ 201 Vec x,y; 202 PetscScalar *gxarray, *gyarray; 203 PetscInt gfirst, glast; 204 205 PetscFunctionBegin; 206 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 207 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 208 if (!pc->setupcalled) { 209 210 if (!osm->type_set) { 211 ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr); 212 if (symset && flg) { osm->type = PC_GASM_BASIC; } 213 } 214 215 if (osm->N == PETSC_DECIDE && osm->n < 1) { 216 /* no subdomains given, use one per processor */ 217 osm->nmax = osm->n = 1; 218 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 219 osm->N = size; 220 } else if (osm->N == PETSC_DECIDE) { 221 PetscInt inwork[2], outwork[2]; 222 /* determine global number of subdomains and the max number of local subdomains */ 223 inwork[0] = inwork[1] = osm->n; 224 ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr); 225 osm->nmax = outwork[0]; 226 osm->N = outwork[1]; 227 } 228 if (!osm->is){ /* create the index sets */ 229 ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr); 230 } 231 if (!osm->is_local) { 232 /* 233 This indicates that osm->is should define a nonoverlapping decomposition 234 (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetLocalSubdomains, 235 but the assumption is that either the user does the right thing, or subdomains in ossm->is have been created 236 via PCGASMCreateSubdomains, which guarantees a nonoverlapping decomposition). 237 Therefore, osm->is will be used to define osm->is_local. 238 If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap, 239 so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping. 240 Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is. 241 */ 242 ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr); 243 for (i=0; i<osm->n; i++) { 244 if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */ 245 ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr); 246 ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr); 247 } else { 248 ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr); 249 osm->is_local[i] = osm->is[i]; 250 } 251 } 252 } 253 /* Beyond this point osm->is_local is not null. */ 254 if (osm->overlap > 0) { 255 /* Extend the "overlapping" regions by a number of steps */ 256 ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr); 257 } 258 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 259 flg = PETSC_FALSE; 260 ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr); 261 if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); } 262 263 if (osm->sort_indices) { 264 for (i=0; i<osm->n; i++) { 265 ierr = ISSort(osm->is[i]);CHKERRQ(ierr); 266 ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr); 267 } 268 } 269 /* Merge the ISs, create merged vectors and scatter contexts. */ 270 /* Restriction ISs. */ 271 ddn = 0; 272 for (i=0; i<osm->n; i++) { 273 ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr); 274 ddn += dn; 275 } 276 ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx);CHKERRQ(ierr); 277 ddn = 0; 278 for (i=0; i<osm->n; i++) { 279 ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr); 280 ierr = ISGetIndices(osm->is[i],&didx);CHKERRQ(ierr); 281 ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn);CHKERRQ(ierr); 282 ierr = ISRestoreIndices(osm->is[i], &didx);CHKERRQ(ierr); 283 ddn += dn; 284 } 285 ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis);CHKERRQ(ierr); 286 ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr); 287 ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx);CHKERRQ(ierr); 288 ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr); 289 ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr); 290 ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid);CHKERRQ(ierr); 291 ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction));CHKERRQ(ierr); 292 ierr = ISDestroy(gid);CHKERRQ(ierr); 293 /* Prolongation ISs */ 294 { PetscInt dn_local; /* Number of indices in the local part of a single domain assigned to this processor. */ 295 const PetscInt *didx_local; /* Global indices from the local part of a single domain assigned to this processor. */ 296 PetscInt ddn_local; /* Number of indices in the local part of the disjoint union all domains assigned to this processor. */ 297 PetscInt *ddidx_local; /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */ 298 /**/ 299 ISLocalToGlobalMapping ltog; /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */ 300 PetscInt *ddidx_llocal; /* Mapped local indices of the disjoint union of local parts of subdomains. */ 301 PetscInt ddn_llocal; /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */ 302 IS gis_llocal; /* IS with ddidx_llocal indices. */ 303 PetscInt j; 304 ddn_local = 0; 305 for (i=0; i<osm->n; i++) { 306 ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr); 307 ddn_local += dn_local; 308 } 309 ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local);CHKERRQ(ierr); 310 ddn_local = 0; 311 for (i=0; i<osm->n; i++) { 312 ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr); 313 ierr = ISGetIndices(osm->is_local[i],&didx_local);CHKERRQ(ierr); 314 ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local);CHKERRQ(ierr); 315 ierr = ISRestoreIndices(osm->is_local[i], &didx_local);CHKERRQ(ierr); 316 ddn_local += dn_local; 317 } 318 ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal);CHKERRQ(ierr); 319 ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local));CHKERRQ(ierr); 320 ierr = ISLocalToGlobalMappingCreateIS(osm->gis,<og);CHKERRQ(ierr); 321 ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr); 322 ierr = ISLocalToGlobalMappingDestroy(ltog);CHKERRQ(ierr); 323 if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local); 324 /* Now convert these localized indices into the global indices into the merged output vector. */ 325 ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast);CHKERRQ(ierr); 326 for(j=0; j < ddn_llocal; ++j) { 327 ddidx_llocal[j] += gfirst; 328 } 329 ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal);CHKERRQ(ierr); 330 ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);CHKERRQ(ierr); 331 ierr = ISDestroy(gis_llocal);CHKERRQ(ierr); 332 } 333 /* Create the subdomain work vectors. */ 334 ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr); 335 ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr); 336 ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr); 337 ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr); 338 ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr); 339 for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) { 340 PetscInt dN; 341 ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr); 342 ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr); 343 ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr); 344 ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr); 345 } 346 ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr); 347 ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr); 348 ierr = VecDestroy(x);CHKERRQ(ierr); 349 ierr = VecDestroy(y);CHKERRQ(ierr); 350 /* Create the local solvers */ 351 ierr = PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr); 352 for (i=0; i<osm->n; i++) { /* KSPs are local */ 353 ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr); 354 ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr); 355 ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr); 356 ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr); 357 ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); 358 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 359 ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr); 360 ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr); 361 osm->ksp[i] = ksp; 362 } 363 scall = MAT_INITIAL_MATRIX; 364 365 } else { 366 /* 367 Destroy the blocks from the previous iteration 368 */ 369 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 370 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 371 scall = MAT_INITIAL_MATRIX; 372 } 373 } 374 375 /* 376 Extract out the submatrices. 377 */ 378 if(size > 1) { 379 ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr); 380 } 381 else { 382 ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr); 383 } 384 if (scall == MAT_INITIAL_MATRIX) { 385 ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr); 386 for (i=0; i<osm->n; i++) { 387 ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr); 388 ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr); 389 } 390 } 391 392 /* Return control to the user so that the submatrices can be modified (e.g., to apply 393 different boundary conditions for the submatrices than for the global problem) */ 394 ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr); 395 396 /* 397 Loop over submatrices putting them into local ksp 398 */ 399 for (i=0; i<osm->n; i++) { 400 ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr); 401 if (!pc->setupcalled) { 402 ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr); 403 } 404 } 405 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "PCSetUpOnBlocks_GASM" 411 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc) 412 { 413 PC_GASM *osm = (PC_GASM*)pc->data; 414 PetscErrorCode ierr; 415 PetscInt i; 416 417 PetscFunctionBegin; 418 for (i=0; i<osm->n; i++) { 419 ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr); 420 } 421 PetscFunctionReturn(0); 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "PCApply_GASM" 426 static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y) 427 { 428 PC_GASM *osm = (PC_GASM*)pc->data; 429 PetscErrorCode ierr; 430 PetscInt i; 431 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 432 433 PetscFunctionBegin; 434 /* 435 Support for limiting the restriction or interpolation to only local 436 subdomain values (leaving the other values 0). 437 */ 438 if (!(osm->type & PC_GASM_RESTRICT)) { 439 forward = SCATTER_FORWARD_LOCAL; 440 /* have to zero the work RHS since scatter may leave some slots empty */ 441 ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr); 442 } 443 if (!(osm->type & PC_GASM_INTERPOLATE)) { 444 reverse = SCATTER_REVERSE_LOCAL; 445 } 446 447 ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 448 ierr = VecZeroEntries(y);CHKERRQ(ierr); 449 ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 450 /* do the subdomain solves */ 451 for (i=0; i<osm->n; ++i) { 452 ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 453 } 454 ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 455 ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 456 PetscFunctionReturn(0); 457 } 458 459 #undef __FUNCT__ 460 #define __FUNCT__ "PCApplyTranspose_GASM" 461 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y) 462 { 463 PC_GASM *osm = (PC_GASM*)pc->data; 464 PetscErrorCode ierr; 465 PetscInt i; 466 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 467 468 PetscFunctionBegin; 469 /* 470 Support for limiting the restriction or interpolation to only local 471 subdomain values (leaving the other values 0). 472 473 Note: these are reversed from the PCApply_GASM() because we are applying the 474 transpose of the three terms 475 */ 476 if (!(osm->type & PC_GASM_INTERPOLATE)) { 477 forward = SCATTER_FORWARD_LOCAL; 478 /* have to zero the work RHS since scatter may leave some slots empty */ 479 ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr); 480 } 481 if (!(osm->type & PC_GASM_RESTRICT)) { 482 reverse = SCATTER_REVERSE_LOCAL; 483 } 484 485 ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 486 ierr = VecZeroEntries(y);CHKERRQ(ierr); 487 ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 488 /* do the local solves */ 489 for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */ 490 ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 491 } 492 ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 493 ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 494 PetscFunctionReturn(0); 495 } 496 497 #undef __FUNCT__ 498 #define __FUNCT__ "PCReset_GASM" 499 static PetscErrorCode PCReset_GASM(PC pc) 500 { 501 PC_GASM *osm = (PC_GASM*)pc->data; 502 PetscErrorCode ierr; 503 PetscInt i; 504 505 PetscFunctionBegin; 506 if (osm->ksp) { 507 for (i=0; i<osm->n; i++) { 508 ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr); 509 } 510 } 511 if (osm->pmat) { 512 if (osm->n > 0) { 513 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 514 } 515 } 516 if (osm->x) { 517 for (i=0; i<osm->n; i++) { 518 if (osm->x[i]) {ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr);} 519 if (osm->y[i]) {ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr);} 520 } 521 } 522 if (osm->gx) {ierr = VecDestroy(osm->gx);CHKERRQ(ierr);} 523 if (osm->gy) {ierr = VecDestroy(osm->gy);CHKERRQ(ierr);} 524 525 if (osm->grestriction) {ierr = VecScatterDestroy(osm->grestriction);CHKERRQ(ierr);} 526 if (osm->gprolongation) {ierr = VecScatterDestroy(osm->gprolongation);CHKERRQ(ierr);} 527 if (osm->is) {ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); osm->is = 0;} 528 if (osm->gis) {ierr = ISDestroy(osm->gis);CHKERRQ(ierr);} 529 if (osm->gis_local) {ierr = ISDestroy(osm->gis_local);CHKERRQ(ierr);} 530 PetscFunctionReturn(0); 531 } 532 533 #undef __FUNCT__ 534 #define __FUNCT__ "PCDestroy_GASM" 535 static PetscErrorCode PCDestroy_GASM(PC pc) 536 { 537 PC_GASM *osm = (PC_GASM*)pc->data; 538 PetscErrorCode ierr; 539 PetscInt i; 540 541 PetscFunctionBegin; 542 ierr = PCReset_GASM(pc);CHKERRQ(ierr); 543 if (osm->ksp) { 544 for (i=0; i<osm->n; i++) { 545 ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr); 546 } 547 ierr = PetscFree(osm->ksp);CHKERRQ(ierr); 548 } 549 ierr = PetscFree(osm->x);CHKERRQ(ierr); 550 ierr = PetscFree(osm->y);CHKERRQ(ierr); 551 ierr = PetscFree(pc->data);CHKERRQ(ierr); 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "PCSetFromOptions_GASM" 557 static PetscErrorCode PCSetFromOptions_GASM(PC pc) { 558 PC_GASM *osm = (PC_GASM*)pc->data; 559 PetscErrorCode ierr; 560 PetscInt blocks,ovl; 561 PetscBool symset,flg; 562 PCGASMType gasmtype; 563 564 PetscFunctionBegin; 565 /* set the type to symmetric if matrix is symmetric */ 566 if (!osm->type_set && pc->pmat) { 567 ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr); 568 if (symset && flg) { osm->type = PC_GASM_BASIC; } 569 } 570 ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr); 571 ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr); 572 if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); } 573 ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr); 574 if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); } 575 flg = PETSC_FALSE; 576 ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr); 577 if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); } 578 ierr = PetscOptionsTail();CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 582 /*------------------------------------------------------------------------------------*/ 583 584 EXTERN_C_BEGIN 585 #undef __FUNCT__ 586 #define __FUNCT__ "PCGASMSetLocalSubdomains_GASM" 587 PetscErrorCode PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[]) 588 { 589 PC_GASM *osm = (PC_GASM*)pc->data; 590 PetscErrorCode ierr; 591 PetscInt i; 592 593 PetscFunctionBegin; 594 if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n); 595 if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp()."); 596 597 if (!pc->setupcalled) { 598 osm->n = n; 599 osm->is = 0; 600 osm->is_local = 0; 601 if (is) { 602 for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);} 603 } 604 if (is_local) { 605 for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);} 606 } 607 if (osm->is) { 608 ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); 609 } 610 if (is) { 611 ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr); 612 for (i=0; i<n; i++) { osm->is[i] = is[i]; } 613 /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */ 614 osm->overlap = -1; 615 } 616 if (is_local) { 617 ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr); 618 for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; } 619 } 620 } 621 PetscFunctionReturn(0); 622 } 623 EXTERN_C_END 624 625 EXTERN_C_BEGIN 626 #undef __FUNCT__ 627 #define __FUNCT__ "PCGASMSetTotalSubdomains_GASM" 628 PetscErrorCode PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N) { 629 PC_GASM *osm = (PC_GASM*)pc->data; 630 PetscErrorCode ierr; 631 PetscMPIInt rank,size; 632 PetscInt n; 633 634 PetscFunctionBegin; 635 if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N); 636 637 /* 638 Split the subdomains equally among all processors 639 */ 640 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 641 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 642 n = N/size + ((N % size) > rank); 643 if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N); 644 if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp()."); 645 if (!pc->setupcalled) { 646 if (osm->is) { 647 ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); 648 } 649 osm->N = N; 650 osm->n = n; 651 osm->is = 0; 652 osm->is_local = 0; 653 } 654 PetscFunctionReturn(0); 655 }/* PCGASMSetTotalSubdomains_GASM() */ 656 EXTERN_C_END 657 658 EXTERN_C_BEGIN 659 #undef __FUNCT__ 660 #define __FUNCT__ "PCGASMSetOverlap_GASM" 661 PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl) 662 { 663 PC_GASM *osm = (PC_GASM*)pc->data; 664 665 PetscFunctionBegin; 666 if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested"); 667 if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp()."); 668 if (!pc->setupcalled) { 669 osm->overlap = ovl; 670 } 671 PetscFunctionReturn(0); 672 } 673 EXTERN_C_END 674 675 EXTERN_C_BEGIN 676 #undef __FUNCT__ 677 #define __FUNCT__ "PCGASMSetType_GASM" 678 PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type) 679 { 680 PC_GASM *osm = (PC_GASM*)pc->data; 681 682 PetscFunctionBegin; 683 osm->type = type; 684 osm->type_set = PETSC_TRUE; 685 PetscFunctionReturn(0); 686 } 687 EXTERN_C_END 688 689 EXTERN_C_BEGIN 690 #undef __FUNCT__ 691 #define __FUNCT__ "PCGASMSetSortIndices_GASM" 692 PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort) 693 { 694 PC_GASM *osm = (PC_GASM*)pc->data; 695 696 PetscFunctionBegin; 697 osm->sort_indices = doSort; 698 PetscFunctionReturn(0); 699 } 700 EXTERN_C_END 701 702 EXTERN_C_BEGIN 703 #undef __FUNCT__ 704 #define __FUNCT__ "PCGASMGetSubKSP_GASM" 705 /* 706 FIX: This routine might need to be modified once multiple ranks per subdomain are allowed. 707 In particular, it would upset the global subdomain number calculation. 708 */ 709 PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp) 710 { 711 PC_GASM *osm = (PC_GASM*)pc->data; 712 PetscErrorCode ierr; 713 714 PetscFunctionBegin; 715 if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here"); 716 717 if (n) { 718 *n = osm->n; 719 } 720 if (first) { 721 ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 722 *first -= osm->n; 723 } 724 if (ksp) { 725 /* Assume that local solves are now different; not necessarily 726 true though! This flag is used only for PCView_GASM() */ 727 *ksp = osm->ksp; 728 osm->same_local_solves = PETSC_FALSE; 729 } 730 PetscFunctionReturn(0); 731 }/* PCGASMGetSubKSP_GASM() */ 732 EXTERN_C_END 733 734 735 #undef __FUNCT__ 736 #define __FUNCT__ "PCGASMSetLocalSubdomains" 737 /*@C 738 PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor 739 only) for the additive Schwarz preconditioner. 740 741 Collective on PC 742 743 Input Parameters: 744 + pc - the preconditioner context 745 . n - the number of subdomains for this processor (default value = 1) 746 . is - the index set that defines the subdomains for this processor 747 (or PETSC_NULL for PETSc to determine subdomains) 748 - is_local - the index sets that define the local part of the subdomains for this processor 749 (or PETSC_NULL to use the default of 1 subdomain per process) 750 751 Notes: 752 The IS numbering is in the parallel, global numbering of the vector. 753 754 By default the GASM preconditioner uses 1 block per processor. 755 756 Use PCGASMSetTotalSubdomains() to set the subdomains for all processors. 757 758 Level: advanced 759 760 .keywords: PC, GASM, set, local, subdomains, additive Schwarz 761 762 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 763 PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains() 764 @*/ 765 PetscErrorCode PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[]) 766 { 767 PetscErrorCode ierr; 768 769 PetscFunctionBegin; 770 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 771 ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr); 772 PetscFunctionReturn(0); 773 } 774 775 #undef __FUNCT__ 776 #define __FUNCT__ "PCGASMSetTotalSubdomains" 777 /*@C 778 PCGASMSetTotalSubdomains - Sets the subdomains for all processor for the 779 additive Schwarz preconditioner. Either all or no processors in the 780 PC communicator must call this routine, with the same index sets. 781 782 Collective on PC 783 784 Input Parameters: 785 + pc - the preconditioner context 786 . n - the number of subdomains for all processors 787 . is - the index sets that define the subdomains for all processor 788 (or PETSC_NULL for PETSc to determine subdomains) 789 - is_local - the index sets that define the local part of the subdomains for this processor 790 (or PETSC_NULL to use the default of 1 subdomain per process) 791 792 Options Database Key: 793 To set the total number of subdomain blocks rather than specify the 794 index sets, use the option 795 . -pc_gasm_blocks <blks> - Sets total blocks 796 797 Notes: 798 Currently you cannot use this to set the actual subdomains with the argument is. 799 800 By default the GASM preconditioner uses 1 block per processor. 801 802 These index sets cannot be destroyed until after completion of the 803 linear solves for which the GASM preconditioner is being used. 804 805 Use PCGASMSetLocalSubdomains() to set local subdomains. 806 807 Level: advanced 808 809 .keywords: PC, GASM, set, total, global, subdomains, additive Schwarz 810 811 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 812 PCGASMCreateSubdomains2D() 813 @*/ 814 PetscErrorCode PCGASMSetTotalSubdomains(PC pc,PetscInt N) 815 { 816 PetscErrorCode ierr; 817 818 PetscFunctionBegin; 819 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 820 ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr); 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "PCGASMSetOverlap" 826 /*@ 827 PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the 828 additive Schwarz preconditioner. Either all or no processors in the 829 PC communicator must call this routine. 830 831 Logically Collective on PC 832 833 Input Parameters: 834 + pc - the preconditioner context 835 - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1) 836 837 Options Database Key: 838 . -pc_gasm_overlap <ovl> - Sets overlap 839 840 Notes: 841 By default the GASM preconditioner uses 1 block per processor. To use 842 multiple blocks per perocessor, see PCGASMSetTotalSubdomains() and 843 PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>). 844 845 The overlap defaults to 1, so if one desires that no additional 846 overlap be computed beyond what may have been set with a call to 847 PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl 848 must be set to be 0. In particular, if one does not explicitly set 849 the subdomains an application code, then all overlap would be computed 850 internally by PETSc, and using an overlap of 0 would result in an GASM 851 variant that is equivalent to the block Jacobi preconditioner. 852 853 Note that one can define initial index sets with any overlap via 854 PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine 855 PCGASMSetOverlap() merely allows PETSc to extend that overlap further 856 if desired. 857 858 Level: intermediate 859 860 .keywords: PC, GASM, set, overlap 861 862 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(), 863 PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains() 864 @*/ 865 PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl) 866 { 867 PetscErrorCode ierr; 868 869 PetscFunctionBegin; 870 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 871 PetscValidLogicalCollectiveInt(pc,ovl,2); 872 ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr); 873 PetscFunctionReturn(0); 874 } 875 876 #undef __FUNCT__ 877 #define __FUNCT__ "PCGASMSetType" 878 /*@ 879 PCGASMSetType - Sets the type of restriction and interpolation used 880 for local problems in the additive Schwarz method. 881 882 Logically Collective on PC 883 884 Input Parameters: 885 + pc - the preconditioner context 886 - type - variant of GASM, one of 887 .vb 888 PC_GASM_BASIC - full interpolation and restriction 889 PC_GASM_RESTRICT - full restriction, local processor interpolation 890 PC_GASM_INTERPOLATE - full interpolation, local processor restriction 891 PC_GASM_NONE - local processor restriction and interpolation 892 .ve 893 894 Options Database Key: 895 . -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type 896 897 Level: intermediate 898 899 .keywords: PC, GASM, set, type 900 901 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(), 902 PCGASMCreateSubdomains2D() 903 @*/ 904 PetscErrorCode PCGASMSetType(PC pc,PCGASMType type) 905 { 906 PetscErrorCode ierr; 907 908 PetscFunctionBegin; 909 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 910 PetscValidLogicalCollectiveEnum(pc,type,2); 911 ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr); 912 PetscFunctionReturn(0); 913 } 914 915 #undef __FUNCT__ 916 #define __FUNCT__ "PCGASMSetSortIndices" 917 /*@ 918 PCGASMSetSortIndices - Determines whether subdomain indices are sorted. 919 920 Logically Collective on PC 921 922 Input Parameters: 923 + pc - the preconditioner context 924 - doSort - sort the subdomain indices 925 926 Level: intermediate 927 928 .keywords: PC, GASM, set, type 929 930 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(), 931 PCGASMCreateSubdomains2D() 932 @*/ 933 PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort) 934 { 935 PetscErrorCode ierr; 936 937 PetscFunctionBegin; 938 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 939 PetscValidLogicalCollectiveBool(pc,doSort,2); 940 ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr); 941 PetscFunctionReturn(0); 942 } 943 944 #undef __FUNCT__ 945 #define __FUNCT__ "PCGASMGetSubKSP" 946 /*@C 947 PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on 948 this processor. 949 950 Collective on PC iff first_local is requested 951 952 Input Parameter: 953 . pc - the preconditioner context 954 955 Output Parameters: 956 + n_local - the number of blocks on this processor or PETSC_NULL 957 . first_local - the global number of the first block on this processor or PETSC_NULL, 958 all processors must request or all must pass PETSC_NULL 959 - ksp - the array of KSP contexts 960 961 Note: 962 After PCGASMGetSubKSP() the array of KSPes is not to be freed 963 964 Currently for some matrix implementations only 1 block per processor 965 is supported. 966 967 You must call KSPSetUp() before calling PCGASMGetSubKSP(). 968 969 Level: advanced 970 971 .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context 972 973 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), 974 PCGASMCreateSubdomains2D(), 975 @*/ 976 PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[]) 977 { 978 PetscErrorCode ierr; 979 980 PetscFunctionBegin; 981 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 982 ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr); 983 PetscFunctionReturn(0); 984 } 985 986 /* -------------------------------------------------------------------------------------*/ 987 /*MC 988 PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with 989 its own KSP object. 990 991 Options Database Keys: 992 + -pc_gasm_truelocal - Activates PCGGASMSetUseTrueLocal() 993 . -pc_gasm_blocks <blks> - Sets total blocks 994 . -pc_gasm_overlap <ovl> - Sets overlap 995 - -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type 996 997 IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you 998 will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use 999 -pc_gasm_type basic to use the standard GASM. 1000 1001 Notes: Each processor can have one or more blocks, but a block cannot be shared by more 1002 than one processor. Defaults to one block per processor. 1003 1004 To set options on the solvers for each block append -sub_ to all the KSP, and PC 1005 options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly 1006 1007 To set the options on the solvers separate for each block call PCGASMGetSubKSP() 1008 and set the options directly on the resulting KSP object (you can access its PC 1009 with KSPGetPC()) 1010 1011 1012 Level: beginner 1013 1014 Concepts: additive Schwarz method 1015 1016 References: 1017 An additive variant of the Schwarz alternating method for the case of many subregions 1018 M Dryja, OB Widlund - Courant Institute, New York University Technical report 1019 1020 Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations, 1021 Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X. 1022 1023 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 1024 PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(), 1025 PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType() 1026 1027 M*/ 1028 1029 EXTERN_C_BEGIN 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "PCCreate_GASM" 1032 PetscErrorCode PCCreate_GASM(PC pc) 1033 { 1034 PetscErrorCode ierr; 1035 PC_GASM *osm; 1036 1037 PetscFunctionBegin; 1038 ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr); 1039 osm->N = PETSC_DECIDE; 1040 osm->n = 0; 1041 osm->nmax = 0; 1042 osm->overlap = 1; 1043 osm->ksp = 0; 1044 osm->grestriction = 0; 1045 osm->gprolongation = 0; 1046 osm->gx = 0; 1047 osm->gy = 0; 1048 osm->x = 0; 1049 osm->y = 0; 1050 osm->is = 0; 1051 osm->is_local = 0; 1052 osm->pmat = 0; 1053 osm->type = PC_GASM_RESTRICT; 1054 osm->same_local_solves = PETSC_TRUE; 1055 osm->sort_indices = PETSC_TRUE; 1056 1057 pc->data = (void*)osm; 1058 pc->ops->apply = PCApply_GASM; 1059 pc->ops->applytranspose = PCApplyTranspose_GASM; 1060 pc->ops->setup = PCSetUp_GASM; 1061 pc->ops->reset = PCReset_GASM; 1062 pc->ops->destroy = PCDestroy_GASM; 1063 pc->ops->setfromoptions = PCSetFromOptions_GASM; 1064 pc->ops->setuponblocks = PCSetUpOnBlocks_GASM; 1065 pc->ops->view = PCView_GASM; 1066 pc->ops->applyrichardson = 0; 1067 1068 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM", 1069 PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr); 1070 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM", 1071 PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr); 1072 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM", 1073 PCGASMSetOverlap_GASM);CHKERRQ(ierr); 1074 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM", 1075 PCGASMSetType_GASM);CHKERRQ(ierr); 1076 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM", 1077 PCGASMSetSortIndices_GASM);CHKERRQ(ierr); 1078 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM", 1079 PCGASMGetSubKSP_GASM);CHKERRQ(ierr); 1080 PetscFunctionReturn(0); 1081 } 1082 EXTERN_C_END 1083 1084 1085 #undef __FUNCT__ 1086 #define __FUNCT__ "PCGASMCreateSubdomains" 1087 /*@C 1088 PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz 1089 preconditioner for a any problem on a general grid. 1090 1091 Collective 1092 1093 Input Parameters: 1094 + A - The global matrix operator 1095 - n - the number of local blocks 1096 1097 Output Parameters: 1098 . outis - the array of index sets defining the subdomains 1099 1100 Level: advanced 1101 1102 Note: this generates nonoverlapping subdomains; the PCGASM will generate the overlap 1103 from these if you use PCGASMSetLocalSubdomains() 1104 1105 In the Fortran version you must provide the array outis[] already allocated of length n. 1106 1107 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid 1108 1109 .seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains() 1110 @*/ 1111 PetscErrorCode PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[]) 1112 { 1113 MatPartitioning mpart; 1114 const char *prefix; 1115 PetscErrorCode (*f)(Mat,PetscBool *,MatReuse,Mat*); 1116 PetscMPIInt size; 1117 PetscInt i,j,rstart,rend,bs; 1118 PetscBool iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE; 1119 Mat Ad = PETSC_NULL, adj; 1120 IS ispart,isnumb,*is; 1121 PetscErrorCode ierr; 1122 1123 PetscFunctionBegin; 1124 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1125 PetscValidPointer(outis,3); 1126 if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n); 1127 1128 /* Get prefix, row distribution, and block size */ 1129 ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr); 1130 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1131 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 1132 if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs); 1133 1134 /* Get diagonal block from matrix if possible */ 1135 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1136 ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr); 1137 if (f) { 1138 ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr); 1139 } else if (size == 1) { 1140 iscopy = PETSC_FALSE; Ad = A; 1141 } else { 1142 iscopy = PETSC_FALSE; Ad = PETSC_NULL; 1143 } 1144 if (Ad) { 1145 ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 1146 if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);} 1147 } 1148 if (Ad && n > 1) { 1149 PetscBool match,done; 1150 /* Try to setup a good matrix partitioning if available */ 1151 ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr); 1152 ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr); 1153 ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr); 1154 ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr); 1155 if (!match) { 1156 ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr); 1157 } 1158 if (!match) { /* assume a "good" partitioner is available */ 1159 PetscInt na,*ia,*ja; 1160 ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1161 if (done) { 1162 /* Build adjacency matrix by hand. Unfortunately a call to 1163 MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will 1164 remove the block-aij structure and we cannot expect 1165 MatPartitioning to split vertices as we need */ 1166 PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0; 1167 nnz = 0; 1168 for (i=0; i<na; i++) { /* count number of nonzeros */ 1169 len = ia[i+1] - ia[i]; 1170 row = ja + ia[i]; 1171 for (j=0; j<len; j++) { 1172 if (row[j] == i) { /* don't count diagonal */ 1173 len--; break; 1174 } 1175 } 1176 nnz += len; 1177 } 1178 ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr); 1179 ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr); 1180 nnz = 0; 1181 iia[0] = 0; 1182 for (i=0; i<na; i++) { /* fill adjacency */ 1183 cnt = 0; 1184 len = ia[i+1] - ia[i]; 1185 row = ja + ia[i]; 1186 for (j=0; j<len; j++) { 1187 if (row[j] != i) { /* if not diagonal */ 1188 jja[nnz+cnt++] = row[j]; 1189 } 1190 } 1191 nnz += cnt; 1192 iia[i+1] = nnz; 1193 } 1194 /* Partitioning of the adjacency matrix */ 1195 ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr); 1196 ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr); 1197 ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr); 1198 ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr); 1199 ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr); 1200 ierr = MatDestroy(adj);CHKERRQ(ierr); 1201 foundpart = PETSC_TRUE; 1202 } 1203 ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1204 } 1205 ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr); 1206 } 1207 if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);} 1208 1209 ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr); 1210 *outis = is; 1211 1212 if (!foundpart) { 1213 1214 /* Partitioning by contiguous chunks of rows */ 1215 1216 PetscInt mbs = (rend-rstart)/bs; 1217 PetscInt start = rstart; 1218 for (i=0; i<n; i++) { 1219 PetscInt count = (mbs/n + ((mbs % n) > i)) * bs; 1220 ierr = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr); 1221 start += count; 1222 } 1223 1224 } else { 1225 1226 /* Partitioning by adjacency of diagonal block */ 1227 1228 const PetscInt *numbering; 1229 PetscInt *count,nidx,*indices,*newidx,start=0; 1230 /* Get node count in each partition */ 1231 ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr); 1232 ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr); 1233 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1234 for (i=0; i<n; i++) count[i] *= bs; 1235 } 1236 /* Build indices from node numbering */ 1237 ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr); 1238 ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr); 1239 for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */ 1240 ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr); 1241 ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr); 1242 ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr); 1243 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1244 ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr); 1245 for (i=0; i<nidx; i++) 1246 for (j=0; j<bs; j++) 1247 newidx[i*bs+j] = indices[i]*bs + j; 1248 ierr = PetscFree(indices);CHKERRQ(ierr); 1249 nidx *= bs; 1250 indices = newidx; 1251 } 1252 /* Shift to get global indices */ 1253 for (i=0; i<nidx; i++) indices[i] += rstart; 1254 1255 /* Build the index sets for each block */ 1256 for (i=0; i<n; i++) { 1257 ierr = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr); 1258 ierr = ISSort(is[i]);CHKERRQ(ierr); 1259 start += count[i]; 1260 } 1261 1262 ierr = PetscFree(count); 1263 ierr = PetscFree(indices); 1264 ierr = ISDestroy(isnumb);CHKERRQ(ierr); 1265 ierr = ISDestroy(ispart);CHKERRQ(ierr); 1266 } 1267 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "PCGASMDestroySubdomains" 1273 /*@C 1274 PCGASMDestroySubdomains - Destroys the index sets created with 1275 PCGASMCreateSubdomains(). Should be called after setting subdomains 1276 with PCGASMSetLocalSubdomains(). 1277 1278 Collective 1279 1280 Input Parameters: 1281 + n - the number of index sets 1282 . is - the array of index sets 1283 - is_local - the array of local index sets, can be PETSC_NULL 1284 1285 Level: advanced 1286 1287 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid 1288 1289 .seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains() 1290 @*/ 1291 PetscErrorCode PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[]) 1292 { 1293 PetscInt i; 1294 PetscErrorCode ierr; 1295 PetscFunctionBegin; 1296 if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n); 1297 PetscValidPointer(is,2); 1298 for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); } 1299 ierr = PetscFree(is);CHKERRQ(ierr); 1300 if (is_local) { 1301 PetscValidPointer(is_local,3); 1302 for (i=0; i<n; i++) { ierr = ISDestroy(is_local[i]);CHKERRQ(ierr); } 1303 ierr = PetscFree(is_local);CHKERRQ(ierr); 1304 } 1305 PetscFunctionReturn(0); 1306 } 1307 1308 1309 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \ 1310 { \ 1311 PetscInt first_row = first/M, last_row = last/M+1; \ 1312 /* \ 1313 Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \ 1314 of the bounding box of the intersection of the subdomain with the local ownership range (local \ 1315 subdomain). \ 1316 Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \ 1317 of the intersection. \ 1318 */ \ 1319 /* ylow_loc is the grid row containing the first element of the local sumbdomain */ \ 1320 *ylow_loc = PetscMax(first_row,ylow); \ 1321 /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \ 1322 *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft; \ 1323 /* yhigh_loc is the grid row above the last local subdomain element */ \ 1324 *yhigh_loc = PetscMin(last_row,yhigh); \ 1325 /* xright is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \ 1326 *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright; \ 1327 /* Now compute the size of the local subdomain n. */ \ 1328 *n = 0; \ 1329 if(*ylow_loc < *yhigh_loc) { \ 1330 PetscInt width = xright-xleft; \ 1331 *n += width*(*yhigh_loc-*ylow_loc-1); \ 1332 *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \ 1333 *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \ 1334 }\ 1335 } 1336 1337 1338 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "PCGASMCreateSubdomains2D" 1341 /*@ 1342 PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz 1343 preconditioner for a two-dimensional problem on a regular grid. 1344 1345 Collective 1346 1347 Input Parameters: 1348 + M, N - the global number of mesh points in the x and y directions 1349 . Mdomains, Ndomains - the global number of subdomains in the x and y directions 1350 . dof - degrees of freedom per node 1351 - overlap - overlap in mesh lines 1352 1353 Output Parameters: 1354 + Nsub - the number of local subdomains created 1355 . is - array of index sets defining overlapping (if overlap > 0) subdomains 1356 - is_local - array of index sets defining non-overlapping subdomains 1357 1358 Note: 1359 Presently PCAMSCreateSubdomains2d() is valid only for sequential 1360 preconditioners. More general related routines are 1361 PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains(). 1362 1363 Level: advanced 1364 1365 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid 1366 1367 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(), 1368 PCGASMSetOverlap() 1369 @*/ 1370 PetscErrorCode PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local) 1371 { 1372 PetscErrorCode ierr; 1373 PetscMPIInt size, rank; 1374 PetscInt i, j; 1375 PetscInt maxheight, maxwidth; 1376 PetscInt xstart, xleft, xright, xleft_loc, xright_loc; 1377 PetscInt ystart, ylow, yhigh, ylow_loc, yhigh_loc; 1378 PetscInt x[2][2], y[2][2], n[2]; 1379 PetscInt first, last; 1380 PetscInt nidx, *idx; 1381 PetscInt ii,jj,s,q,d; 1382 PetscInt k,kk; 1383 PetscMPIInt color; 1384 MPI_Comm comm, subcomm; 1385 IS **iis = 0; 1386 1387 PetscFunctionBegin; 1388 ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr); 1389 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1390 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1391 ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr); 1392 if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) " 1393 "does not respect the number of degrees of freedom per grid point %D", first, last, dof); 1394 1395 /* Determine the number of domains with nonzero intersections with the local ownership range. */ 1396 s = 0; 1397 ystart = 0; 1398 for (j=0; j<Ndomains; ++j) { 1399 maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */ 1400 if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N); 1401 /* Vertical domain limits with an overlap. */ 1402 ylow = PetscMax(ystart - overlap,0); 1403 yhigh = PetscMin(ystart + maxheight + overlap,N); 1404 xstart = 0; 1405 for (i=0; i<Mdomains; ++i) { 1406 maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */ 1407 if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M); 1408 /* Horizontal domain limits with an overlap. */ 1409 xleft = PetscMax(xstart - overlap,0); 1410 xright = PetscMin(xstart + maxwidth + overlap,M); 1411 /* 1412 Determine whether this subdomain intersects this processor's ownership range of pc->pmat. 1413 */ 1414 PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx)); 1415 if(nidx) { 1416 ++s; 1417 } 1418 xstart += maxwidth; 1419 }/* for(i = 0; i < Mdomains; ++i) */ 1420 ystart += maxheight; 1421 }/* for(j = 0; j < Ndomains; ++j) */ 1422 /* Now we can allocate the necessary number of ISs. */ 1423 *nsub = s; 1424 ierr = PetscMalloc((*nsub)*sizeof(IS*),is);CHKERRQ(ierr); 1425 ierr = PetscMalloc((*nsub)*sizeof(IS*),is_local);CHKERRQ(ierr); 1426 s = 0; 1427 ystart = 0; 1428 for (j=0; j<Ndomains; ++j) { 1429 maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */ 1430 if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N); 1431 /* Vertical domain limits with an overlap. */ 1432 y[0][0] = PetscMax(ystart - overlap,0); 1433 y[0][1] = PetscMin(ystart + maxheight + overlap,N); 1434 /* Vertical domain limits without an overlap. */ 1435 y[1][0] = ystart; 1436 y[1][1] = PetscMin(ystart + maxheight,N); 1437 xstart = 0; 1438 for (i=0; i<Mdomains; ++i) { 1439 maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */ 1440 if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M); 1441 /* Horizontal domain limits with an overlap. */ 1442 x[0][0] = PetscMax(xstart - overlap,0); 1443 x[0][1] = PetscMin(xstart + maxwidth + overlap,M); 1444 /* Horizontal domain limits without an overlap. */ 1445 x[1][0] = xstart; 1446 x[1][1] = PetscMin(xstart+maxwidth,M); 1447 /* 1448 Determine whether this domain intersects this processor's ownership range of pc->pmat. 1449 Do this twice: first for the domains with overlaps, and once without. 1450 During the first pass create the subcommunicators, and use them on the second pass as well. 1451 */ 1452 for(q = 0; q < 2; ++q) { 1453 /* 1454 domain limits, (xleft, xright) and (ylow, yheigh) are adjusted 1455 according to whether the domain with an overlap or without is considered. 1456 */ 1457 xleft = x[q][0]; xright = x[q][1]; 1458 ylow = y[q][0]; yhigh = y[q][1]; 1459 PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx)); 1460 nidx *= dof; 1461 n[q] = nidx; 1462 /* 1463 Based on the counted number of indices in the local domain *with an overlap*, 1464 construct a subcommunicator of all the processors supporting this domain. 1465 Observe that a domain with an overlap might have nontrivial local support, 1466 while the domain without an overlap might not. Hence, the decision to participate 1467 in the subcommunicator must be based on the domain with an overlap. 1468 */ 1469 if (q == 0) { 1470 if(nidx) { 1471 color = 1; 1472 } else { 1473 color = MPI_UNDEFINED; 1474 } 1475 ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr); 1476 } 1477 /* 1478 Proceed only if the number of local indices *with an overlap* is nonzero. 1479 */ 1480 if (n[0]) { 1481 if(q == 0) { 1482 iis = is; 1483 } 1484 if (q == 1) { 1485 /* 1486 The IS for the no-overlap subdomain shares a communicator with the overlapping domain. 1487 Moreover, if the overlap is zero, the two ISs are identical. 1488 */ 1489 if (overlap == 0) { 1490 (*is_local)[s] = (*is)[s]; 1491 ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr); 1492 continue; 1493 } else { 1494 iis = is_local; 1495 subcomm = ((PetscObject)(*is)[s])->comm; 1496 } 1497 }/* if(q == 1) */ 1498 idx = PETSC_NULL; 1499 ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr); 1500 if(nidx) { 1501 k = 0; 1502 for (jj=ylow_loc; jj<yhigh_loc; ++jj) { 1503 PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft; 1504 PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright; 1505 kk = dof*(M*jj + x0); 1506 for (ii=x0; ii<x1; ++ii) { 1507 for(d = 0; d < dof; ++d) { 1508 idx[k++] = kk++; 1509 } 1510 } 1511 } 1512 } 1513 ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr); 1514 }/* if(n[0]) */ 1515 }/* for(q = 0; q < 2; ++q) */ 1516 if(n[0]) { 1517 ++s; 1518 } 1519 xstart += maxwidth; 1520 }/* for(i = 0; i < Mdomains; ++i) */ 1521 ystart += maxheight; 1522 }/* for(j = 0; j < Ndomains; ++j) */ 1523 PetscFunctionReturn(0); 1524 } 1525 1526 #undef __FUNCT__ 1527 #define __FUNCT__ "PCGASMGetLocalSubdomains" 1528 /*@C 1529 PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor 1530 only) for the additive Schwarz preconditioner. 1531 1532 Not Collective 1533 1534 Input Parameter: 1535 . pc - the preconditioner context 1536 1537 Output Parameters: 1538 + n - the number of subdomains for this processor (default value = 1) 1539 . is - the index sets that define the subdomains for this processor 1540 - is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL) 1541 1542 1543 Notes: 1544 The IS numbering is in the parallel, global numbering of the vector. 1545 1546 Level: advanced 1547 1548 .keywords: PC, GASM, set, local, subdomains, additive Schwarz 1549 1550 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 1551 PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices() 1552 @*/ 1553 PetscErrorCode PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[]) 1554 { 1555 PC_GASM *osm; 1556 PetscErrorCode ierr; 1557 PetscBool match; 1558 1559 PetscFunctionBegin; 1560 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1561 PetscValidIntPointer(n,2); 1562 if (is) PetscValidPointer(is,3); 1563 ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr); 1564 if (!match) { 1565 if (n) *n = 0; 1566 if (is) *is = PETSC_NULL; 1567 } else { 1568 osm = (PC_GASM*)pc->data; 1569 if (n) *n = osm->n; 1570 if (is) *is = osm->is; 1571 if (is_local) *is_local = osm->is_local; 1572 } 1573 PetscFunctionReturn(0); 1574 } 1575 1576 #undef __FUNCT__ 1577 #define __FUNCT__ "PCGASMGetLocalSubmatrices" 1578 /*@C 1579 PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor 1580 only) for the additive Schwarz preconditioner. 1581 1582 Not Collective 1583 1584 Input Parameter: 1585 . pc - the preconditioner context 1586 1587 Output Parameters: 1588 + n - the number of matrices for this processor (default value = 1) 1589 - mat - the matrices 1590 1591 1592 Level: advanced 1593 1594 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi 1595 1596 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 1597 PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains() 1598 @*/ 1599 PetscErrorCode PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[]) 1600 { 1601 PC_GASM *osm; 1602 PetscErrorCode ierr; 1603 PetscBool match; 1604 1605 PetscFunctionBegin; 1606 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1607 PetscValidIntPointer(n,2); 1608 if (mat) PetscValidPointer(mat,3); 1609 if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp()."); 1610 ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr); 1611 if (!match) { 1612 if (n) *n = 0; 1613 if (mat) *mat = PETSC_NULL; 1614 } else { 1615 osm = (PC_GASM*)pc->data; 1616 if (n) *n = osm->n; 1617 if (mat) *mat = osm->pmat; 1618 } 1619 PetscFunctionReturn(0); 1620 } 1621