1 #define PETSCKSP_DLL 2 3 /* 4 This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation. 5 In this version each processor may have any number of subdomains and a subdomain may live on multiple 6 processors. 7 8 N - total number of subdomains on all processors 9 n - actual number of subdomains on this processor 10 nmax - maximum number of subdomains per processor 11 */ 12 #include "private/pcimpl.h" /*I "petscpc.h" I*/ 13 14 typedef struct { 15 PetscInt N,n,nmax; 16 PetscInt overlap; /* overlap requested by user */ 17 KSP *ksp; /* linear solvers for each block */ 18 Vec gx,gy; /* Merged work vectors */ 19 Vec *x,*y; /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */ 20 IS gis, gis_local; /* merged ISs */ 21 VecScatter grestriction; /* merged restriction */ 22 VecScatter gprolongation; /* merged prolongation */ 23 IS *is; /* index set that defines each overlapping subdomain */ 24 IS *is_local; /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */ 25 Mat *pmat; /* subdomain block matrices */ 26 PCGASMType type; /* use reduced interpolation, restriction or both */ 27 PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */ 28 PetscBool same_local_solves; /* flag indicating whether all local solvers are same */ 29 PetscBool sort_indices; /* flag to sort subdomain indices */ 30 } PC_GASM; 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "PCGASMPrintSubdomains" 34 static PetscErrorCode PCGASMPrintSubdomains(PC pc) 35 { 36 PC_GASM *osm = (PC_GASM*)pc->data; 37 const char *prefix; 38 char fname[PETSC_MAX_PATH_LEN+1]; 39 PetscViewer viewer, sviewer; 40 PetscInt i,j,nidx; 41 const PetscInt *idx; 42 char *cidx; 43 PetscBool found; 44 PetscMPIInt size, rank; 45 PetscErrorCode ierr; 46 47 PetscFunctionBegin; 48 ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size); CHKERRQ(ierr); 49 ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank); CHKERRQ(ierr); 50 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 51 ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr); 52 if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }; 53 for (i=0;i<osm->n;++i) { 54 ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr); 55 ierr = ISGetLocalSize(osm->is[i], &nidx); CHKERRQ(ierr); 56 /* 57 No more than 15 characters per index plus a space. 58 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 59 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 60 For nidx == 0, the whole string 16 '\0'. 61 */ 62 ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx); CHKERRQ(ierr); 63 ierr = ISGetIndices(osm->is[i], &idx); CHKERRQ(ierr); 64 ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*(nidx+1), &sviewer); CHKERRQ(ierr); 65 for(j = 0; j < nidx; ++j) { 66 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]); CHKERRQ(ierr); 67 } 68 ierr = PetscViewerDestroy(sviewer); CHKERRQ(ierr); 69 ierr = ISRestoreIndices(osm->is[i],&idx); CHKERRQ(ierr); 70 71 ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n"); CHKERRQ(ierr); 72 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx); CHKERRQ(ierr); 73 ierr = PetscViewerFlush(viewer); CHKERRQ(ierr); 74 ierr = PetscViewerASCIIPrintf(viewer, "\n"); CHKERRQ(ierr); 75 ierr = PetscFree(cidx); CHKERRQ(ierr); 76 ierr = PetscViewerDestroy(viewer); CHKERRQ(ierr); 77 if (osm->is_local) { 78 ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr); 79 ierr = ISGetLocalSize(osm->is_local[i], &nidx); CHKERRQ(ierr); 80 /* 81 No more than 15 characters per index plus a space. 82 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 83 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 84 For nidx == 0, the whole string 16 '\0'. 85 */ 86 ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx); CHKERRQ(ierr); 87 ierr = ISGetIndices(osm->is_local[i], &idx); CHKERRQ(ierr); 88 ierr = PetscViewerStringOpen(((PetscObject)(osm->is_local[i]))->comm, cidx, 16*(nidx+1), &sviewer); CHKERRQ(ierr); 89 for(j = 0; j < nidx; ++j) { 90 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]); CHKERRQ(ierr); 91 } 92 ierr = PetscViewerDestroy(sviewer); CHKERRQ(ierr); 93 ierr = ISRestoreIndices(osm->is_local[i],&idx); CHKERRQ(ierr); 94 95 ierr = PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n"); CHKERRQ(ierr); 96 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx); CHKERRQ(ierr); 97 ierr = PetscViewerFlush(viewer); CHKERRQ(ierr); 98 ierr = PetscViewerASCIIPrintf(viewer, "\n"); CHKERRQ(ierr); 99 ierr = PetscFree(cidx); CHKERRQ(ierr); 100 ierr = PetscViewerDestroy(viewer); CHKERRQ(ierr); 101 } 102 } 103 PetscFunctionReturn(0); 104 } 105 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "PCView_GASM" 109 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer) 110 { 111 PC_GASM *osm = (PC_GASM*)pc->data; 112 const char *prefix; 113 PetscErrorCode ierr; 114 PetscMPIInt rank, size; 115 PetscInt i,bsz; 116 PetscBool iascii,isstring, print_subdomains; 117 PetscViewer sviewer; 118 119 120 PetscFunctionBegin; 121 ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size); CHKERRQ(ierr); 122 ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank); CHKERRQ(ierr); 123 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 124 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr); 125 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 126 ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr); 127 if (iascii) { 128 char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set"; 129 if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);} 130 if (osm->nmax > 0) {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);} 131 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr); 132 ierr = PetscViewerFlush(viewer); CHKERRQ(ierr); 133 ierr = PetscViewerASCIIPrintf(viewer," Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr); 134 ierr = PetscViewerASCIIPrintf(viewer," Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr); 135 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 136 if (osm->same_local_solves) { 137 if (osm->ksp) { 138 ierr = PetscViewerASCIIPrintf(viewer," Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr); 139 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 140 if (!rank) { 141 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 142 ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr); 143 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 144 } 145 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 146 } 147 } else { 148 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 149 ierr = PetscViewerASCIIPrintf(viewer," Local solve info for each subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr); 150 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 151 ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 152 for (i=0; i<osm->nmax; i++) { 153 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 154 if (i < osm->n) { 155 ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr); 156 ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr); 157 ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr); 158 ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 159 } 160 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 161 } 162 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 163 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 164 } 165 } else if (isstring) { 166 ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr); 167 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 168 if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);} 169 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 170 } else { 171 SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name); 172 } 173 ierr = PetscViewerFlush(viewer); CHKERRQ(ierr); 174 if(print_subdomains) { 175 ierr = PCGASMPrintSubdomains(pc); CHKERRQ(ierr); 176 } 177 PetscFunctionReturn(0); 178 } 179 180 181 182 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "PCSetUp_GASM" 186 static PetscErrorCode PCSetUp_GASM(PC pc) 187 { 188 PC_GASM *osm = (PC_GASM*)pc->data; 189 PetscErrorCode ierr; 190 PetscBool symset,flg; 191 PetscInt i; 192 PetscMPIInt rank, size; 193 MatReuse scall = MAT_REUSE_MATRIX; 194 KSP ksp; 195 PC subpc; 196 const char *prefix,*pprefix; 197 PetscInt dn; /* Number of indices in a single subdomain assigned to this processor. */ 198 const PetscInt *didx; /* Indices from a single subdomain assigned to this processor. */ 199 PetscInt ddn; /* Number of indices in all subdomains assigned to this processor. */ 200 PetscInt *ddidx; /* Indices of all subdomains assigned to this processor. */ 201 IS gid; /* Identity IS of the size of all subdomains assigned to this processor. */ 202 Vec x,y; 203 PetscScalar *gxarray, *gyarray; 204 PetscInt gfirst, glast; 205 206 PetscFunctionBegin; 207 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 208 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 209 if (!pc->setupcalled) { 210 211 if (!osm->type_set) { 212 ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr); 213 if (symset && flg) { osm->type = PC_GASM_BASIC; } 214 } 215 216 if (osm->N == PETSC_DECIDE && osm->n < 1) { 217 /* no subdomains given, use one per processor */ 218 osm->nmax = osm->n = 1; 219 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 220 osm->N = size; 221 } else if (osm->N == PETSC_DECIDE) { 222 PetscInt inwork[2], outwork[2]; 223 /* determine global number of subdomains and the max number of local subdomains */ 224 inwork[0] = inwork[1] = osm->n; 225 ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr); 226 osm->nmax = outwork[0]; 227 osm->N = outwork[1]; 228 } 229 if (!osm->is){ /* create the index sets */ 230 ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr); 231 } 232 if (!osm->is_local) { 233 /* 234 This indicates that osm->is should define a nonoverlapping decomposition 235 (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetLocalSubdomains, 236 but the assumption is that either the user does the right thing, or subdomains in ossm->is have been created 237 via PCGASMCreateSubdomains, which guarantees a nonoverlapping decomposition). 238 Therefore, osm->is will be used to define osm->is_local. 239 If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap, 240 so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping. 241 Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is. 242 */ 243 ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr); 244 for (i=0; i<osm->n; i++) { 245 if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */ 246 ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr); 247 ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr); 248 } else { 249 ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr); 250 osm->is_local[i] = osm->is[i]; 251 } 252 } 253 } 254 /* Beyond this point osm->is_local is not null. */ 255 if (osm->overlap > 0) { 256 /* Extend the "overlapping" regions by a number of steps */ 257 ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr); 258 } 259 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 260 flg = PETSC_FALSE; 261 ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr); 262 if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); } 263 264 if (osm->sort_indices) { 265 for (i=0; i<osm->n; i++) { 266 ierr = ISSort(osm->is[i]);CHKERRQ(ierr); 267 ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr); 268 } 269 } 270 /* Merge the ISs, create merged vectors and scatter contexts. */ 271 /* Restriction ISs. */ 272 ddn = 0; 273 for (i=0; i<osm->n; i++) { 274 ierr = ISGetLocalSize(osm->is[i],&dn); CHKERRQ(ierr); 275 ddn += dn; 276 } 277 ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx); CHKERRQ(ierr); 278 ddn = 0; 279 for (i=0; i<osm->n; i++) { 280 ierr = ISGetLocalSize(osm->is[i],&dn); CHKERRQ(ierr); 281 ierr = ISGetIndices(osm->is[i],&didx); CHKERRQ(ierr); 282 ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn); CHKERRQ(ierr); 283 ierr = ISRestoreIndices(osm->is[i], &didx); CHKERRQ(ierr); 284 ddn += dn; 285 } 286 ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis); CHKERRQ(ierr); 287 ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr); 288 ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx); CHKERRQ(ierr); 289 ierr = VecDuplicate(osm->gx,&osm->gy); CHKERRQ(ierr); 290 ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast); CHKERRQ(ierr); 291 ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid); CHKERRQ(ierr); 292 ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction)); CHKERRQ(ierr); 293 ierr = ISDestroy(gid); CHKERRQ(ierr); 294 /* Prolongation ISs */ 295 { PetscInt dn_local; /* Number of indices in the local part of a single domain assigned to this processor. */ 296 const PetscInt *didx_local; /* Global indices from the local part of a single domain assigned to this processor. */ 297 PetscInt ddn_local; /* Number of indices in the local part of the disjoint union all domains assigned to this processor. */ 298 PetscInt *ddidx_local; /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */ 299 /**/ 300 ISLocalToGlobalMapping ltog; /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */ 301 PetscInt *ddidx_llocal; /* Mapped local indices of the disjoint union of local parts of subdomains. */ 302 PetscInt ddn_llocal; /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */ 303 IS gis_llocal; /* IS with ddidx_llocal indices. */ 304 PetscInt j; 305 ddn_local = 0; 306 for (i=0; i<osm->n; i++) { 307 ierr = ISGetLocalSize(osm->is_local[i],&dn_local); CHKERRQ(ierr); 308 ddn_local += dn_local; 309 } 310 ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local); CHKERRQ(ierr); 311 ddn_local = 0; 312 for (i=0; i<osm->n; i++) { 313 ierr = ISGetLocalSize(osm->is_local[i],&dn_local); CHKERRQ(ierr); 314 ierr = ISGetIndices(osm->is_local[i],&didx_local); CHKERRQ(ierr); 315 ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local); CHKERRQ(ierr); 316 ierr = ISRestoreIndices(osm->is_local[i], &didx_local); CHKERRQ(ierr); 317 ddn_local += dn_local; 318 } 319 ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal); CHKERRQ(ierr); 320 ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local)); CHKERRQ(ierr); 321 ierr = ISLocalToGlobalMappingCreateIS(osm->gis,<og);CHKERRQ(ierr); 322 ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr); 323 ierr = ISLocalToGlobalMappingDestroy(ltog);CHKERRQ(ierr); 324 if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local); 325 /* Now convert these localized indices into the global indices into the merged output vector. */ 326 ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast); CHKERRQ(ierr); 327 for(j=0; j < ddn_llocal; ++j) { 328 ddidx_llocal[j] += gfirst; 329 } 330 ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal); CHKERRQ(ierr); 331 ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation); CHKERRQ(ierr); 332 ierr = ISDestroy(gis_llocal);CHKERRQ(ierr); 333 } 334 /* Create the subdomain work vectors. */ 335 ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr); 336 ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr); 337 ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr); 338 ierr = VecGetArray(osm->gx, &gxarray); CHKERRQ(ierr); 339 ierr = VecGetArray(osm->gy, &gyarray); CHKERRQ(ierr); 340 for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) { 341 PetscInt dN; 342 ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr); 343 ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr); 344 ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr); 345 ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr); 346 } 347 ierr = VecRestoreArray(osm->gx, &gxarray); CHKERRQ(ierr); 348 ierr = VecRestoreArray(osm->gy, &gyarray); CHKERRQ(ierr); 349 ierr = VecDestroy(x); CHKERRQ(ierr); 350 ierr = VecDestroy(y); CHKERRQ(ierr); 351 /* Create the local solvers */ 352 ierr = PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr); 353 for (i=0; i<osm->n; i++) { /* KSPs are local */ 354 ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr); 355 ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr); 356 ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr); 357 ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr); 358 ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); 359 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 360 ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr); 361 ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr); 362 osm->ksp[i] = ksp; 363 } 364 scall = MAT_INITIAL_MATRIX; 365 366 } else { 367 /* 368 Destroy the blocks from the previous iteration 369 */ 370 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 371 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 372 scall = MAT_INITIAL_MATRIX; 373 } 374 } 375 376 /* 377 Extract out the submatrices. 378 */ 379 if(size > 1) { 380 ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr); 381 } 382 else { 383 ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr); 384 } 385 if (scall == MAT_INITIAL_MATRIX) { 386 ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr); 387 for (i=0; i<osm->n; i++) { 388 ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr); 389 ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr); 390 } 391 } 392 393 /* Return control to the user so that the submatrices can be modified (e.g., to apply 394 different boundary conditions for the submatrices than for the global problem) */ 395 ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr); 396 397 /* 398 Loop over submatrices putting them into local ksp 399 */ 400 for (i=0; i<osm->n; i++) { 401 ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr); 402 if (!pc->setupcalled) { 403 ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr); 404 } 405 } 406 407 PetscFunctionReturn(0); 408 } 409 410 #undef __FUNCT__ 411 #define __FUNCT__ "PCSetUpOnBlocks_GASM" 412 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc) 413 { 414 PC_GASM *osm = (PC_GASM*)pc->data; 415 PetscErrorCode ierr; 416 PetscInt i; 417 418 PetscFunctionBegin; 419 for (i=0; i<osm->n; i++) { 420 ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr); 421 } 422 PetscFunctionReturn(0); 423 } 424 425 #undef __FUNCT__ 426 #define __FUNCT__ "PCApply_GASM" 427 static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y) 428 { 429 PC_GASM *osm = (PC_GASM*)pc->data; 430 PetscErrorCode ierr; 431 PetscInt i; 432 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 433 434 PetscFunctionBegin; 435 /* 436 Support for limiting the restriction or interpolation to only local 437 subdomain values (leaving the other values 0). 438 */ 439 if (!(osm->type & PC_GASM_RESTRICT)) { 440 forward = SCATTER_FORWARD_LOCAL; 441 /* have to zero the work RHS since scatter may leave some slots empty */ 442 ierr = VecZeroEntries(osm->gx); CHKERRQ(ierr); 443 } 444 if (!(osm->type & PC_GASM_INTERPOLATE)) { 445 reverse = SCATTER_REVERSE_LOCAL; 446 } 447 448 ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 449 ierr = VecZeroEntries(y);CHKERRQ(ierr); 450 ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 451 /* do the subdomain solves */ 452 for (i=0; i<osm->n; ++i) { 453 ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 454 } 455 ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 456 ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "PCApplyTranspose_GASM" 462 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y) 463 { 464 PC_GASM *osm = (PC_GASM*)pc->data; 465 PetscErrorCode ierr; 466 PetscInt i; 467 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 468 469 PetscFunctionBegin; 470 /* 471 Support for limiting the restriction or interpolation to only local 472 subdomain values (leaving the other values 0). 473 474 Note: these are reversed from the PCApply_GASM() because we are applying the 475 transpose of the three terms 476 */ 477 if (!(osm->type & PC_GASM_INTERPOLATE)) { 478 forward = SCATTER_FORWARD_LOCAL; 479 /* have to zero the work RHS since scatter may leave some slots empty */ 480 ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr); 481 } 482 if (!(osm->type & PC_GASM_RESTRICT)) { 483 reverse = SCATTER_REVERSE_LOCAL; 484 } 485 486 ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 487 ierr = VecZeroEntries(y);CHKERRQ(ierr); 488 ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 489 /* do the local solves */ 490 for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */ 491 ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 492 } 493 ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 494 ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 495 PetscFunctionReturn(0); 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "PCDestroy_GASM" 500 static PetscErrorCode PCDestroy_GASM(PC pc) 501 { 502 PC_GASM *osm = (PC_GASM*)pc->data; 503 PetscErrorCode ierr; 504 PetscInt i; 505 506 PetscFunctionBegin; 507 if (osm->ksp) { 508 for (i=0; i<osm->n; i++) { 509 ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr); 510 } 511 ierr = PetscFree(osm->ksp);CHKERRQ(ierr); 512 } 513 if (osm->pmat) { 514 if (osm->n > 0) { 515 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 516 } 517 } 518 for (i=0; i<osm->n; i++) { 519 ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr); 520 ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr); 521 } 522 ierr = PetscFree(osm->x);CHKERRQ(ierr); 523 ierr = PetscFree(osm->y);CHKERRQ(ierr); 524 ierr = VecDestroy(osm->gx); CHKERRQ(ierr); 525 ierr = VecDestroy(osm->gy); CHKERRQ(ierr); 526 527 ierr = VecScatterDestroy(osm->grestriction); CHKERRQ(ierr); 528 ierr = VecScatterDestroy(osm->gprolongation); CHKERRQ(ierr); 529 530 if (osm->is) { 531 ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); 532 } 533 534 if(osm->gis) { 535 ierr = ISDestroy(osm->gis); CHKERRQ(ierr); 536 } 537 if(osm->gis_local) { 538 ierr = ISDestroy(osm->gis_local); CHKERRQ(ierr); 539 } 540 ierr = PetscFree(pc->data);CHKERRQ(ierr); 541 PetscFunctionReturn(0); 542 } 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "PCSetFromOptions_GASM" 546 static PetscErrorCode PCSetFromOptions_GASM(PC pc) { 547 PC_GASM *osm = (PC_GASM*)pc->data; 548 PetscErrorCode ierr; 549 PetscInt blocks,ovl; 550 PetscBool symset,flg; 551 PCGASMType gasmtype; 552 553 PetscFunctionBegin; 554 /* set the type to symmetric if matrix is symmetric */ 555 if (!osm->type_set && pc->pmat) { 556 ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr); 557 if (symset && flg) { osm->type = PC_GASM_BASIC; } 558 } 559 ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr); 560 ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr); 561 if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); } 562 ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr); 563 if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); } 564 flg = PETSC_FALSE; 565 ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr); 566 if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); } 567 ierr = PetscOptionsTail();CHKERRQ(ierr); 568 PetscFunctionReturn(0); 569 } 570 571 /*------------------------------------------------------------------------------------*/ 572 573 EXTERN_C_BEGIN 574 #undef __FUNCT__ 575 #define __FUNCT__ "PCGASMSetLocalSubdomains_GASM" 576 PetscErrorCode PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[]) 577 { 578 PC_GASM *osm = (PC_GASM*)pc->data; 579 PetscErrorCode ierr; 580 PetscInt i; 581 582 PetscFunctionBegin; 583 if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n); 584 if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp()."); 585 586 if (!pc->setupcalled) { 587 osm->n = n; 588 osm->is = 0; 589 osm->is_local = 0; 590 if (is) { 591 for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);} 592 } 593 if (is_local) { 594 for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);} 595 } 596 if (osm->is) { 597 ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); 598 } 599 if (is) { 600 ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr); 601 for (i=0; i<n; i++) { osm->is[i] = is[i]; } 602 /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */ 603 osm->overlap = -1; 604 } 605 if (is_local) { 606 ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr); 607 for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; } 608 } 609 } 610 PetscFunctionReturn(0); 611 } 612 EXTERN_C_END 613 614 EXTERN_C_BEGIN 615 #undef __FUNCT__ 616 #define __FUNCT__ "PCGASMSetTotalSubdomains_GASM" 617 PetscErrorCode PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N) { 618 PC_GASM *osm = (PC_GASM*)pc->data; 619 PetscErrorCode ierr; 620 PetscMPIInt rank,size; 621 PetscInt n; 622 623 PetscFunctionBegin; 624 if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N); 625 626 /* 627 Split the subdomains equally among all processors 628 */ 629 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 630 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 631 n = N/size + ((N % size) > rank); 632 if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N); 633 if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp()."); 634 if (!pc->setupcalled) { 635 if (osm->is) { 636 ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); 637 } 638 osm->N = N; 639 osm->n = n; 640 osm->is = 0; 641 osm->is_local = 0; 642 } 643 PetscFunctionReturn(0); 644 }/* PCGASMSetTotalSubdomains_GASM() */ 645 EXTERN_C_END 646 647 EXTERN_C_BEGIN 648 #undef __FUNCT__ 649 #define __FUNCT__ "PCGASMSetOverlap_GASM" 650 PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl) 651 { 652 PC_GASM *osm = (PC_GASM*)pc->data; 653 654 PetscFunctionBegin; 655 if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested"); 656 if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp()."); 657 if (!pc->setupcalled) { 658 osm->overlap = ovl; 659 } 660 PetscFunctionReturn(0); 661 } 662 EXTERN_C_END 663 664 EXTERN_C_BEGIN 665 #undef __FUNCT__ 666 #define __FUNCT__ "PCGASMSetType_GASM" 667 PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type) 668 { 669 PC_GASM *osm = (PC_GASM*)pc->data; 670 671 PetscFunctionBegin; 672 osm->type = type; 673 osm->type_set = PETSC_TRUE; 674 PetscFunctionReturn(0); 675 } 676 EXTERN_C_END 677 678 EXTERN_C_BEGIN 679 #undef __FUNCT__ 680 #define __FUNCT__ "PCGASMSetSortIndices_GASM" 681 PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort) 682 { 683 PC_GASM *osm = (PC_GASM*)pc->data; 684 685 PetscFunctionBegin; 686 osm->sort_indices = doSort; 687 PetscFunctionReturn(0); 688 } 689 EXTERN_C_END 690 691 EXTERN_C_BEGIN 692 #undef __FUNCT__ 693 #define __FUNCT__ "PCGASMGetSubKSP_GASM" 694 /* 695 FIX: This routine might need to be modified once multiple ranks per subdomain are allowed. 696 In particular, it would upset the global subdomain number calculation. 697 */ 698 PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp) 699 { 700 PC_GASM *osm = (PC_GASM*)pc->data; 701 PetscErrorCode ierr; 702 703 PetscFunctionBegin; 704 if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here"); 705 706 if (n) { 707 *n = osm->n; 708 } 709 if (first) { 710 ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 711 *first -= osm->n; 712 } 713 if (ksp) { 714 /* Assume that local solves are now different; not necessarily 715 true though! This flag is used only for PCView_GASM() */ 716 *ksp = osm->ksp; 717 osm->same_local_solves = PETSC_FALSE; 718 } 719 PetscFunctionReturn(0); 720 }/* PCGASMGetSubKSP_GASM() */ 721 EXTERN_C_END 722 723 724 #undef __FUNCT__ 725 #define __FUNCT__ "PCGASMSetLocalSubdomains" 726 /*@C 727 PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor 728 only) for the additive Schwarz preconditioner. 729 730 Collective on PC 731 732 Input Parameters: 733 + pc - the preconditioner context 734 . n - the number of subdomains for this processor (default value = 1) 735 . is - the index set that defines the subdomains for this processor 736 (or PETSC_NULL for PETSc to determine subdomains) 737 - is_local - the index sets that define the local part of the subdomains for this processor 738 (or PETSC_NULL to use the default of 1 subdomain per process) 739 740 Notes: 741 The IS numbering is in the parallel, global numbering of the vector. 742 743 By default the GASM preconditioner uses 1 block per processor. 744 745 Use PCGASMSetTotalSubdomains() to set the subdomains for all processors. 746 747 Level: advanced 748 749 .keywords: PC, GASM, set, local, subdomains, additive Schwarz 750 751 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 752 PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains() 753 @*/ 754 PetscErrorCode PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[]) 755 { 756 PetscErrorCode ierr; 757 758 PetscFunctionBegin; 759 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 760 ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr); 761 PetscFunctionReturn(0); 762 } 763 764 #undef __FUNCT__ 765 #define __FUNCT__ "PCGASMSetTotalSubdomains" 766 /*@C 767 PCGASMSetTotalSubdomains - Sets the subdomains for all processor for the 768 additive Schwarz preconditioner. Either all or no processors in the 769 PC communicator must call this routine, with the same index sets. 770 771 Collective on PC 772 773 Input Parameters: 774 + pc - the preconditioner context 775 . n - the number of subdomains for all processors 776 . is - the index sets that define the subdomains for all processor 777 (or PETSC_NULL for PETSc to determine subdomains) 778 - is_local - the index sets that define the local part of the subdomains for this processor 779 (or PETSC_NULL to use the default of 1 subdomain per process) 780 781 Options Database Key: 782 To set the total number of subdomain blocks rather than specify the 783 index sets, use the option 784 . -pc_gasm_blocks <blks> - Sets total blocks 785 786 Notes: 787 Currently you cannot use this to set the actual subdomains with the argument is. 788 789 By default the GASM preconditioner uses 1 block per processor. 790 791 These index sets cannot be destroyed until after completion of the 792 linear solves for which the GASM preconditioner is being used. 793 794 Use PCGASMSetLocalSubdomains() to set local subdomains. 795 796 Level: advanced 797 798 .keywords: PC, GASM, set, total, global, subdomains, additive Schwarz 799 800 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 801 PCGASMCreateSubdomains2D() 802 @*/ 803 PetscErrorCode PCGASMSetTotalSubdomains(PC pc,PetscInt N) 804 { 805 PetscErrorCode ierr; 806 807 PetscFunctionBegin; 808 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 809 ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr); 810 PetscFunctionReturn(0); 811 } 812 813 #undef __FUNCT__ 814 #define __FUNCT__ "PCGASMSetOverlap" 815 /*@ 816 PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the 817 additive Schwarz preconditioner. Either all or no processors in the 818 PC communicator must call this routine. 819 820 Logically Collective on PC 821 822 Input Parameters: 823 + pc - the preconditioner context 824 - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1) 825 826 Options Database Key: 827 . -pc_gasm_overlap <ovl> - Sets overlap 828 829 Notes: 830 By default the GASM preconditioner uses 1 block per processor. To use 831 multiple blocks per perocessor, see PCGASMSetTotalSubdomains() and 832 PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>). 833 834 The overlap defaults to 1, so if one desires that no additional 835 overlap be computed beyond what may have been set with a call to 836 PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl 837 must be set to be 0. In particular, if one does not explicitly set 838 the subdomains an application code, then all overlap would be computed 839 internally by PETSc, and using an overlap of 0 would result in an GASM 840 variant that is equivalent to the block Jacobi preconditioner. 841 842 Note that one can define initial index sets with any overlap via 843 PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine 844 PCGASMSetOverlap() merely allows PETSc to extend that overlap further 845 if desired. 846 847 Level: intermediate 848 849 .keywords: PC, GASM, set, overlap 850 851 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(), 852 PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains() 853 @*/ 854 PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl) 855 { 856 PetscErrorCode ierr; 857 858 PetscFunctionBegin; 859 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 860 PetscValidLogicalCollectiveInt(pc,ovl,2); 861 ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr); 862 PetscFunctionReturn(0); 863 } 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "PCGASMSetType" 867 /*@ 868 PCGASMSetType - Sets the type of restriction and interpolation used 869 for local problems in the additive Schwarz method. 870 871 Logically Collective on PC 872 873 Input Parameters: 874 + pc - the preconditioner context 875 - type - variant of GASM, one of 876 .vb 877 PC_GASM_BASIC - full interpolation and restriction 878 PC_GASM_RESTRICT - full restriction, local processor interpolation 879 PC_GASM_INTERPOLATE - full interpolation, local processor restriction 880 PC_GASM_NONE - local processor restriction and interpolation 881 .ve 882 883 Options Database Key: 884 . -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type 885 886 Level: intermediate 887 888 .keywords: PC, GASM, set, type 889 890 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(), 891 PCGASMCreateSubdomains2D() 892 @*/ 893 PetscErrorCode PCGASMSetType(PC pc,PCGASMType type) 894 { 895 PetscErrorCode ierr; 896 897 PetscFunctionBegin; 898 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 899 PetscValidLogicalCollectiveEnum(pc,type,2); 900 ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 #undef __FUNCT__ 905 #define __FUNCT__ "PCGASMSetSortIndices" 906 /*@ 907 PCGASMSetSortIndices - Determines whether subdomain indices are sorted. 908 909 Logically Collective on PC 910 911 Input Parameters: 912 + pc - the preconditioner context 913 - doSort - sort the subdomain indices 914 915 Level: intermediate 916 917 .keywords: PC, GASM, set, type 918 919 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(), 920 PCGASMCreateSubdomains2D() 921 @*/ 922 PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort) 923 { 924 PetscErrorCode ierr; 925 926 PetscFunctionBegin; 927 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 928 PetscValidLogicalCollectiveBool(pc,doSort,2); 929 ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr); 930 PetscFunctionReturn(0); 931 } 932 933 #undef __FUNCT__ 934 #define __FUNCT__ "PCGASMGetSubKSP" 935 /*@C 936 PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on 937 this processor. 938 939 Collective on PC iff first_local is requested 940 941 Input Parameter: 942 . pc - the preconditioner context 943 944 Output Parameters: 945 + n_local - the number of blocks on this processor or PETSC_NULL 946 . first_local - the global number of the first block on this processor or PETSC_NULL, 947 all processors must request or all must pass PETSC_NULL 948 - ksp - the array of KSP contexts 949 950 Note: 951 After PCGASMGetSubKSP() the array of KSPes is not to be freed 952 953 Currently for some matrix implementations only 1 block per processor 954 is supported. 955 956 You must call KSPSetUp() before calling PCGASMGetSubKSP(). 957 958 Level: advanced 959 960 .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context 961 962 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), 963 PCGASMCreateSubdomains2D(), 964 @*/ 965 PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[]) 966 { 967 PetscErrorCode ierr; 968 969 PetscFunctionBegin; 970 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 971 ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } 974 975 /* -------------------------------------------------------------------------------------*/ 976 /*MC 977 PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with 978 its own KSP object. 979 980 Options Database Keys: 981 + -pc_gasm_truelocal - Activates PCGGASMSetUseTrueLocal() 982 . -pc_gasm_blocks <blks> - Sets total blocks 983 . -pc_gasm_overlap <ovl> - Sets overlap 984 - -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type 985 986 IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you 987 will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use 988 -pc_gasm_type basic to use the standard GASM. 989 990 Notes: Each processor can have one or more blocks, but a block cannot be shared by more 991 than one processor. Defaults to one block per processor. 992 993 To set options on the solvers for each block append -sub_ to all the KSP, and PC 994 options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly 995 996 To set the options on the solvers separate for each block call PCGASMGetSubKSP() 997 and set the options directly on the resulting KSP object (you can access its PC 998 with KSPGetPC()) 999 1000 1001 Level: beginner 1002 1003 Concepts: additive Schwarz method 1004 1005 References: 1006 An additive variant of the Schwarz alternating method for the case of many subregions 1007 M Dryja, OB Widlund - Courant Institute, New York University Technical report 1008 1009 Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations, 1010 Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X. 1011 1012 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 1013 PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(), 1014 PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType() 1015 1016 M*/ 1017 1018 EXTERN_C_BEGIN 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "PCCreate_GASM" 1021 PetscErrorCode PCCreate_GASM(PC pc) 1022 { 1023 PetscErrorCode ierr; 1024 PC_GASM *osm; 1025 1026 PetscFunctionBegin; 1027 ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr); 1028 osm->N = PETSC_DECIDE; 1029 osm->n = 0; 1030 osm->nmax = 0; 1031 osm->overlap = 1; 1032 osm->ksp = 0; 1033 osm->grestriction = 0; 1034 osm->gprolongation = 0; 1035 osm->gx = 0; 1036 osm->gy = 0; 1037 osm->x = 0; 1038 osm->y = 0; 1039 osm->is = 0; 1040 osm->is_local = 0; 1041 osm->pmat = 0; 1042 osm->type = PC_GASM_RESTRICT; 1043 osm->same_local_solves = PETSC_TRUE; 1044 osm->sort_indices = PETSC_TRUE; 1045 1046 pc->data = (void*)osm; 1047 pc->ops->apply = PCApply_GASM; 1048 pc->ops->applytranspose = PCApplyTranspose_GASM; 1049 pc->ops->setup = PCSetUp_GASM; 1050 pc->ops->destroy = PCDestroy_GASM; 1051 pc->ops->setfromoptions = PCSetFromOptions_GASM; 1052 pc->ops->setuponblocks = PCSetUpOnBlocks_GASM; 1053 pc->ops->view = PCView_GASM; 1054 pc->ops->applyrichardson = 0; 1055 1056 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM", 1057 PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr); 1058 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM", 1059 PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr); 1060 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM", 1061 PCGASMSetOverlap_GASM);CHKERRQ(ierr); 1062 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM", 1063 PCGASMSetType_GASM);CHKERRQ(ierr); 1064 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM", 1065 PCGASMSetSortIndices_GASM);CHKERRQ(ierr); 1066 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM", 1067 PCGASMGetSubKSP_GASM);CHKERRQ(ierr); 1068 PetscFunctionReturn(0); 1069 } 1070 EXTERN_C_END 1071 1072 1073 #undef __FUNCT__ 1074 #define __FUNCT__ "PCGASMCreateSubdomains" 1075 /*@C 1076 PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz 1077 preconditioner for a any problem on a general grid. 1078 1079 Collective 1080 1081 Input Parameters: 1082 + A - The global matrix operator 1083 - n - the number of local blocks 1084 1085 Output Parameters: 1086 . outis - the array of index sets defining the subdomains 1087 1088 Level: advanced 1089 1090 Note: this generates nonoverlapping subdomains; the PCGASM will generate the overlap 1091 from these if you use PCGASMSetLocalSubdomains() 1092 1093 In the Fortran version you must provide the array outis[] already allocated of length n. 1094 1095 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid 1096 1097 .seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains() 1098 @*/ 1099 PetscErrorCode PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[]) 1100 { 1101 MatPartitioning mpart; 1102 const char *prefix; 1103 PetscErrorCode (*f)(Mat,PetscBool *,MatReuse,Mat*); 1104 PetscMPIInt size; 1105 PetscInt i,j,rstart,rend,bs; 1106 PetscBool iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE; 1107 Mat Ad = PETSC_NULL, adj; 1108 IS ispart,isnumb,*is; 1109 PetscErrorCode ierr; 1110 1111 PetscFunctionBegin; 1112 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1113 PetscValidPointer(outis,3); 1114 if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n); 1115 1116 /* Get prefix, row distribution, and block size */ 1117 ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr); 1118 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1119 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 1120 if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs); 1121 1122 /* Get diagonal block from matrix if possible */ 1123 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1124 ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr); 1125 if (f) { 1126 ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr); 1127 } else if (size == 1) { 1128 iscopy = PETSC_FALSE; Ad = A; 1129 } else { 1130 iscopy = PETSC_FALSE; Ad = PETSC_NULL; 1131 } 1132 if (Ad) { 1133 ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 1134 if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);} 1135 } 1136 if (Ad && n > 1) { 1137 PetscBool match,done; 1138 /* Try to setup a good matrix partitioning if available */ 1139 ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr); 1140 ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr); 1141 ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr); 1142 ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr); 1143 if (!match) { 1144 ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr); 1145 } 1146 if (!match) { /* assume a "good" partitioner is available */ 1147 PetscInt na,*ia,*ja; 1148 ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1149 if (done) { 1150 /* Build adjacency matrix by hand. Unfortunately a call to 1151 MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will 1152 remove the block-aij structure and we cannot expect 1153 MatPartitioning to split vertices as we need */ 1154 PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0; 1155 nnz = 0; 1156 for (i=0; i<na; i++) { /* count number of nonzeros */ 1157 len = ia[i+1] - ia[i]; 1158 row = ja + ia[i]; 1159 for (j=0; j<len; j++) { 1160 if (row[j] == i) { /* don't count diagonal */ 1161 len--; break; 1162 } 1163 } 1164 nnz += len; 1165 } 1166 ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr); 1167 ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr); 1168 nnz = 0; 1169 iia[0] = 0; 1170 for (i=0; i<na; i++) { /* fill adjacency */ 1171 cnt = 0; 1172 len = ia[i+1] - ia[i]; 1173 row = ja + ia[i]; 1174 for (j=0; j<len; j++) { 1175 if (row[j] != i) { /* if not diagonal */ 1176 jja[nnz+cnt++] = row[j]; 1177 } 1178 } 1179 nnz += cnt; 1180 iia[i+1] = nnz; 1181 } 1182 /* Partitioning of the adjacency matrix */ 1183 ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr); 1184 ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr); 1185 ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr); 1186 ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr); 1187 ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr); 1188 ierr = MatDestroy(adj);CHKERRQ(ierr); 1189 foundpart = PETSC_TRUE; 1190 } 1191 ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1192 } 1193 ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr); 1194 } 1195 if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);} 1196 1197 ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr); 1198 *outis = is; 1199 1200 if (!foundpart) { 1201 1202 /* Partitioning by contiguous chunks of rows */ 1203 1204 PetscInt mbs = (rend-rstart)/bs; 1205 PetscInt start = rstart; 1206 for (i=0; i<n; i++) { 1207 PetscInt count = (mbs/n + ((mbs % n) > i)) * bs; 1208 ierr = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr); 1209 start += count; 1210 } 1211 1212 } else { 1213 1214 /* Partitioning by adjacency of diagonal block */ 1215 1216 const PetscInt *numbering; 1217 PetscInt *count,nidx,*indices,*newidx,start=0; 1218 /* Get node count in each partition */ 1219 ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr); 1220 ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr); 1221 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1222 for (i=0; i<n; i++) count[i] *= bs; 1223 } 1224 /* Build indices from node numbering */ 1225 ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr); 1226 ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr); 1227 for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */ 1228 ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr); 1229 ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr); 1230 ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr); 1231 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1232 ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr); 1233 for (i=0; i<nidx; i++) 1234 for (j=0; j<bs; j++) 1235 newidx[i*bs+j] = indices[i]*bs + j; 1236 ierr = PetscFree(indices);CHKERRQ(ierr); 1237 nidx *= bs; 1238 indices = newidx; 1239 } 1240 /* Shift to get global indices */ 1241 for (i=0; i<nidx; i++) indices[i] += rstart; 1242 1243 /* Build the index sets for each block */ 1244 for (i=0; i<n; i++) { 1245 ierr = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr); 1246 ierr = ISSort(is[i]);CHKERRQ(ierr); 1247 start += count[i]; 1248 } 1249 1250 ierr = PetscFree(count); 1251 ierr = PetscFree(indices); 1252 ierr = ISDestroy(isnumb);CHKERRQ(ierr); 1253 ierr = ISDestroy(ispart);CHKERRQ(ierr); 1254 } 1255 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "PCGASMDestroySubdomains" 1261 /*@C 1262 PCGASMDestroySubdomains - Destroys the index sets created with 1263 PCGASMCreateSubdomains(). Should be called after setting subdomains 1264 with PCGASMSetLocalSubdomains(). 1265 1266 Collective 1267 1268 Input Parameters: 1269 + n - the number of index sets 1270 . is - the array of index sets 1271 - is_local - the array of local index sets, can be PETSC_NULL 1272 1273 Level: advanced 1274 1275 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid 1276 1277 .seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains() 1278 @*/ 1279 PetscErrorCode PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[]) 1280 { 1281 PetscInt i; 1282 PetscErrorCode ierr; 1283 PetscFunctionBegin; 1284 if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n); 1285 PetscValidPointer(is,2); 1286 for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); } 1287 ierr = PetscFree(is);CHKERRQ(ierr); 1288 if (is_local) { 1289 PetscValidPointer(is_local,3); 1290 for (i=0; i<n; i++) { ierr = ISDestroy(is_local[i]);CHKERRQ(ierr); } 1291 ierr = PetscFree(is_local);CHKERRQ(ierr); 1292 } 1293 PetscFunctionReturn(0); 1294 } 1295 1296 1297 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \ 1298 { \ 1299 PetscInt first_row = first/M, last_row = last/M+1; \ 1300 /* \ 1301 Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \ 1302 of the bounding box of the intersection of the subdomain with the local ownership range (local \ 1303 subdomain). \ 1304 Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \ 1305 of the intersection. \ 1306 */ \ 1307 /* ylow_loc is the grid row containing the first element of the local sumbdomain */ \ 1308 *ylow_loc = PetscMax(first_row,ylow); \ 1309 /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \ 1310 *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft; \ 1311 /* yhigh_loc is the grid row above the last local subdomain element */ \ 1312 *yhigh_loc = PetscMin(last_row,yhigh); \ 1313 /* xright is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \ 1314 *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright; \ 1315 /* Now compute the size of the local subdomain n. */ \ 1316 *n = 0; \ 1317 if(*ylow_loc < *yhigh_loc) { \ 1318 PetscInt width = xright-xleft; \ 1319 *n += width*(*yhigh_loc-*ylow_loc-1); \ 1320 *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \ 1321 *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \ 1322 }\ 1323 } 1324 1325 1326 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "PCGASMCreateSubdomains2D" 1329 /*@ 1330 PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz 1331 preconditioner for a two-dimensional problem on a regular grid. 1332 1333 Collective 1334 1335 Input Parameters: 1336 + M, N - the global number of mesh points in the x and y directions 1337 . Mdomains, Ndomains - the global number of subdomains in the x and y directions 1338 . dof - degrees of freedom per node 1339 - overlap - overlap in mesh lines 1340 1341 Output Parameters: 1342 + Nsub - the number of local subdomains created 1343 . is - array of index sets defining overlapping (if overlap > 0) subdomains 1344 - is_local - array of index sets defining non-overlapping subdomains 1345 1346 Note: 1347 Presently PCAMSCreateSubdomains2d() is valid only for sequential 1348 preconditioners. More general related routines are 1349 PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains(). 1350 1351 Level: advanced 1352 1353 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid 1354 1355 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(), 1356 PCGASMSetOverlap() 1357 @*/ 1358 PetscErrorCode PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local) 1359 { 1360 PetscErrorCode ierr; 1361 PetscMPIInt size, rank; 1362 PetscInt i, j; 1363 PetscInt maxheight, maxwidth; 1364 PetscInt xstart, xleft, xright, xleft_loc, xright_loc; 1365 PetscInt ystart, ylow, yhigh, ylow_loc, yhigh_loc; 1366 PetscInt x[2][2], y[2][2], n[2]; 1367 PetscInt first, last; 1368 PetscInt nidx, *idx; 1369 PetscInt ii,jj,s,q,d; 1370 PetscInt k,kk; 1371 PetscMPIInt color; 1372 MPI_Comm comm, subcomm; 1373 1374 IS **iis; 1375 PetscFunctionBegin; 1376 ierr = PetscObjectGetComm((PetscObject)pc, &comm); CHKERRQ(ierr); 1377 ierr = MPI_Comm_size(comm, &size); CHKERRQ(ierr); 1378 ierr = MPI_Comm_rank(comm, &rank); CHKERRQ(ierr); 1379 ierr = MatGetOwnershipRange(pc->pmat, &first, &last); CHKERRQ(ierr); 1380 if(first%dof || last%dof) { 1381 SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) " 1382 "does not respect the number of degrees of freedom per grid point %D", first, last, dof); 1383 } 1384 /* Determine the number of domains with nonzero intersections with the local ownership range. */ 1385 s = 0; 1386 ystart = 0; 1387 for (j=0; j<Ndomains; ++j) { 1388 maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */ 1389 if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N); 1390 /* Vertical domain limits with an overlap. */ 1391 ylow = PetscMax(ystart - overlap,0); 1392 yhigh = PetscMin(ystart + maxheight + overlap,N); 1393 xstart = 0; 1394 for (i=0; i<Mdomains; ++i) { 1395 maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */ 1396 if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M); 1397 /* Horizontal domain limits with an overlap. */ 1398 xleft = PetscMax(xstart - overlap,0); 1399 xright = PetscMin(xstart + maxwidth + overlap,M); 1400 /* 1401 Determine whether this subdomain intersects this processor's ownership range of pc->pmat. 1402 */ 1403 PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx)); 1404 if(nidx) { 1405 ++s; 1406 } 1407 xstart += maxwidth; 1408 }/* for(i = 0; i < Mdomains; ++i) */ 1409 ystart += maxheight; 1410 }/* for(j = 0; j < Ndomains; ++j) */ 1411 /* Now we can allocate the necessary number of ISs. */ 1412 *nsub = s; 1413 ierr = PetscMalloc((*nsub)*sizeof(IS*),is); CHKERRQ(ierr); 1414 ierr = PetscMalloc((*nsub)*sizeof(IS*),is_local); CHKERRQ(ierr); 1415 s = 0; 1416 ystart = 0; 1417 for (j=0; j<Ndomains; ++j) { 1418 maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */ 1419 if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N); 1420 /* Vertical domain limits with an overlap. */ 1421 y[0][0] = PetscMax(ystart - overlap,0); 1422 y[0][1] = PetscMin(ystart + maxheight + overlap,N); 1423 /* Vertical domain limits without an overlap. */ 1424 y[1][0] = ystart; 1425 y[1][1] = PetscMin(ystart + maxheight,N); 1426 xstart = 0; 1427 for (i=0; i<Mdomains; ++i) { 1428 maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */ 1429 if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M); 1430 /* Horizontal domain limits with an overlap. */ 1431 x[0][0] = PetscMax(xstart - overlap,0); 1432 x[0][1] = PetscMin(xstart + maxwidth + overlap,M); 1433 /* Horizontal domain limits without an overlap. */ 1434 x[1][0] = xstart; 1435 x[1][1] = PetscMin(xstart+maxwidth,M); 1436 /* 1437 Determine whether this domain intersects this processor's ownership range of pc->pmat. 1438 Do this twice: first for the domains with overlaps, and once without. 1439 During the first pass create the subcommunicators, and use them on the second pass as well. 1440 */ 1441 for(q = 0; q < 2; ++q) { 1442 /* 1443 domain limits, (xleft, xright) and (ylow, yheigh) are adjusted 1444 according to whether the domain with an overlap or without is considered. 1445 */ 1446 xleft = x[q][0]; xright = x[q][1]; 1447 ylow = y[q][0]; yhigh = y[q][1]; 1448 PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx)); 1449 nidx *= dof; 1450 n[q] = nidx; 1451 /* 1452 Based on the counted number of indices in the local domain *with an overlap*, 1453 construct a subcommunicator of all the processors supporting this domain. 1454 Observe that a domain with an overlap might have nontrivial local support, 1455 while the domain without an overlap might not. Hence, the decision to participate 1456 in the subcommunicator must be based on the domain with an overlap. 1457 */ 1458 if(q == 0) { 1459 if(nidx) { 1460 color = 1; 1461 } 1462 else { 1463 color = MPI_UNDEFINED; 1464 } 1465 ierr = MPI_Comm_split(comm, color, rank, &subcomm); CHKERRQ(ierr); 1466 } 1467 /* 1468 Proceed only if the number of local indices *with an overlap* is nonzero. 1469 */ 1470 if(n[0]) { 1471 if(q == 0) { 1472 iis = is; 1473 } 1474 if(q == 1) { 1475 /* 1476 The IS for the no-overlap subdomain shares a communicator with the overlapping domain. 1477 Moreover, if the overlap is zero, the two ISs are identical. 1478 */ 1479 if(overlap == 0) { 1480 (*is_local)[s] = (*is)[s]; 1481 ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr); 1482 continue; 1483 } 1484 else { 1485 iis = is_local; 1486 subcomm = ((PetscObject)(*is)[s])->comm; 1487 } 1488 }/* if(q == 1) */ 1489 idx = PETSC_NULL; 1490 ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr); 1491 if(nidx) { 1492 k = 0; 1493 for (jj=ylow_loc; jj<yhigh_loc; ++jj) { 1494 PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft; 1495 PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright; 1496 kk = dof*(M*jj + x0); 1497 for (ii=x0; ii<x1; ++ii) { 1498 for(d = 0; d < dof; ++d) { 1499 idx[k++] = kk++; 1500 } 1501 } 1502 } 1503 } 1504 ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr); 1505 }/* if(n[0]) */ 1506 }/* for(q = 0; q < 2; ++q) */ 1507 if(n[0]) { 1508 ++s; 1509 } 1510 xstart += maxwidth; 1511 }/* for(i = 0; i < Mdomains; ++i) */ 1512 ystart += maxheight; 1513 }/* for(j = 0; j < Ndomains; ++j) */ 1514 PetscFunctionReturn(0); 1515 } 1516 1517 #undef __FUNCT__ 1518 #define __FUNCT__ "PCGASMGetLocalSubdomains" 1519 /*@C 1520 PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor 1521 only) for the additive Schwarz preconditioner. 1522 1523 Not Collective 1524 1525 Input Parameter: 1526 . pc - the preconditioner context 1527 1528 Output Parameters: 1529 + n - the number of subdomains for this processor (default value = 1) 1530 . is - the index sets that define the subdomains for this processor 1531 - is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL) 1532 1533 1534 Notes: 1535 The IS numbering is in the parallel, global numbering of the vector. 1536 1537 Level: advanced 1538 1539 .keywords: PC, GASM, set, local, subdomains, additive Schwarz 1540 1541 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 1542 PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices() 1543 @*/ 1544 PetscErrorCode PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[]) 1545 { 1546 PC_GASM *osm; 1547 PetscErrorCode ierr; 1548 PetscBool match; 1549 1550 PetscFunctionBegin; 1551 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1552 PetscValidIntPointer(n,2); 1553 if (is) PetscValidPointer(is,3); 1554 ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr); 1555 if (!match) { 1556 if (n) *n = 0; 1557 if (is) *is = PETSC_NULL; 1558 } else { 1559 osm = (PC_GASM*)pc->data; 1560 if (n) *n = osm->n; 1561 if (is) *is = osm->is; 1562 if (is_local) *is_local = osm->is_local; 1563 } 1564 PetscFunctionReturn(0); 1565 } 1566 1567 #undef __FUNCT__ 1568 #define __FUNCT__ "PCGASMGetLocalSubmatrices" 1569 /*@C 1570 PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor 1571 only) for the additive Schwarz preconditioner. 1572 1573 Not Collective 1574 1575 Input Parameter: 1576 . pc - the preconditioner context 1577 1578 Output Parameters: 1579 + n - the number of matrices for this processor (default value = 1) 1580 - mat - the matrices 1581 1582 1583 Level: advanced 1584 1585 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi 1586 1587 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 1588 PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains() 1589 @*/ 1590 PetscErrorCode PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[]) 1591 { 1592 PC_GASM *osm; 1593 PetscErrorCode ierr; 1594 PetscBool match; 1595 1596 PetscFunctionBegin; 1597 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1598 PetscValidIntPointer(n,2); 1599 if (mat) PetscValidPointer(mat,3); 1600 if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp()."); 1601 ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr); 1602 if (!match) { 1603 if (n) *n = 0; 1604 if (mat) *mat = PETSC_NULL; 1605 } else { 1606 osm = (PC_GASM*)pc->data; 1607 if (n) *n = osm->n; 1608 if (mat) *mat = osm->pmat; 1609 } 1610 PetscFunctionReturn(0); 1611 } 1612