#define PETSCKSP_DLL

/*
  This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
  In this version each processor may have any number of subdomains and a subdomain may live on multiple
  processors.

       N    - total number of subdomains on all processors
       n    - actual number of subdomains on this processor
       nmax - maximum number of subdomains per processor
*/
#include "private/pcimpl.h"     /*I "petscpc.h" I*/

typedef struct {
  PetscInt   N,n,nmax;
  PetscInt   overlap;             /* overlap requested by user */
  KSP        *ksp;                /* linear solvers for each block */
  Vec        gx,gy;               /* merged work vectors */
  Vec        *x,*y;               /* split work vectors; storage aliases pieces of the storage of the merged vectors above */
  IS         gis, gis_local;      /* merged ISs */
  VecScatter grestriction;        /* merged restriction */
  VecScatter gprolongation;       /* merged prolongation */
  IS         *is;                 /* index set that defines each overlapping subdomain */
  IS         *is_local;           /* index set that defines each local subdomain (same as the subdomain with the overlap removed); may be NULL */
  Mat        *pmat;               /* subdomain block matrices */
  PCGASMType type;                /* use reduced interpolation, restriction or both */
  PetscBool  type_set;            /* if the user set this value (so it won't be changed for symmetric problems) */
  PetscBool  same_local_solves;   /* flag indicating whether all local solvers are the same */
  PetscBool  sort_indices;        /* flag to sort subdomain indices */
} PC_GASM;
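/*
   Overview (descriptive comment; the routines referenced are defined below): PCApply_GASM() scatters
   the global input vector through grestriction into the merged work vector gx, solves each subdomain
   problem with its own KSP acting on the aliased pieces x[i]/y[i] of gx/gy, and then adds the local
   parts of the subdomain solutions back into the global output vector through gprolongation.
*/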
#undef __FUNCT__
#define __FUNCT__ "PCGASMPrintSubdomains"
static PetscErrorCode PCGASMPrintSubdomains(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  char           fname[PETSC_MAX_PATH_LEN+1];
  PetscViewer    viewer, sviewer;
  PetscInt       i,j,nidx;
  const PetscInt *idx;
  char           *cidx;
  PetscBool      found;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr);
  if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }
  for (i=0; i<osm->n; ++i) {
    ierr = PetscViewerASCIIOpen(((PetscObject)(osm->is[i]))->comm,fname,&viewer);CHKERRQ(ierr);
    ierr = ISGetLocalSize(osm->is[i],&nidx);CHKERRQ(ierr);
    ierr = ISGetIndices(osm->is[i],&idx);CHKERRQ(ierr);
    ierr = PetscMalloc(sizeof(char)*(16*nidx+1),&cidx);CHKERRQ(ierr); /* no more than 15 characters per index plus a space */
    ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm,cidx,16*nidx,&sviewer);CHKERRQ(ierr);
    for (j=0; j<nidx; ++j) {
      ierr = PetscViewerStringSPrintf(sviewer,"%D ",idx[j]);CHKERRQ(ierr);
    }
    ierr = PetscViewerDestroy(sviewer);CHKERRQ(ierr);
    ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Subdomain with overlap\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%s",cidx);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
    ierr = PetscFree(cidx);CHKERRQ(ierr);
    ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
    if (osm->is_local) {
      ierr = PetscViewerASCIIOpen(((PetscObject)(osm->is[i]))->comm,fname,&viewer);CHKERRQ(ierr);
      ierr = ISGetLocalSize(osm->is_local[i],&nidx);CHKERRQ(ierr);
      ierr = ISGetIndices(osm->is_local[i],&idx);CHKERRQ(ierr);
      ierr = PetscMalloc(sizeof(char)*(16*nidx+1),&cidx);CHKERRQ(ierr); /* no more than 15 characters per index plus a space */
      ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm,cidx,16*nidx,&sviewer);CHKERRQ(ierr);
      for (j=0; j<nidx; ++j) {
        ierr = PetscViewerStringSPrintf(sviewer,"%D ",idx[j]);CHKERRQ(ierr);
      }
      ierr = PetscViewerDestroy(sviewer);CHKERRQ(ierr);
      ierr = ISRestoreIndices(osm->is_local[i],&idx);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Subdomain without overlap\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%s",cidx);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
      ierr = PetscFree(cidx);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "PCView_GASM"
static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  PetscErrorCode ierr;
  PetscMPIInt    rank, size;
  PetscInt       i,bsz;
  PetscBool      iascii,isstring,print_subdomains = PETSC_FALSE;
  PetscViewer    sviewer;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr);
  if (iascii) {
    char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set";
    if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);}
    if (osm->nmax > 0)     {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);}
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d:%d] number of local subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
    if (osm->same_local_solves) {
      if (osm->ksp) {
        ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr);
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (!rank) {
          ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
    } else {
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
      for (i=0; i<osm->nmax; i++) {
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (i < osm->n) {
          ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    }
  } else if (isstring) {
    ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name);
  }
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  if (print_subdomains) {
    ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "PCSetUp_GASM"
static PetscErrorCode PCSetUp_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscBool      symset,flg;
  PetscInt       i;
  PetscMPIInt    rank, size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  PetscInt       dn;       /* number of indices in a single subdomain assigned to this processor */
  const PetscInt *didx;    /* indices from a single subdomain assigned to this processor */
  PetscInt       ddn;      /* number of indices in all subdomains assigned to this processor */
  PetscInt       *ddidx;   /* indices of all subdomains assigned to this processor */
  IS             gid;      /* identity IS of the size of all subdomains assigned to this processor */
  Vec            x,y;
  PetscScalar    *gxarray, *gyarray;
  PetscInt       gfirst, glast;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  if (!pc->setupcalled) {

    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) { osm->type = PC_GASM_BASIC; }
    }

    if (osm->N == PETSC_DECIDE && osm->n < 1) {
      /* no subdomains given, use one per processor */
      osm->nmax = osm->n = 1;
      ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
      osm->N = size;
    } else if (osm->N == PETSC_DECIDE) {
      PetscInt inwork[2], outwork[2];
      /* determine global number of subdomains and the max number of local subdomains */
      inwork[0] = inwork[1] = osm->n;
      ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
      osm->nmax = outwork[0];
      osm->N    = outwork[1];
    }
    if (!osm->is) { /* create the index sets */
      ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr);
    }
    if (!osm->is_local) {
      /*
         This indicates that osm->is should define a nonoverlapping decomposition
         (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetLocalSubdomains,
         but the assumption is that either the user does the right thing, or subdomains in osm->is have been created
         via PCGASMCreateSubdomains, which guarantees a nonoverlapping decomposition).
         Therefore, osm->is will be used to define osm->is_local.
         If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap,
         so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping.
         Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is.
      */
      ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
      for (i=0; i<osm->n; i++) {
        if (osm->overlap > 0) { /* with positive overlap, osm->is[i] will be modified */
          ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr);
          ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr);
        } else {
          ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr);
          osm->is_local[i] = osm->is[i];
        }
      }
    }
    /* Beyond this point osm->is_local is not null. */
    if (osm->overlap > 0) {
      /* Extend the "overlapping" regions by a number of steps */
      ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr);
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); }

    if (osm->sort_indices) {
      for (i=0; i<osm->n; i++) {
        ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
        ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
      }
    }
    /* Merge the ISs, create merged vectors and scatter contexts. */
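    /*
       Layout sketch (descriptive comment; see the code below): gx and gy are "merged" parallel work
       vectors whose local portion on this processor is the concatenation, in subdomain order, of the
       entries of all of its subdomains.  gis records the global index of each slot of gx, so
       grestriction gathers a global vector into the disjoint union of subdomains, while gprolongation
       adds the entries of the (nonoverlapping) local parts, recorded in gis_local, back into the
       global result.
    */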
    /* Restriction ISs. */
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ddn += dn;
    }
    ierr = PetscMalloc(ddn*sizeof(PetscInt),&ddidx);CHKERRQ(ierr);
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ierr = ISGetIndices(osm->is[i],&didx);CHKERRQ(ierr);
      ierr = PetscMemcpy(ddidx+ddn,didx,sizeof(PetscInt)*dn);CHKERRQ(ierr);
      ierr = ISRestoreIndices(osm->is[i],&didx);CHKERRQ(ierr);
      ddn += dn;
    }
    ierr = ISCreateGeneral(((PetscObject)pc)->comm,ddn,ddidx,PETSC_OWN_POINTER,&osm->gis);CHKERRQ(ierr);
    ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr);
    ierr = VecCreateMPI(((PetscObject)pc)->comm,ddn,PETSC_DECIDE,&osm->gx);CHKERRQ(ierr);
    ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx,&gfirst,&glast);CHKERRQ(ierr);
    ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1,&gid);CHKERRQ(ierr);
    ierr = VecScatterCreate(x,osm->gis,osm->gx,gid,&osm->grestriction);CHKERRQ(ierr);
    ierr = ISDestroy(gid);CHKERRQ(ierr);
    /* Prolongation ISs */
    {
      PetscInt       dn_local;      /* number of indices in the local part of a single domain assigned to this processor */
      const PetscInt *didx_local;   /* global indices from the local part of a single domain assigned to this processor */
      PetscInt       ddn_local;     /* number of indices in the local part of the disjoint union of all domains assigned to this processor */
      PetscInt       *ddidx_local;  /* global indices of the local part of the disjoint union of all domains assigned to this processor */
      ISLocalToGlobalMapping ltog;  /* map from global to local indices on the disjoint union of subdomains: "local" indices run from 0 to ddn-1 */
      PetscInt       *ddidx_llocal; /* mapped local indices of the disjoint union of local parts of subdomains */
      PetscInt       ddn_llocal;    /* number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis */
      IS             gis_llocal;    /* IS with ddidx_llocal indices */
      PetscInt       j;

      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr);
        ddn_local += dn_local;
      }
      ierr = PetscMalloc(ddn_local*sizeof(PetscInt),&ddidx_local);CHKERRQ(ierr);
      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr);
        ierr = ISGetIndices(osm->is_local[i],&didx_local);CHKERRQ(ierr);
        ierr = PetscMemcpy(ddidx_local+ddn_local,didx_local,sizeof(PetscInt)*dn_local);CHKERRQ(ierr);
        ierr = ISRestoreIndices(osm->is_local[i],&didx_local);CHKERRQ(ierr);
        ddn_local += dn_local;
      }
      ierr = PetscMalloc(sizeof(PetscInt)*ddn_local,&ddidx_llocal);CHKERRQ(ierr);
      ierr = ISCreateGeneral(((PetscObject)pc)->comm,ddn_local,ddidx_local,PETSC_OWN_POINTER,&osm->gis_local);CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingCreateIS(osm->gis,&ltog);CHKERRQ(ierr);
      ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingDestroy(ltog);CHKERRQ(ierr);
      if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis",ddn_llocal - ddn_local);
      /* Now convert these localized indices into the global indices into the merged output vector. */
      ierr = VecGetOwnershipRange(osm->gy,&gfirst,&glast);CHKERRQ(ierr);
      for (j=0; j<ddn_llocal; ++j) {
        ddidx_llocal[j] += gfirst;
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal);CHKERRQ(ierr);
      ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);CHKERRQ(ierr);
      ierr = ISDestroy(gis_llocal);CHKERRQ(ierr);
    }
    /* Create the subdomain work vectors. */
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx,&gfirst,&glast);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gx,&gxarray);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gy,&gyarray);CHKERRQ(ierr);
    for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) {
      PetscInt dN;
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr);
    }
    ierr = VecRestoreArray(osm->gx,&gxarray);CHKERRQ(ierr);
    ierr = VecRestoreArray(osm->gy,&gyarray);CHKERRQ(ierr);
    ierr = VecDestroy(x);CHKERRQ(ierr);
    ierr = VecDestroy(y);CHKERRQ(ierr);
    /* Create the local solvers */
    ierr = PetscMalloc(osm->n*sizeof(KSP*),&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) { /* KSPs are local */
      ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    scall = MAT_INITIAL_MATRIX;

  } else {
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
  }

  /*
     Extract out the submatrices.
  */
  if (size > 1) {
    ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is,osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is,osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  }
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over the submatrices, putting them into the local KSPs
  */
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetUpOnBlocks_GASM"
static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCApply_GASM"
static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_GASM_RESTRICT)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
  }
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  /* do the subdomain solves */
  for (i=0; i<osm->n; ++i) {
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
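/*
   Note on the scatter modes above (descriptive comment): with the default PC_GASM_RESTRICT type the
   forward (restriction) scatter is global while the reverse (prolongation) scatter is
   SCATTER_REVERSE_LOCAL, which yields the restricted additive Schwarz variant; PC_GASM_BASIC makes
   both scatters global, and PC_GASM_NONE makes both local.
*/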
#undef __FUNCT__
#define __FUNCT__ "PCApplyTranspose_GASM"
static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).

     Note: these are reversed from the PCApply_GASM() because we are applying the
     transpose of the three terms
  */
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
  }
  if (!(osm->type & PC_GASM_RESTRICT)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCDestroy_GASM"
static PetscErrorCode PCDestroy_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (osm->ksp) {
    for (i=0; i<osm->n; i++) {
      ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
  }
  if (osm->pmat) {
    if (osm->n > 0) {
      ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
    }
  }
  for (i=0; i<osm->n; i++) {
    ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr);
    ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree(osm->x);CHKERRQ(ierr);
  ierr = PetscFree(osm->y);CHKERRQ(ierr);
  ierr = VecDestroy(osm->gx);CHKERRQ(ierr);
  ierr = VecDestroy(osm->gy);CHKERRQ(ierr);

  ierr = VecScatterDestroy(osm->grestriction);CHKERRQ(ierr);
  ierr = VecScatterDestroy(osm->gprolongation);CHKERRQ(ierr);

  if (osm->is) {
    ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
  }
  ierr = PetscFree(osm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetFromOptions_GASM"
static PetscErrorCode PCSetFromOptions_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       blocks,ovl;
  PetscBool      symset,flg;
  PCGASMType     gasmtype;

  PetscFunctionBegin;
  /* set the type to symmetric if matrix is symmetric */
  if (!osm->type_set && pc->pmat) {
    ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
    if (symset && flg) { osm->type = PC_GASM_BASIC; }
  }
  ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr);}
  ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr);}
  flg  = PETSC_FALSE;
  ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr);}
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetLocalSubdomains_GASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
  if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp().");

  if (!pc->setupcalled) {
    osm->n        = n;
    osm->is       = 0;
    osm->is_local = 0;
    if (is) {
      for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
    }
    if (is_local) {
      for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);}
    }
    if (osm->is) {
      ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
    }
    if (is) {
      ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr);
      for (i=0; i<n; i++) { osm->is[i] = is[i]; }
      /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */
      osm->overlap = -1;
    }
    if (is_local) {
      ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
      for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
    }
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetTotalSubdomains_GASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       n;

  PetscFunctionBegin;
  if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);

  /*
     Split the subdomains equally among all processors
  */
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  n = N/size + ((N % size) > rank);
  if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
  if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp().");
  if (!pc->setupcalled) {
    if (osm->is) {
      ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
    }
    osm->N        = N;
    osm->n        = n;
    osm->is       = 0;
    osm->is_local = 0;
  }
  PetscFunctionReturn(0);
}/* PCGASMSetTotalSubdomains_GASM() */
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetOverlap_GASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
  if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
  if (!pc->setupcalled) {
    osm->overlap = ovl;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetType_GASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCGASMSetType_GASM(PC pc,PCGASMType type)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->type     = type;
  osm->type_set = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetSortIndices_GASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->sort_indices = doSort;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMGetSubKSP_GASM"
/*
   FIX: This routine might need to be modified once multiple ranks per subdomain are allowed.
        In particular, it would upset the global subdomain number calculation.
*/
PetscErrorCode PETSCKSP_DLLEXPORT PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");

  if (n) {
    *n = osm->n;
  }
  if (first) {
    ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
    *first -= osm->n;
  }
  if (ksp) {
    /* Assume that the local solves are now different; not necessarily
       true, though!  This flag is used only for PCView_GASM() */
    *ksp = osm->ksp;
    osm->same_local_solves = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
}/* PCGASMGetSubKSP_GASM() */
EXTERN_C_END


#undef __FUNCT__
#define __FUNCT__ "PCGASMSetLocalSubdomains"
/*@C
   PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor
   only) for the additive Schwarz preconditioner.

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
.  n - the number of subdomains for this processor (default value = 1)
.  is - the index sets that define the subdomains for this processor
        (or PETSC_NULL for PETSc to determine subdomains)
-  is_local - the index sets that define the local part of the subdomains for this processor
        (or PETSC_NULL to use the default of 1 subdomain per process)

   Notes:
   The IS numbering is in the parallel, global numbering of the vector.

   By default the GASM preconditioner uses 1 block per processor.

   Use PCGASMSetTotalSubdomains() to set the subdomains for all processors.

   Level: advanced

.keywords: PC, GASM, set, local, subdomains, additive Schwarz

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
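/*
   Example (an illustrative sketch, not taken from the PETSc test suite): a process could register the
   contiguous block of rows it owns as its single subdomain via

       Mat      A;             (assumed already assembled)
       PC       pc;            (assumed already of type PCGASM)
       IS       is;
       PetscInt rstart,rend;

       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&is);CHKERRQ(ierr);
       ierr = PCGASMSetLocalSubdomains(pc,1,&is,PETSC_NULL);CHKERRQ(ierr);
*/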
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetTotalSubdomains"
/*@C
   PCGASMSetTotalSubdomains - Sets the total number of subdomains, across all processors, for the
   additive Schwarz preconditioner.  Either all or no processors in the
   PC communicator must call this routine, with the same value of N.

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  N  - the total number of subdomains for all processors

   Options Database Key:
   To set the total number of subdomain blocks rather than specify the
   index sets, use the option
.    -pc_gasm_blocks <blks> - Sets total blocks

   Notes:
   Currently you cannot use this routine to set the actual subdomain index sets;
   it only sets their total number and splits them evenly among the processors.

   By default the GASM preconditioner uses 1 block per processor.

   These index sets cannot be destroyed until after completion of the
   linear solves for which the GASM preconditioner is being used.

   Use PCGASMSetLocalSubdomains() to set local subdomains.

   Level: advanced

.keywords: PC, GASM, set, total, global, subdomains, additive Schwarz

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCGASMSetTotalSubdomains(PC pc,PetscInt N)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
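/*
   Example (an illustrative sketch): to split 16 subdomains across all processes of the PC's
   communicator, either call

       ierr = PCGASMSetTotalSubdomains(pc,16);CHKERRQ(ierr);

   on every process, or pass -pc_gasm_blocks 16 on the command line and let PCSetFromOptions_GASM()
   make the call.
*/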
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetOverlap"
/*@
   PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
   additive Schwarz preconditioner.  Either all or no processors in the
   PC communicator must call this routine.

   Logically Collective on PC

   Input Parameters:
+  pc  - the preconditioner context
-  ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)

   Options Database Key:
.  -pc_gasm_overlap <ovl> - Sets overlap

   Notes:
   By default the GASM preconditioner uses 1 block per processor.  To use
   multiple blocks per processor, see PCGASMSetTotalSubdomains() and
   PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>).

   The overlap defaults to 1, so if one desires that no additional
   overlap be computed beyond what may have been set with a call to
   PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl
   must be set to 0.  In particular, if one does not explicitly set
   the subdomains in an application code, then all overlap would be computed
   internally by PETSc, and using an overlap of 0 would result in a GASM
   variant that is equivalent to the block Jacobi preconditioner.

   Note that one can define initial index sets with any overlap via
   PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine
   PCGASMSetOverlap() merely allows PETSc to extend that overlap further
   if desired.

   Level: intermediate

.keywords: PC, GASM, set, overlap

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCGASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveInt(pc,ovl,2);
  ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
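/*
   Example (an illustrative sketch): as noted above, with subdomains left to PETSc an overlap of 0
   reduces GASM to block Jacobi, so

       ierr = PCGASMSetOverlap(pc,0);CHKERRQ(ierr);

   (or -pc_gasm_overlap 0 on the command line) can be used to compare against a block Jacobi baseline.
*/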
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetType"
/*@
   PCGASMSetType - Sets the type of restriction and interpolation used
   for local problems in the additive Schwarz method.

   Logically Collective on PC

   Input Parameters:
+  pc   - the preconditioner context
-  type - variant of GASM, one of
.vb
      PC_GASM_BASIC       - full interpolation and restriction
      PC_GASM_RESTRICT    - full restriction, local processor interpolation
      PC_GASM_INTERPOLATE - full interpolation, local processor restriction
      PC_GASM_NONE        - local processor restriction and interpolation
.ve

   Options Database Key:
.  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

   Level: intermediate

.keywords: PC, GASM, set, type

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCGASMSetType(PC pc,PCGASMType type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveEnum(pc,type,2);
  ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetSortIndices"
/*@
   PCGASMSetSortIndices - Determines whether subdomain indices are sorted.

   Logically Collective on PC

   Input Parameters:
+  pc     - the preconditioner context
-  doSort - sort the subdomain indices

   Level: intermediate

.keywords: PC, GASM, set, type

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCGASMSetSortIndices(PC pc,PetscBool doSort)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,doSort,2);
  ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMGetSubKSP"
/*@C
   PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
   this processor.

   Collective on PC iff first_local is requested

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n_local - the number of blocks on this processor or PETSC_NULL
.  first_local - the global number of the first block on this processor or PETSC_NULL,
                 all processors must request or all must pass PETSC_NULL
-  ksp - the array of KSP contexts

   Notes:
   After PCGASMGetSubKSP() the array of KSP contexts is not to be freed.

   Currently for some matrix implementations only 1 block per processor
   is supported.

   You must call KSPSetUp() before calling PCGASMGetSubKSP().

   Level: advanced

.keywords: PC, GASM, additive Schwarz, get, sub, KSP, context

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMSetOverlap(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
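/*
   Example (an illustrative sketch; ksp names the outer solver and is an assumption of this comment):
   after setup, the block solvers can be reconfigured directly, e.g. to use ILU on every local block:

       KSP      *subksp;
       PC       pc,subpc;
       PetscInt i,nlocal,first;

       ierr = KSPSetUp(ksp);CHKERRQ(ierr);
       ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
       ierr = PCGASMGetSubKSP(pc,&nlocal,&first,&subksp);CHKERRQ(ierr);
       for (i=0; i<nlocal; i++) {
         ierr = KSPGetPC(subksp[i],&subpc);CHKERRQ(ierr);
         ierr = PCSetType(subpc,PCILU);CHKERRQ(ierr);
       }
*/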
/* -------------------------------------------------------------------------------------*/
/*MC
   PCGASM - Use the (restricted) additive Schwarz method, in which each block is (approximately) solved with
           its own KSP object.

   Options Database Keys:
+  -pc_gasm_truelocal - Activates PCGASMSetUseTrueLocal()
.  -pc_gasm_blocks <blks> - Sets total blocks
.  -pc_gasm_overlap <ovl> - Sets overlap
-  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

   IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
   will get a different convergence rate due to the default option of -pc_gasm_type restrict.  Use
   -pc_gasm_type basic to get the standard GASM.

   Notes: Each processor can have one or more blocks, but a block cannot be shared by more
   than one processor.  Defaults to one block per processor.

   To set options on the solvers for each block, append -sub_ to all the KSP and PC
   options database keys.  For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

   To set the options on the solvers separately for each block, call PCGASMGetSubKSP()
   and set the options directly on the resulting KSP object (you can access its PC
   with KSPGetPC())

   Level: beginner

   Concepts: additive Schwarz method

   References:
   An additive variant of the Schwarz alternating method for the case of many subregions,
   M. Dryja and O. B. Widlund, Courant Institute, New York University, Technical Report.

   Domain Decomposition: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
   Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.

.seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
          PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(),
          PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()

M*/
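/*
   Example (an illustrative sketch of command-line usage; the executable name is hypothetical):

       mpiexec -n 4 ./app -ksp_type gmres -pc_type gasm -pc_gasm_overlap 2 -sub_ksp_type preonly -sub_pc_type lu

   runs GASM with one subdomain per process, overlap 2, and an LU direct solve on each block.
*/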
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_GASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_GASM(PC pc)
{
  PetscErrorCode ierr;
  PC_GASM        *osm;

  PetscFunctionBegin;
  ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr);
  osm->N                 = PETSC_DECIDE;
  osm->n                 = 0;
  osm->nmax              = 0;
  osm->overlap           = 1;
  osm->ksp               = 0;
  osm->grestriction      = 0;
  osm->gprolongation     = 0;
  osm->gx                = 0;
  osm->gy                = 0;
  osm->x                 = 0;
  osm->y                 = 0;
  osm->is                = 0;
  osm->is_local          = 0;
  osm->pmat              = 0;
  osm->type              = PC_GASM_RESTRICT;
  osm->same_local_solves = PETSC_TRUE;
  osm->sort_indices      = PETSC_TRUE;

  pc->data                 = (void*)osm;
  pc->ops->apply           = PCApply_GASM;
  pc->ops->applytranspose  = PCApplyTranspose_GASM;
  pc->ops->setup           = PCSetUp_GASM;
  pc->ops->destroy         = PCDestroy_GASM;
  pc->ops->setfromoptions  = PCSetFromOptions_GASM;
  pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
  pc->ops->view            = PCView_GASM;
  pc->ops->applyrichardson = 0;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM",
                                           PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM",
                                           PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM",
                                           PCGASMSetOverlap_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM",
                                           PCGASMSetType_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM",
                                           PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM",
                                           PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END


#undef __FUNCT__
#define __FUNCT__ "PCGASMCreateSubdomains"
/*@C
   PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
   preconditioner for any problem on a general grid.

   Collective

   Input Parameters:
+  A - The global matrix operator
-  n - the number of local blocks

   Output Parameters:
.  outis - the array of index sets defining the subdomains

   Level: advanced

   Note: this generates nonoverlapping subdomains; PCGASM will generate the overlap
   from these if you use PCGASMSetLocalSubdomains().

   In the Fortran version you must provide the array outis[] already allocated of length n.

.keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid

.seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
{
  MatPartitioning mpart;
  const char      *prefix;
  PetscErrorCode  (*f)(Mat,PetscBool*,MatReuse,Mat*);
  PetscMPIInt     size;
  PetscInt        i,j,rstart,rend,bs;
  PetscBool       iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
  Mat             Ad = PETSC_NULL, adj;
  IS              ispart,isnumb,*is;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(outis,3);
  if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);

  /* Get prefix, row distribution, and block size */
  ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);

  /* Get diagonal block from matrix if possible */
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr);
  } else if (size == 1) {
    iscopy = PETSC_FALSE; Ad = A;
  } else {
    iscopy = PETSC_FALSE; Ad = PETSC_NULL;
  }
  if (Ad) {
    ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
    if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
  }
  if (Ad && n > 1) {
    PetscBool match,done;
    /* Try to setup a good matrix partitioning if available */
    ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
    ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
    ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
    ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
    if (!match) {
      ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
    }
    if (!match) { /* assume a "good" partitioner is available */
      PetscInt na,*ia,*ja;
      ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
      if (done) {
        /* Build adjacency matrix by hand.
           Unfortunately a call to
           MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
           remove the block-aij structure and we cannot expect
           MatPartitioning to split vertices as we need */
        PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
        nnz = 0;
        for (i=0; i<na; i++) { /* count number of nonzeros */
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] == i) { /* don't count diagonal */
              len--; break;
            }
          }
          nnz += len;
        }
        ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
        ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
        nnz    = 0;
        iia[0] = 0;
        for (i=0; i<na; i++) { /* fill adjacency */
          cnt = 0;
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] != i) { /* if not diagonal */
              jja[nnz+cnt++] = row[j];
            }
          }
          nnz += cnt;
          iia[i+1] = nnz;
        }
        /* Partitioning of the adjacency matrix */
        ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
        ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
        ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
        ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
        ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
        ierr = MatDestroy(adj);CHKERRQ(ierr);
        foundpart = PETSC_TRUE;
      }
      ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
    }
    ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr);
  }
  if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);}

  ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
  *outis = is;

  if (!foundpart) {

    /* Partitioning by contiguous chunks of rows */

    PetscInt mbs   = (rend-rstart)/bs;
    PetscInt start = rstart;
    for (i=0; i<n; i++) {
      PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
      ierr = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
      start += count;
    }

  } else {

    /* Partitioning by adjacency of diagonal block */

    const PetscInt *numbering;
    PetscInt       *count,nidx,*indices,*newidx,start=0;
    /* Get node count in each partition */
    ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
    ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      for (i=0; i<n; i++) count[i] *= bs;
    }
    /* Build indices from node numbering */
    ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
    ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
    for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
    ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
    ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
    ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
      for (i=0; i<nidx; i++)
        for (j=0; j<bs; j++)
          newidx[i*bs+j] = indices[i]*bs + j;
      ierr = PetscFree(indices);CHKERRQ(ierr);
      nidx   *= bs;
      indices = newidx;
    }
    /* Shift to get global indices */
    for (i=0; i<nidx; i++) indices[i] += rstart;

    /* Build the index sets for each block */
    for (i=0; i<n; i++) {
      ierr = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
      ierr = ISSort(is[i]);CHKERRQ(ierr);
      start += count[i];
    }

    ierr = PetscFree(count);CHKERRQ(ierr);
    ierr = PetscFree(indices);CHKERRQ(ierr);
    ierr = ISDestroy(isnumb);CHKERRQ(ierr);
    ierr = ISDestroy(ispart);CHKERRQ(ierr);

  }

  PetscFunctionReturn(0);
}
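/*
   Example (an illustrative sketch): creating four nonoverlapping local subdomains, using them, and
   cleaning up after the solves are complete:

       IS *is;

       ierr = PCGASMCreateSubdomains(A,4,&is);CHKERRQ(ierr);
       ierr = PCGASMSetLocalSubdomains(pc,4,is,PETSC_NULL);CHKERRQ(ierr);
       ...  solve with the enclosing KSP  ...
       ierr = PCGASMDestroySubdomains(4,is,PETSC_NULL);CHKERRQ(ierr);
*/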
#undef __FUNCT__
#define __FUNCT__ "PCGASMDestroySubdomains"
/*@C
   PCGASMDestroySubdomains - Destroys the index sets created with
   PCGASMCreateSubdomains().  Should be called after setting subdomains
   with PCGASMSetLocalSubdomains().

   Collective

   Input Parameters:
+  n - the number of index sets
.  is - the array of index sets
-  is_local - the array of local index sets, can be PETSC_NULL

   Level: advanced

.keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid

.seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
{
  PetscInt       i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
  PetscValidPointer(is,2);
  for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); }
  ierr = PetscFree(is);CHKERRQ(ierr);
  if (is_local) {
    PetscValidPointer(is_local,3);
    for (i=0; i<n; i++) { ierr = ISDestroy(is_local[i]);CHKERRQ(ierr); }
    ierr = PetscFree(is_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


#define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
{                                                                                                        \
  PetscInt first_row = first/M, last_row = last/M+1;                                                     \
  /*                                                                                                     \
   Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners        \
   of the bounding box of the intersection of the subdomain with the local ownership range (local        \
   subdomain).                                                                                           \
   Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows        \
   of the intersection.                                                                                  \
  */                                                                                                     \
  /* ylow_loc is the grid row containing the first element of the local subdomain */                     \
  ylow_loc = PetscMax(first_row,ylow);                                                                   \
  /* xleft_loc is the offset of the first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  xleft_loc = ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft;                                     \
  /* yhigh_loc is the grid row above the last local subdomain element */                                 \
  yhigh_loc = PetscMin(last_row,yhigh);                                                                  \
  /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  xright_loc = yhigh_loc==last_row ? PetscMin(xright,last%M) : xright;                                   \
  /* Now compute the size of the local subdomain n. */                                                   \
  n = 0;                                                                                                 \
  if (ylow_loc < yhigh_loc) {                                                                            \
    PetscInt width = xright-xleft;                                                                       \
    n += width*(yhigh_loc-ylow_loc-1);                                                                   \
    n += PetscMin(PetscMax(xright_loc-xleft,0),width);                                                   \
    n -= PetscMin(PetscMax(xleft_loc-xleft,0),width);                                                    \
  }                                                                                                      \
}
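/*
   Worked example for the macro above (numbers invented for this comment): with M = 10 grid columns
   and a processor owning rows [first,last) = [23,57), first_row = 2 and last_row = 6.  For a
   subdomain with xleft = 0, xright = 10, ylow = 0, yhigh = 4 the macro gives ylow_loc = 2,
   yhigh_loc = 4, xleft_loc = 3 (= 23%10) and xright_loc = 10, hence n = 10*(4-2-1) + 10 - 3 = 17,
   i.e. the 7 owned points of grid row 2 plus the 10 points of grid row 3.
*/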
#undef __FUNCT__
#define __FUNCT__ "PCGASMCreateSubdomains2D"
/*@
   PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
   preconditioner for a two-dimensional problem on a regular grid.

   Collective

   Input Parameters:
+  M, N - the global number of mesh points in the x and y directions
.  Mdomains, Ndomains - the global number of subdomains in the x and y directions
.  dof - degrees of freedom per node
-  overlap - overlap in mesh lines

   Output Parameters:
+  Nsub - the number of local subdomains created
.  is - array of index sets defining overlapping (if overlap > 0) subdomains
-  is_local - array of index sets defining non-overlapping subdomains

   Note:
   Presently PCGASMCreateSubdomains2D() is valid only for sequential
   preconditioners.  More general related routines are
   PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains().

   Level: advanced

.keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMSetOverlap()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **is,IS **is_local)
{
  PetscErrorCode ierr;
  PetscMPIInt    size, rank;
  PetscInt       i, j;
  PetscInt       maxheight, maxwidth;
  PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
  PetscInt       ystart, ylow, yhigh, ylow_loc, yhigh_loc;
  PetscInt       x[2][2], y[2][2];
  PetscInt       first, last;
  PetscInt       nidx, *idx;
  PetscInt       ii,jj,s,q,d;
  PetscInt       k,kk;
  PetscMPIInt    color;
  MPI_Comm       comm, subcomm;
  IS             **iis;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(pc->pmat,&first,&last);CHKERRQ(ierr);
  if (first%dof || last%dof) {
    SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
             "does not respect the number of degrees of freedom per grid point %D",first,last,dof);
  }
  /* Determine the number of domains with nonzero intersections with the local ownership range. */
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D",Ndomains,N);
    /* Vertical domain limits without an overlap. */
    ylow   = ystart;
    yhigh  = PetscMin(ystart + maxheight,N);
    xstart = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D",Mdomains,M);
      /* Horizontal domain limits without an overlap. */
      xleft  = xstart;
      xright = PetscMin(xstart+maxwidth,M);
      /*
         Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
      */

#undef __FUNCT__
#define __FUNCT__ "PCGASMCreateSubdomains2D"
/*@
   PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
   preconditioner for a two-dimensional problem on a regular grid.

   Collective

   Input Parameters:
+  M, N - the global number of mesh points in the x and y directions
.  Mdomains, Ndomains - the global number of subdomains in the x and y directions
.  dof - degrees of freedom per node
-  overlap - overlap in mesh lines

   Output Parameters:
+  nsub - the number of local subdomains created
.  is - array of index sets defining overlapping (if overlap > 0) subdomains
-  is_local - array of index sets defining non-overlapping subdomains

   Note:
   Presently PCGASMCreateSubdomains2D() is valid only for sequential
   preconditioners. More general related routines are
   PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains().

   Level: advanced

.keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMSetOverlap()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
{
  PetscErrorCode ierr;
  PetscMPIInt    size, rank;
  PetscInt       i, j;
  PetscInt       maxheight, maxwidth;
  PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
  PetscInt       ystart, ylow, yhigh, ylow_loc, yhigh_loc;
  PetscInt       x[2][2], y[2][2];
  PetscInt       first, last;
  PetscInt       nidx, *idx;
  PetscInt       ii,jj,s,q,d;
  PetscInt       k,kk;
  PetscMPIInt    color;
  MPI_Comm       comm, subcomm;
  IS             **iis;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
  if (first%dof || last%dof) {
    SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
             "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
  }
  /* Determine the number of domains with nonzero intersections with the local ownership range. */
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
    /* Vertical domain limits without an overlap. */
    ylow   = ystart;
    yhigh  = PetscMin(ystart + maxheight,N);
    xstart = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits without an overlap. */
      xleft  = xstart;
      xright = PetscMin(xstart+maxwidth,M);
      /* Determine whether this subdomain intersects this processor's ownership range of pc->pmat. */
      PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,nidx);
      if (nidx) ++s;
      xstart += maxwidth;
    } /* for (i=0; i<Mdomains; ++i) */
    ystart += maxheight;
  } /* for (j=0; j<Ndomains; ++j) */
  /* Now we can allocate the necessary number of ISs. */
  *nsub = s;
  ierr  = PetscMalloc((*nsub)*sizeof(IS),is);CHKERRQ(ierr);
  ierr  = PetscMalloc((*nsub)*sizeof(IS),is_local);CHKERRQ(ierr);
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
    /* Vertical domain limits without an overlap. */
    y[0][0] = ystart;
    y[0][1] = PetscMin(ystart + maxheight,N);
    /* Vertical domain limits with an overlap. */
    y[1][0] = PetscMax(ystart - overlap,0);
    y[1][1] = PetscMin(ystart + maxheight + overlap,N);
    xstart  = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits without an overlap. */
      x[0][0] = xstart;
      x[0][1] = PetscMin(xstart+maxwidth,M);
      /* Horizontal domain limits with an overlap. */
      x[1][0] = PetscMax(xstart - overlap,0);
      x[1][1] = PetscMin(xstart + maxwidth + overlap,M);
      /*
        Determine whether this domain intersects this processor's ownership range of pc->pmat.
        Do this twice: first for the domains without an overlap (q == 0), then for the domains
        with an overlap (q == 1). During the first pass create the subcommunicators, and use
        them on the second pass as well.
      */
      for (q=0; q<2; ++q) {
        /*
          The domain limits (xleft, xright) and (ylow, yhigh) are adjusted
          according to whether the domain with or without an overlap is considered.
        */
        xleft = x[q][0]; xright = x[q][1];
        ylow  = y[q][0]; yhigh  = y[q][1];
        PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,nidx);
        nidx *= dof;
        /*
          Based on the counted number of indices in the local nonoverlapping domain,
          construct a subcommunicator of all the processors supporting this domain.
        */
        if (q == 0) {
          color = nidx ? 1 : MPI_UNDEFINED;
          ierr  = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
        }
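        /*
           Note (added commentary): ranks passing color == MPI_UNDEFINED receive
           subcomm == MPI_COMM_NULL and skip this subdomain below, since their nidx
           is zero. Each subcommunicator therefore contains exactly the processors
           whose ownership range intersects the nonoverlapping subdomain, and the
           collective ISCreateGeneral() below runs only on those processors.
        */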
        /* Proceed only if the number of local indices is nonzero. */
        if (nidx) {
          if (q == 0) iis = is_local;
          if (q == 1) {
            /*
              The IS for the overlapping subdomain shares a communicator with the IS for
              the corresponding nonoverlapping subdomain. Moreover, if the overlap is
              zero, the two ISs are identical.
            */
            if (overlap == 0) {
              (*is)[s] = (*is_local)[s];
              ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
              continue;
            } else {
              iis     = is;
              subcomm = ((PetscObject)(*is_local)[s])->comm;
            }
          } /* if (q == 1) */
          ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
          k = 0;
          for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
            PetscInt x0 = (jj==ylow_loc)    ? xleft_loc  : xleft;
            PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
            kk = dof*(M*jj + x0);
            for (ii=x0; ii<x1; ++ii) {
              for (d=0; d<dof; ++d) idx[k++] = kk++;
            }
          }
          ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr);
        } /* if (nidx) */
      } /* for (q=0; q<2; ++q) */
      if (nidx) ++s;
      xstart += maxwidth;
    } /* for (i=0; i<Mdomains; ++i) */
    ystart += maxheight;
  } /* for (j=0; j<Ndomains; ++j) */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMGetLocalSubdomains"
/*@C
   PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor
   only) for the additive Schwarz preconditioner.

   Not Collective

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n - the number of subdomains for this processor (default value = 1)
.  is - the index sets that define the subdomains for this processor
-  is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)

   Notes:
   The IS numbering is in the parallel, global numbering of the vector.

   Level: advanced

.keywords: PC, GASM, set, local, subdomains, additive Schwarz

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
{
  PC_GASM        *osm;
  PetscErrorCode ierr;
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidIntPointer(n,2);
  if (is) PetscValidPointer(is,3);
  ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  if (!match) {
    if (n)        *n        = 0;
    if (is)       *is       = PETSC_NULL;
    if (is_local) *is_local = PETSC_NULL;
  } else {
    osm = (PC_GASM*)pc->data;
    if (n)        *n        = osm->n;
    if (is)       *is       = osm->is;
    if (is_local) *is_local = osm->is_local;
  }
  PetscFunctionReturn(0);
}
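
/*
   Usage sketch (illustrative only, not part of the library source): querying and
   viewing the subdomains once they have been set; pc, n, is, is_local, and i are
   placeholder variables. Each IS is viewed on its own (sub)communicator.

      PetscInt n, i;
      IS       *is, *is_local;
      ierr = PCGASMGetLocalSubdomains(pc,&n,&is,&is_local);CHKERRQ(ierr);
      for (i=0; i<n; i++) {
        ierr = ISView(is[i],PETSC_VIEWER_STDOUT_(((PetscObject)is[i])->comm));CHKERRQ(ierr);
      }
*/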

#undef __FUNCT__
#define __FUNCT__ "PCGASMGetLocalSubmatrices"
/*@C
   PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor
   only) for the additive Schwarz preconditioner.

   Not Collective

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n - the number of matrices for this processor (default value = 1)
-  mat - the matrices

   Level: advanced

.keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
{
  PC_GASM        *osm;
  PetscErrorCode ierr;
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidIntPointer(n,2);
  if (mat) PetscValidPointer(mat,3);
  if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
  ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  if (!match) {
    if (n)   *n   = 0;
    if (mat) *mat = PETSC_NULL;
  } else {
    osm = (PC_GASM*)pc->data;
    if (n)   *n   = osm->n;
    if (mat) *mat = osm->pmat;
  }
  PetscFunctionReturn(0);
}
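
/*
   Usage sketch (illustrative only, not part of the library source): the submatrices
   exist only after setup, so fetch them after KSPSetUp() or PCSetUp(); ksp, pc, n,
   mat, and i are placeholder variables. Each matrix is viewed on its own communicator.

      PetscInt n, i;
      PC       pc;
      Mat      *mat;
      ierr = KSPSetUp(ksp);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCGASMGetLocalSubmatrices(pc,&n,&mat);CHKERRQ(ierr);
      for (i=0; i<n; i++) {
        ierr = MatView(mat[i],PETSC_VIEWER_STDOUT_(((PetscObject)mat[i])->comm));CHKERRQ(ierr);
      }
*/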