xref: /petsc/src/ksp/pc/impls/gasm/gasm.c (revision a06653b4217a6b7095655997faad757ca7c559a5)
1 
2 /*
3   This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation.
4   In this version each processor may have any number of subdomains and a subdomain may live on multiple
5   processors.
6 
7        N    - total number of subdomains on all processors
8        n    - actual number of subdomains on this processor
9        nmax - maximum number of subdomains per processor
10 */
11 #include <private/pcimpl.h>     /*I "petscpc.h" I*/
12 
13 typedef struct {
14   PetscInt   N,n,nmax;
15   PetscInt   overlap;             /* overlap requested by user */
16   KSP        *ksp;                /* linear solvers for each block */
17   Vec        gx,gy;               /* Merged work vectors */
18   Vec        *x,*y;               /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
19   IS         gis, gis_local;      /* merged ISs */
20   VecScatter grestriction;        /* merged restriction */
21   VecScatter gprolongation;       /* merged prolongation */
22   IS         *is;                 /* index set that defines each overlapping subdomain */
23   IS         *is_local;           /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */
24   Mat        *pmat;               /* subdomain block matrices */
25   PCGASMType  type;               /* use reduced interpolation, restriction or both */
26   PetscBool  type_set;            /* if user set this value (so won't change it for symmetric problems) */
27   PetscBool  same_local_solves;   /* flag indicating whether all local solvers are same */
28   PetscBool  sort_indices;        /* flag to sort subdomain indices */
29 } PC_GASM;
30 
31 #undef __FUNCT__
32 #define __FUNCT__ "PCGASMPrintSubdomains"
33 static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
34 {
35   PC_GASM         *osm  = (PC_GASM*)pc->data;
36   const char     *prefix;
37   char           fname[PETSC_MAX_PATH_LEN+1];
38   PetscViewer    viewer, sviewer;
39   PetscInt       i,j,nidx;
40   const PetscInt *idx;
41   char           *cidx;
42   PetscBool      found;
43   PetscMPIInt    size, rank;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size); CHKERRQ(ierr);
48   ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank); CHKERRQ(ierr);
49   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
50   ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr);
51   if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
52   for (i=0;i<osm->n;++i) {
53     ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
54     ierr = ISGetLocalSize(osm->is[i], &nidx); CHKERRQ(ierr);
55     /*
56      No more than 15 characters per index plus a space.
57      PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
58      in case nidx == 0. That will take care of the space for the trailing '\0' as well.
59      For nidx == 0, the whole string 16 '\0'.
60      */
61     ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx); CHKERRQ(ierr);
62     ierr = ISGetIndices(osm->is[i], &idx);    CHKERRQ(ierr);
63     ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*(nidx+1), &sviewer); CHKERRQ(ierr);
64     for(j = 0; j < nidx; ++j) {
65       ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]); CHKERRQ(ierr);
66     }
67     ierr = PetscViewerDestroy(sviewer); CHKERRQ(ierr);
68     ierr = ISRestoreIndices(osm->is[i],&idx); CHKERRQ(ierr);
69 
70     ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n"); CHKERRQ(ierr);
71     ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx); CHKERRQ(ierr);
72     ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
73     ierr = PetscViewerASCIIPrintf(viewer, "\n"); CHKERRQ(ierr);
74     ierr = PetscFree(cidx); CHKERRQ(ierr);
75     ierr = PetscViewerDestroy(viewer); CHKERRQ(ierr);
76     if (osm->is_local) {
77       ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
78       ierr = ISGetLocalSize(osm->is_local[i], &nidx); CHKERRQ(ierr);
79       /*
80        No more than 15 characters per index plus a space.
81        PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
82        in case nidx == 0. That will take care of the space for the trailing '\0' as well.
83        For nidx == 0, the whole string 16 '\0'.
84        */
85       ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx); CHKERRQ(ierr);
86       ierr = ISGetIndices(osm->is_local[i], &idx);    CHKERRQ(ierr);
87       ierr = PetscViewerStringOpen(((PetscObject)(osm->is_local[i]))->comm, cidx, 16*(nidx+1), &sviewer); CHKERRQ(ierr);
88       for(j = 0; j < nidx; ++j) {
89         ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]); CHKERRQ(ierr);
90       }
91       ierr = PetscViewerDestroy(sviewer); CHKERRQ(ierr);
92       ierr = ISRestoreIndices(osm->is_local[i],&idx); CHKERRQ(ierr);
93 
94       ierr = PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n"); CHKERRQ(ierr);
95       ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx); CHKERRQ(ierr);
96       ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
97       ierr = PetscViewerASCIIPrintf(viewer, "\n"); CHKERRQ(ierr);
98       ierr = PetscFree(cidx); CHKERRQ(ierr);
99       ierr = PetscViewerDestroy(viewer); CHKERRQ(ierr);
100     }
101   }
102   PetscFunctionReturn(0);
103 }
104 
105 
106 #undef __FUNCT__
107 #define __FUNCT__ "PCView_GASM"
108 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
109 {
110   PC_GASM         *osm = (PC_GASM*)pc->data;
111   const char     *prefix;
112   PetscErrorCode ierr;
113   PetscMPIInt    rank, size;
114   PetscInt       i,bsz;
115   PetscBool      iascii,isstring, print_subdomains;
116   PetscViewer    sviewer;
117 
118 
119   PetscFunctionBegin;
120   ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size); CHKERRQ(ierr);
121   ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank); CHKERRQ(ierr);
122   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
123   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
124   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
125   ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr);
126   if (iascii) {
127     char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set";
128     if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);}
129     if (osm->nmax > 0)     {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);}
130     ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr);
131     ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
132     ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr);
133     ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
134     ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
135     if (osm->same_local_solves) {
136       if (osm->ksp) {
137         ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr);
138         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
139         if (!rank) {
140           ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
141           ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
142           ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
143         }
144         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
145       }
146     } else {
147       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
148       ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr);
149       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
150       ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
151       for (i=0; i<osm->nmax; i++) {
152         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
153         if (i < osm->n) {
154           ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
155           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr);
156           ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
157           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
158         }
159         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
160       }
161       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
162       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
163     }
164   } else if (isstring) {
165     ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr);
166     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
167     if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
168     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
169   } else {
170     SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name);
171   }
172   ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
173   if(print_subdomains) {
174     ierr = PCGASMPrintSubdomains(pc); CHKERRQ(ierr);
175   }
176   PetscFunctionReturn(0);
177 }
178 
179 
180 
181 
182 
183 #undef __FUNCT__
184 #define __FUNCT__ "PCSetUp_GASM"
185 static PetscErrorCode PCSetUp_GASM(PC pc)
186 {
187   PC_GASM         *osm  = (PC_GASM*)pc->data;
188   PetscErrorCode ierr;
189   PetscBool      symset,flg;
190   PetscInt       i;
191   PetscMPIInt    rank, size;
192   MatReuse       scall = MAT_REUSE_MATRIX;
193   KSP            ksp;
194   PC             subpc;
195   const char     *prefix,*pprefix;
196   PetscInt       dn;       /* Number of indices in a single subdomain assigned to this processor. */
197   const PetscInt *didx;    /* Indices from a single subdomain assigned to this processor. */
198   PetscInt       ddn;      /* Number of indices in all subdomains assigned to this processor. */
199   PetscInt       *ddidx;   /* Indices of all subdomains assigned to this processor. */
200   IS             gid;      /* Identity IS of the size of all subdomains assigned to this processor. */
201   Vec            x,y;
202   PetscScalar    *gxarray, *gyarray;
203   PetscInt       gfirst, glast;
204 
205   PetscFunctionBegin;
206   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
207   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
208   if (!pc->setupcalled) {
209 
210     if (!osm->type_set) {
211       ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
212       if (symset && flg) { osm->type = PC_GASM_BASIC; }
213     }
214 
215     if (osm->N == PETSC_DECIDE && osm->n < 1) {
216       /* no subdomains given, use one per processor */
217       osm->nmax = osm->n = 1;
218       ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
219       osm->N = size;
220     } else if (osm->N == PETSC_DECIDE) {
221       PetscInt inwork[2], outwork[2];
222       /* determine global number of subdomains and the max number of local subdomains */
223       inwork[0] = inwork[1] = osm->n;
224       ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
225       osm->nmax = outwork[0];
226       osm->N    = outwork[1];
227     }
228     if (!osm->is){ /* create the index sets */
229       ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr);
230     }
231     if (!osm->is_local) {
232       /*
233 	 This indicates that osm->is should define a nonoverlapping decomposition
234 	 (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetLocalSubdomains,
235 	  but the assumption is that either the user does the right thing, or subdomains in ossm->is have been created
236 	  via PCGASMCreateSubdomains, which guarantees a nonoverlapping decomposition).
237 	 Therefore, osm->is will be used to define osm->is_local.
238 	 If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap,
239 	 so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping.
240 	 Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is.
241       */
242       ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
243       for (i=0; i<osm->n; i++) {
244         if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
245           ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr);
246           ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr);
247         } else {
248           ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr);
249           osm->is_local[i] = osm->is[i];
250         }
251       }
252     }
253     /* Beyond this point osm->is_local is not null. */
254     if (osm->overlap > 0) {
255       /* Extend the "overlapping" regions by a number of steps */
256       ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr);
257     }
258     ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
259     flg  = PETSC_FALSE;
260     ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
261     if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); }
262 
263     if (osm->sort_indices) {
264       for (i=0; i<osm->n; i++) {
265         ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
266 	ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
267       }
268     }
269     /* Merge the ISs, create merged vectors and scatter contexts. */
270     /* Restriction ISs. */
271     ddn = 0;
272     for (i=0; i<osm->n; i++) {
273       ierr = ISGetLocalSize(osm->is[i],&dn);          CHKERRQ(ierr);
274       ddn += dn;
275     }
276     ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx); CHKERRQ(ierr);
277     ddn = 0;
278     for (i=0; i<osm->n; i++) {
279       ierr = ISGetLocalSize(osm->is[i],&dn); CHKERRQ(ierr);
280       ierr = ISGetIndices(osm->is[i],&didx); CHKERRQ(ierr);
281       ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn); CHKERRQ(ierr);
282       ierr = ISRestoreIndices(osm->is[i], &didx); CHKERRQ(ierr);
283       ddn += dn;
284     }
285     ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis); CHKERRQ(ierr);
286     ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr);
287     ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx); CHKERRQ(ierr);
288     ierr = VecDuplicate(osm->gx,&osm->gy);                                     CHKERRQ(ierr);
289     ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);                     CHKERRQ(ierr);
290     ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid);              CHKERRQ(ierr);
291     ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction));     CHKERRQ(ierr);
292     ierr = ISDestroy(gid);                                                     CHKERRQ(ierr);
293     /* Prolongation ISs */
294     { PetscInt       dn_local;       /* Number of indices in the local part of a single domain assigned to this processor. */
295       const PetscInt *didx_local;    /* Global indices from the local part of a single domain assigned to this processor. */
296       PetscInt       ddn_local;      /* Number of indices in the local part of the disjoint union all domains assigned to this processor. */
297       PetscInt       *ddidx_local;   /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */
298       /**/
299       ISLocalToGlobalMapping ltog;          /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */
300       PetscInt              *ddidx_llocal;  /* Mapped local indices of the disjoint union of local parts of subdomains. */
301       PetscInt               ddn_llocal;    /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */
302       IS                     gis_llocal;    /* IS with ddidx_llocal indices. */
303       PetscInt               j;
304       ddn_local = 0;
305       for (i=0; i<osm->n; i++) {
306 	ierr = ISGetLocalSize(osm->is_local[i],&dn_local); CHKERRQ(ierr);
307 	ddn_local += dn_local;
308       }
309       ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local); CHKERRQ(ierr);
310       ddn_local = 0;
311       for (i=0; i<osm->n; i++) {
312 	ierr = ISGetLocalSize(osm->is_local[i],&dn_local); CHKERRQ(ierr);
313 	ierr = ISGetIndices(osm->is_local[i],&didx_local); CHKERRQ(ierr);
314 	ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local); CHKERRQ(ierr);
315 	ierr = ISRestoreIndices(osm->is_local[i], &didx_local); CHKERRQ(ierr);
316 	ddn_local += dn_local;
317       }
318       ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal); CHKERRQ(ierr);
319       ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local)); CHKERRQ(ierr);
320       ierr = ISLocalToGlobalMappingCreateIS(osm->gis,&ltog);CHKERRQ(ierr);
321       ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr);
322       ierr = ISLocalToGlobalMappingDestroy(ltog);CHKERRQ(ierr);
323       if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local);
324       /* Now convert these localized indices into the global indices into the merged output vector. */
325       ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast); CHKERRQ(ierr);
326       for(j=0; j < ddn_llocal; ++j) {
327 	ddidx_llocal[j] += gfirst;
328       }
329       ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal); CHKERRQ(ierr);
330       ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);              CHKERRQ(ierr);
331       ierr = ISDestroy(gis_llocal);CHKERRQ(ierr);
332     }
333     /* Create the subdomain work vectors. */
334     ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr);
335     ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr);
336     ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
337     ierr = VecGetArray(osm->gx, &gxarray);                                     CHKERRQ(ierr);
338     ierr = VecGetArray(osm->gy, &gyarray);                                     CHKERRQ(ierr);
339     for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) {
340       PetscInt dN;
341       ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
342       ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr);
343       ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr);
344       ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr);
345     }
346     ierr = VecRestoreArray(osm->gx, &gxarray); CHKERRQ(ierr);
347     ierr = VecRestoreArray(osm->gy, &gyarray); CHKERRQ(ierr);
348     ierr = VecDestroy(x); CHKERRQ(ierr);
349     ierr = VecDestroy(y); CHKERRQ(ierr);
350     /* Create the local solvers */
351     ierr = PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr);
352     for (i=0; i<osm->n; i++) { /* KSPs are local */
353       ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr);
354       ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
355       ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
356       ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
357       ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
358       ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
359       ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
360       ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
361       osm->ksp[i] = ksp;
362     }
363     scall = MAT_INITIAL_MATRIX;
364 
365   } else {
366     /*
367        Destroy the blocks from the previous iteration
368     */
369     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
370       ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
371       scall = MAT_INITIAL_MATRIX;
372     }
373   }
374 
375   /*
376      Extract out the submatrices.
377   */
378   if(size > 1) {
379     ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
380   }
381   else {
382     ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
383   }
384   if (scall == MAT_INITIAL_MATRIX) {
385     ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
386     for (i=0; i<osm->n; i++) {
387       ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
388       ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
389     }
390   }
391 
392   /* Return control to the user so that the submatrices can be modified (e.g., to apply
393      different boundary conditions for the submatrices than for the global problem) */
394   ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);
395 
396   /*
397      Loop over submatrices putting them into local ksp
398   */
399   for (i=0; i<osm->n; i++) {
400     ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
401     if (!pc->setupcalled) {
402       ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
403     }
404   }
405 
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "PCSetUpOnBlocks_GASM"
411 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
412 {
413   PC_GASM         *osm = (PC_GASM*)pc->data;
414   PetscErrorCode ierr;
415   PetscInt       i;
416 
417   PetscFunctionBegin;
418   for (i=0; i<osm->n; i++) {
419     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
420   }
421   PetscFunctionReturn(0);
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "PCApply_GASM"
426 static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y)
427 {
428   PC_GASM         *osm = (PC_GASM*)pc->data;
429   PetscErrorCode ierr;
430   PetscInt       i;
431   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
432 
433   PetscFunctionBegin;
434   /*
435      Support for limiting the restriction or interpolation to only local
436      subdomain values (leaving the other values 0).
437   */
438   if (!(osm->type & PC_GASM_RESTRICT)) {
439     forward = SCATTER_FORWARD_LOCAL;
440     /* have to zero the work RHS since scatter may leave some slots empty */
441     ierr = VecZeroEntries(osm->gx); CHKERRQ(ierr);
442   }
443   if (!(osm->type & PC_GASM_INTERPOLATE)) {
444     reverse = SCATTER_REVERSE_LOCAL;
445   }
446 
447   ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
448   ierr = VecZeroEntries(y);CHKERRQ(ierr);
449   ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
450   /* do the subdomain solves */
451   for (i=0; i<osm->n; ++i) {
452     ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
453   }
454   ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
455   ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
456   PetscFunctionReturn(0);
457 }
458 
459 #undef __FUNCT__
460 #define __FUNCT__ "PCApplyTranspose_GASM"
461 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y)
462 {
463   PC_GASM         *osm = (PC_GASM*)pc->data;
464   PetscErrorCode ierr;
465   PetscInt       i;
466   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
467 
468   PetscFunctionBegin;
469   /*
470      Support for limiting the restriction or interpolation to only local
471      subdomain values (leaving the other values 0).
472 
473      Note: these are reversed from the PCApply_GASM() because we are applying the
474      transpose of the three terms
475   */
476   if (!(osm->type & PC_GASM_INTERPOLATE)) {
477     forward = SCATTER_FORWARD_LOCAL;
478     /* have to zero the work RHS since scatter may leave some slots empty */
479     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
480   }
481   if (!(osm->type & PC_GASM_RESTRICT)) {
482     reverse = SCATTER_REVERSE_LOCAL;
483   }
484 
485   ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
486   ierr = VecZeroEntries(y);CHKERRQ(ierr);
487   ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
488   /* do the local solves */
489   for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
490     ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
491   }
492   ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
493   ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
494   PetscFunctionReturn(0);
495 }
496 
497 #undef __FUNCT__
498 #define __FUNCT__ "PCReset_GASM"
499 static PetscErrorCode PCReset_GASM(PC pc)
500 {
501   PC_GASM        *osm = (PC_GASM*)pc->data;
502   PetscErrorCode ierr;
503   PetscInt       i;
504 
505   PetscFunctionBegin;
506   if (osm->ksp) {
507     for (i=0; i<osm->n; i++) {
508       ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
509     }
510   }
511   if (osm->pmat) {
512     if (osm->n > 0) {
513       ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
514     }
515   }
516   for (i=0; i<osm->n; i++) {
517     ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr);
518     ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr);
519   }
520   ierr = VecDestroy(osm->gx); CHKERRQ(ierr);
521   ierr = VecDestroy(osm->gy); CHKERRQ(ierr);
522 
523   ierr = VecScatterDestroy(osm->grestriction); CHKERRQ(ierr);
524   ierr = VecScatterDestroy(osm->gprolongation); CHKERRQ(ierr);
525 
526   if (osm->is) {
527     ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
528   }
529 
530   if (osm->gis) {
531     ierr = ISDestroy(osm->gis); CHKERRQ(ierr);
532   }
533   if (osm->gis_local) {
534     ierr = ISDestroy(osm->gis_local); CHKERRQ(ierr);
535   }
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "PCDestroy_GASM"
541 static PetscErrorCode PCDestroy_GASM(PC pc)
542 {
543   PC_GASM         *osm = (PC_GASM*)pc->data;
544   PetscErrorCode ierr;
545   PetscInt       i;
546 
547   PetscFunctionBegin;
548   ierr = PCReset_GASM(pc);CHKERRQ(ierr);
549   if (osm->ksp) {
550     for (i=0; i<osm->n; i++) {
551       ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr);
552     }
553     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
554   }
555   ierr = PetscFree(osm->x);CHKERRQ(ierr);
556   ierr = PetscFree(osm->y);CHKERRQ(ierr);
557   ierr = PetscFree(pc->data);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "PCSetFromOptions_GASM"
563 static PetscErrorCode PCSetFromOptions_GASM(PC pc) {
564   PC_GASM         *osm = (PC_GASM*)pc->data;
565   PetscErrorCode ierr;
566   PetscInt       blocks,ovl;
567   PetscBool      symset,flg;
568   PCGASMType      gasmtype;
569 
570   PetscFunctionBegin;
571   /* set the type to symmetric if matrix is symmetric */
572   if (!osm->type_set && pc->pmat) {
573     ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
574     if (symset && flg) { osm->type = PC_GASM_BASIC; }
575   }
576   ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr);
577     ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
578     if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); }
579     ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
580     if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
581     flg  = PETSC_FALSE;
582     ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
583     if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); }
584   ierr = PetscOptionsTail();CHKERRQ(ierr);
585   PetscFunctionReturn(0);
586 }
587 
588 /*------------------------------------------------------------------------------------*/
589 
590 EXTERN_C_BEGIN
591 #undef __FUNCT__
592 #define __FUNCT__ "PCGASMSetLocalSubdomains_GASM"
593 PetscErrorCode  PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[])
594 {
595   PC_GASM         *osm = (PC_GASM*)pc->data;
596   PetscErrorCode ierr;
597   PetscInt       i;
598 
599   PetscFunctionBegin;
600   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
601   if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp().");
602 
603   if (!pc->setupcalled) {
604     osm->n            = n;
605     osm->is           = 0;
606     osm->is_local     = 0;
607     if (is) {
608       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
609     }
610     if (is_local) {
611       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);}
612     }
613     if (osm->is) {
614       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
615     }
616     if (is) {
617       ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr);
618       for (i=0; i<n; i++) { osm->is[i] = is[i]; }
619       /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */
620       osm->overlap = -1;
621     }
622     if (is_local) {
623       ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
624       for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
625     }
626   }
627   PetscFunctionReturn(0);
628 }
629 EXTERN_C_END
630 
631 EXTERN_C_BEGIN
632 #undef __FUNCT__
633 #define __FUNCT__ "PCGASMSetTotalSubdomains_GASM"
634 PetscErrorCode  PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N) {
635   PC_GASM         *osm = (PC_GASM*)pc->data;
636   PetscErrorCode ierr;
637   PetscMPIInt    rank,size;
638   PetscInt       n;
639 
640   PetscFunctionBegin;
641   if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
642 
643   /*
644      Split the subdomains equally among all processors
645   */
646   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
647   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
648   n = N/size + ((N % size) > rank);
649   if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
650   if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp().");
651   if (!pc->setupcalled) {
652     if (osm->is) {
653       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
654     }
655     osm->N            = N;
656     osm->n            = n;
657     osm->is           = 0;
658     osm->is_local     = 0;
659   }
660   PetscFunctionReturn(0);
661 }/* PCGASMSetTotalSubdomains_GASM() */
662 EXTERN_C_END
663 
664 EXTERN_C_BEGIN
665 #undef __FUNCT__
666 #define __FUNCT__ "PCGASMSetOverlap_GASM"
667 PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
668 {
669   PC_GASM *osm = (PC_GASM*)pc->data;
670 
671   PetscFunctionBegin;
672   if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
673   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
674   if (!pc->setupcalled) {
675     osm->overlap = ovl;
676   }
677   PetscFunctionReturn(0);
678 }
679 EXTERN_C_END
680 
681 EXTERN_C_BEGIN
682 #undef __FUNCT__
683 #define __FUNCT__ "PCGASMSetType_GASM"
684 PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
685 {
686   PC_GASM *osm = (PC_GASM*)pc->data;
687 
688   PetscFunctionBegin;
689   osm->type     = type;
690   osm->type_set = PETSC_TRUE;
691   PetscFunctionReturn(0);
692 }
693 EXTERN_C_END
694 
695 EXTERN_C_BEGIN
696 #undef __FUNCT__
697 #define __FUNCT__ "PCGASMSetSortIndices_GASM"
698 PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool  doSort)
699 {
700   PC_GASM *osm = (PC_GASM*)pc->data;
701 
702   PetscFunctionBegin;
703   osm->sort_indices = doSort;
704   PetscFunctionReturn(0);
705 }
706 EXTERN_C_END
707 
708 EXTERN_C_BEGIN
709 #undef __FUNCT__
710 #define __FUNCT__ "PCGASMGetSubKSP_GASM"
711 /*
712    FIX: This routine might need to be modified once multiple ranks per subdomain are allowed.
713         In particular, it would upset the global subdomain number calculation.
714 */
715 PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
716 {
717   PC_GASM         *osm = (PC_GASM*)pc->data;
718   PetscErrorCode ierr;
719 
720   PetscFunctionBegin;
721   if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here");
722 
723   if (n) {
724     *n = osm->n;
725   }
726   if (first) {
727     ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
728     *first -= osm->n;
729   }
730   if (ksp) {
731     /* Assume that local solves are now different; not necessarily
732        true though!  This flag is used only for PCView_GASM() */
733     *ksp                   = osm->ksp;
734     osm->same_local_solves = PETSC_FALSE;
735   }
736   PetscFunctionReturn(0);
737 }/* PCGASMGetSubKSP_GASM() */
738 EXTERN_C_END
739 
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "PCGASMSetLocalSubdomains"
743 /*@C
744     PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor
745     only) for the additive Schwarz preconditioner.
746 
747     Collective on PC
748 
749     Input Parameters:
750 +   pc - the preconditioner context
751 .   n - the number of subdomains for this processor (default value = 1)
752 .   is - the index set that defines the subdomains for this processor
753          (or PETSC_NULL for PETSc to determine subdomains)
754 -   is_local - the index sets that define the local part of the subdomains for this processor
755          (or PETSC_NULL to use the default of 1 subdomain per process)
756 
757     Notes:
758     The IS numbering is in the parallel, global numbering of the vector.
759 
760     By default the GASM preconditioner uses 1 block per processor.
761 
762     Use PCGASMSetTotalSubdomains() to set the subdomains for all processors.
763 
764     Level: advanced
765 
766 .keywords: PC, GASM, set, local, subdomains, additive Schwarz
767 
768 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
769           PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
770 @*/
771 PetscErrorCode  PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
772 {
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
777   ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "PCGASMSetTotalSubdomains"
783 /*@C
784     PCGASMSetTotalSubdomains - Sets the subdomains for all processor for the
785     additive Schwarz preconditioner.  Either all or no processors in the
786     PC communicator must call this routine, with the same index sets.
787 
788     Collective on PC
789 
790     Input Parameters:
791 +   pc - the preconditioner context
792 .   n - the number of subdomains for all processors
793 .   is - the index sets that define the subdomains for all processor
794          (or PETSC_NULL for PETSc to determine subdomains)
795 -   is_local - the index sets that define the local part of the subdomains for this processor
796          (or PETSC_NULL to use the default of 1 subdomain per process)
797 
798     Options Database Key:
799     To set the total number of subdomain blocks rather than specify the
800     index sets, use the option
801 .    -pc_gasm_blocks <blks> - Sets total blocks
802 
803     Notes:
804     Currently you cannot use this to set the actual subdomains with the argument is.
805 
806     By default the GASM preconditioner uses 1 block per processor.
807 
808     These index sets cannot be destroyed until after completion of the
809     linear solves for which the GASM preconditioner is being used.
810 
811     Use PCGASMSetLocalSubdomains() to set local subdomains.
812 
813     Level: advanced
814 
815 .keywords: PC, GASM, set, total, global, subdomains, additive Schwarz
816 
817 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
818           PCGASMCreateSubdomains2D()
819 @*/
820 PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
821 {
822   PetscErrorCode ierr;
823 
824   PetscFunctionBegin;
825   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
826   ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr);
827   PetscFunctionReturn(0);
828 }
829 
830 #undef __FUNCT__
831 #define __FUNCT__ "PCGASMSetOverlap"
832 /*@
833     PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
834     additive Schwarz preconditioner.  Either all or no processors in the
835     PC communicator must call this routine.
836 
837     Logically Collective on PC
838 
839     Input Parameters:
840 +   pc  - the preconditioner context
841 -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)
842 
843     Options Database Key:
844 .   -pc_gasm_overlap <ovl> - Sets overlap
845 
846     Notes:
847     By default the GASM preconditioner uses 1 block per processor.  To use
848     multiple blocks per perocessor, see PCGASMSetTotalSubdomains() and
849     PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>).
850 
851     The overlap defaults to 1, so if one desires that no additional
852     overlap be computed beyond what may have been set with a call to
853     PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl
854     must be set to be 0.  In particular, if one does not explicitly set
855     the subdomains an application code, then all overlap would be computed
856     internally by PETSc, and using an overlap of 0 would result in an GASM
857     variant that is equivalent to the block Jacobi preconditioner.
858 
859     Note that one can define initial index sets with any overlap via
860     PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine
861     PCGASMSetOverlap() merely allows PETSc to extend that overlap further
862     if desired.
863 
864     Level: intermediate
865 
866 .keywords: PC, GASM, set, overlap
867 
868 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
869           PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
870 @*/
871 PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
872 {
873   PetscErrorCode ierr;
874 
875   PetscFunctionBegin;
876   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
877   PetscValidLogicalCollectiveInt(pc,ovl,2);
878   ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
879   PetscFunctionReturn(0);
880 }
881 
882 #undef __FUNCT__
883 #define __FUNCT__ "PCGASMSetType"
884 /*@
885     PCGASMSetType - Sets the type of restriction and interpolation used
886     for local problems in the additive Schwarz method.
887 
888     Logically Collective on PC
889 
890     Input Parameters:
891 +   pc  - the preconditioner context
892 -   type - variant of GASM, one of
893 .vb
894       PC_GASM_BASIC       - full interpolation and restriction
895       PC_GASM_RESTRICT    - full restriction, local processor interpolation
896       PC_GASM_INTERPOLATE - full interpolation, local processor restriction
897       PC_GASM_NONE        - local processor restriction and interpolation
898 .ve
899 
900     Options Database Key:
901 .   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
902 
903     Level: intermediate
904 
905 .keywords: PC, GASM, set, type
906 
907 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
908           PCGASMCreateSubdomains2D()
909 @*/
910 PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
911 {
912   PetscErrorCode ierr;
913 
914   PetscFunctionBegin;
915   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
916   PetscValidLogicalCollectiveEnum(pc,type,2);
917   ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
918   PetscFunctionReturn(0);
919 }
920 
921 #undef __FUNCT__
922 #define __FUNCT__ "PCGASMSetSortIndices"
923 /*@
924     PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
925 
926     Logically Collective on PC
927 
928     Input Parameters:
929 +   pc  - the preconditioner context
930 -   doSort - sort the subdomain indices
931 
932     Level: intermediate
933 
934 .keywords: PC, GASM, set, type
935 
936 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
937           PCGASMCreateSubdomains2D()
938 @*/
939 PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool  doSort)
940 {
941   PetscErrorCode ierr;
942 
943   PetscFunctionBegin;
944   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
945   PetscValidLogicalCollectiveBool(pc,doSort,2);
946   ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
947   PetscFunctionReturn(0);
948 }
949 
950 #undef __FUNCT__
951 #define __FUNCT__ "PCGASMGetSubKSP"
952 /*@C
953    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
954    this processor.
955 
956    Collective on PC iff first_local is requested
957 
958    Input Parameter:
959 .  pc - the preconditioner context
960 
961    Output Parameters:
962 +  n_local - the number of blocks on this processor or PETSC_NULL
963 .  first_local - the global number of the first block on this processor or PETSC_NULL,
964                  all processors must request or all must pass PETSC_NULL
965 -  ksp - the array of KSP contexts
966 
967    Note:
968    After PCGASMGetSubKSP() the array of KSPes is not to be freed
969 
970    Currently for some matrix implementations only 1 block per processor
971    is supported.
972 
973    You must call KSPSetUp() before calling PCGASMGetSubKSP().
974 
975    Level: advanced
976 
977 .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context
978 
979 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMSetOverlap(),
980           PCGASMCreateSubdomains2D(),
981 @*/
982 PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
983 {
984   PetscErrorCode ierr;
985 
986   PetscFunctionBegin;
987   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
988   ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
989   PetscFunctionReturn(0);
990 }
991 
992 /* -------------------------------------------------------------------------------------*/
993 /*MC
994    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
995            its own KSP object.
996 
997    Options Database Keys:
998 +  -pc_gasm_truelocal - Activates PCGGASMSetUseTrueLocal()
999 .  -pc_gasm_blocks <blks> - Sets total blocks
1000 .  -pc_gasm_overlap <ovl> - Sets overlap
1001 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1002 
1003      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1004       will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1005       -pc_gasm_type basic to use the standard GASM.
1006 
1007    Notes: Each processor can have one or more blocks, but a block cannot be shared by more
1008      than one processor. Defaults to one block per processor.
1009 
1010      To set options on the solvers for each block append -sub_ to all the KSP, and PC
1011         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1012 
1013      To set the options on the solvers separate for each block call PCGASMGetSubKSP()
1014          and set the options directly on the resulting KSP object (you can access its PC
1015          with KSPGetPC())
1016 
1017 
1018    Level: beginner
1019 
1020    Concepts: additive Schwarz method
1021 
1022     References:
1023     An additive variant of the Schwarz alternating method for the case of many subregions
1024     M Dryja, OB Widlund - Courant Institute, New York University Technical report
1025 
1026     Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1027     Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
1028 
1029 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1030            PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(),
1031            PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()
1032 
1033 M*/
1034 
1035 EXTERN_C_BEGIN
1036 #undef __FUNCT__
1037 #define __FUNCT__ "PCCreate_GASM"
1038 PetscErrorCode  PCCreate_GASM(PC pc)
1039 {
1040   PetscErrorCode ierr;
1041   PC_GASM         *osm;
1042 
1043   PetscFunctionBegin;
1044   ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr);
1045   osm->N                 = PETSC_DECIDE;
1046   osm->n                 = 0;
1047   osm->nmax              = 0;
1048   osm->overlap           = 1;
1049   osm->ksp               = 0;
1050   osm->grestriction      = 0;
1051   osm->gprolongation     = 0;
1052   osm->gx                = 0;
1053   osm->gy                = 0;
1054   osm->x                 = 0;
1055   osm->y                 = 0;
1056   osm->is                = 0;
1057   osm->is_local          = 0;
1058   osm->pmat              = 0;
1059   osm->type              = PC_GASM_RESTRICT;
1060   osm->same_local_solves = PETSC_TRUE;
1061   osm->sort_indices      = PETSC_TRUE;
1062 
1063   pc->data                   = (void*)osm;
1064   pc->ops->apply             = PCApply_GASM;
1065   pc->ops->applytranspose    = PCApplyTranspose_GASM;
1066   pc->ops->setup             = PCSetUp_GASM;
1067   pc->ops->reset             = PCReset_GASM;
1068   pc->ops->destroy           = PCDestroy_GASM;
1069   pc->ops->setfromoptions    = PCSetFromOptions_GASM;
1070   pc->ops->setuponblocks     = PCSetUpOnBlocks_GASM;
1071   pc->ops->view              = PCView_GASM;
1072   pc->ops->applyrichardson   = 0;
1073 
1074   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM",
1075                     PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr);
1076   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM",
1077                     PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr);
1078   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM",
1079                     PCGASMSetOverlap_GASM);CHKERRQ(ierr);
1080   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM",
1081                     PCGASMSetType_GASM);CHKERRQ(ierr);
1082   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM",
1083                     PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
1084   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM",
1085                     PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
1086   PetscFunctionReturn(0);
1087 }
1088 EXTERN_C_END
1089 
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "PCGASMCreateSubdomains"
1093 /*@C
1094    PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
1095    preconditioner for a any problem on a general grid.
1096 
1097    Collective
1098 
1099    Input Parameters:
1100 +  A - The global matrix operator
1101 -  n - the number of local blocks
1102 
1103    Output Parameters:
1104 .  outis - the array of index sets defining the subdomains
1105 
1106    Level: advanced
1107 
1108    Note: this generates nonoverlapping subdomains; the PCGASM will generate the overlap
1109     from these if you use PCGASMSetLocalSubdomains()
1110 
1111     In the Fortran version you must provide the array outis[] already allocated of length n.
1112 
1113 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1114 
1115 .seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains()
1116 @*/
1117 PetscErrorCode  PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
1118 {
1119   MatPartitioning           mpart;
1120   const char                *prefix;
1121   PetscErrorCode            (*f)(Mat,PetscBool *,MatReuse,Mat*);
1122   PetscMPIInt               size;
1123   PetscInt                  i,j,rstart,rend,bs;
1124   PetscBool                 iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1125   Mat                       Ad = PETSC_NULL, adj;
1126   IS                        ispart,isnumb,*is;
1127   PetscErrorCode            ierr;
1128 
1129   PetscFunctionBegin;
1130   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1131   PetscValidPointer(outis,3);
1132   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);
1133 
1134   /* Get prefix, row distribution, and block size */
1135   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1136   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1137   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1138   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1139 
1140   /* Get diagonal block from matrix if possible */
1141   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1142   ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
1143   if (f) {
1144     ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr);
1145   } else if (size == 1) {
1146     iscopy = PETSC_FALSE; Ad = A;
1147   } else {
1148     iscopy = PETSC_FALSE; Ad = PETSC_NULL;
1149   }
1150   if (Ad) {
1151     ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1152     if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1153   }
1154   if (Ad && n > 1) {
1155     PetscBool  match,done;
1156     /* Try to setup a good matrix partitioning if available */
1157     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1158     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1159     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1160     ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
1161     if (!match) {
1162       ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
1163     }
1164     if (!match) { /* assume a "good" partitioner is available */
1165       PetscInt na,*ia,*ja;
1166       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1167       if (done) {
1168 	/* Build adjacency matrix by hand. Unfortunately a call to
1169 	   MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1170 	   remove the block-aij structure and we cannot expect
1171 	   MatPartitioning to split vertices as we need */
1172 	PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
1173 	nnz = 0;
1174 	for (i=0; i<na; i++) { /* count number of nonzeros */
1175 	  len = ia[i+1] - ia[i];
1176 	  row = ja + ia[i];
1177 	  for (j=0; j<len; j++) {
1178 	    if (row[j] == i) { /* don't count diagonal */
1179 	      len--; break;
1180 	    }
1181 	  }
1182 	  nnz += len;
1183 	}
1184 	ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
1185 	ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
1186 	nnz    = 0;
1187 	iia[0] = 0;
1188 	for (i=0; i<na; i++) { /* fill adjacency */
1189 	  cnt = 0;
1190 	  len = ia[i+1] - ia[i];
1191 	  row = ja + ia[i];
1192 	  for (j=0; j<len; j++) {
1193 	    if (row[j] != i) { /* if not diagonal */
1194 	      jja[nnz+cnt++] = row[j];
1195 	    }
1196 	  }
1197 	  nnz += cnt;
1198 	  iia[i+1] = nnz;
1199 	}
1200 	/* Partitioning of the adjacency matrix */
1201 	ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
1202 	ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1203 	ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
1204 	ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1205 	ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1206 	ierr = MatDestroy(adj);CHKERRQ(ierr);
1207 	foundpart = PETSC_TRUE;
1208       }
1209       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1210     }
1211     ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr);
1212   }
1213   if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);}
1214 
1215   ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
1216   *outis = is;
1217 
1218   if (!foundpart) {
1219 
1220     /* Partitioning by contiguous chunks of rows */
1221 
1222     PetscInt mbs   = (rend-rstart)/bs;
1223     PetscInt start = rstart;
1224     for (i=0; i<n; i++) {
1225       PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1226       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1227       start += count;
1228     }
1229 
1230   } else {
1231 
1232     /* Partitioning by adjacency of diagonal block  */
1233 
1234     const PetscInt *numbering;
1235     PetscInt       *count,nidx,*indices,*newidx,start=0;
1236     /* Get node count in each partition */
1237     ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
1238     ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
1239     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1240       for (i=0; i<n; i++) count[i] *= bs;
1241     }
1242     /* Build indices from node numbering */
1243     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1244     ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
1245     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1246     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1247     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1248     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1249     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1250       ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
1251       for (i=0; i<nidx; i++)
1252 	for (j=0; j<bs; j++)
1253 	  newidx[i*bs+j] = indices[i]*bs + j;
1254       ierr = PetscFree(indices);CHKERRQ(ierr);
1255       nidx   *= bs;
1256       indices = newidx;
1257     }
1258     /* Shift to get global indices */
1259     for (i=0; i<nidx; i++) indices[i] += rstart;
1260 
1261     /* Build the index sets for each block */
1262     for (i=0; i<n; i++) {
1263       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
1264       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1265       start += count[i];
1266     }
1267 
1268     ierr = PetscFree(count);
1269     ierr = PetscFree(indices);
1270     ierr = ISDestroy(isnumb);CHKERRQ(ierr);
1271     ierr = ISDestroy(ispart);CHKERRQ(ierr);
1272   }
1273 
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "PCGASMDestroySubdomains"
1279 /*@C
1280    PCGASMDestroySubdomains - Destroys the index sets created with
1281    PCGASMCreateSubdomains(). Should be called after setting subdomains
1282    with PCGASMSetLocalSubdomains().
1283 
1284    Collective
1285 
1286    Input Parameters:
1287 +  n - the number of index sets
1288 .  is - the array of index sets
1289 -  is_local - the array of local index sets, can be PETSC_NULL
1290 
1291    Level: advanced
1292 
1293 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1294 
1295 .seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains()
1296 @*/
1297 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1298 {
1299   PetscInt       i;
1300   PetscErrorCode ierr;
1301   PetscFunctionBegin;
1302   if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
1303   PetscValidPointer(is,2);
1304   for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); }
1305   ierr = PetscFree(is);CHKERRQ(ierr);
1306   if (is_local) {
1307     PetscValidPointer(is_local,3);
1308     for (i=0; i<n; i++) { ierr = ISDestroy(is_local[i]);CHKERRQ(ierr); }
1309     ierr = PetscFree(is_local);CHKERRQ(ierr);
1310   }
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 
1315 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1316 {                                                                                                       \
1317  PetscInt first_row = first/M, last_row = last/M+1;                                                     \
1318   /*                                                                                                    \
1319    Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
1320    of the bounding box of the intersection of the subdomain with the local ownership range (local       \
1321    subdomain).                                                                                          \
1322    Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
1323    of the intersection.                                                                                 \
1324   */                                                                                                    \
1325   /* ylow_loc is the grid row containing the first element of the local sumbdomain */                   \
1326   *ylow_loc = PetscMax(first_row,ylow);                                                                    \
1327   /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1328   *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft;                                                                            \
1329   /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
1330   *yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
1331   /* xright is the offset of the end of the  local subdomain within its grid row (might actually be outside the local subdomain) */         \
1332   *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright;                                                                          \
1333   /* Now compute the size of the local subdomain n. */ \
1334   *n = 0;                                               \
1335   if(*ylow_loc < *yhigh_loc) {                           \
1336     PetscInt width = xright-xleft;                     \
1337     *n += width*(*yhigh_loc-*ylow_loc-1);                 \
1338     *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
1339     *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
1340   }\
1341 }
1342 
1343 
1344 
1345 #undef __FUNCT__
1346 #define __FUNCT__ "PCGASMCreateSubdomains2D"
1347 /*@
1348    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1349    preconditioner for a two-dimensional problem on a regular grid.
1350 
1351    Collective
1352 
1353    Input Parameters:
1354 +  M, N - the global number of mesh points in the x and y directions
1355 .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1356 .  dof - degrees of freedom per node
1357 -  overlap - overlap in mesh lines
1358 
1359    Output Parameters:
1360 +  Nsub - the number of local subdomains created
1361 .  is - array of index sets defining overlapping (if overlap > 0) subdomains
1362 -  is_local - array of index sets defining non-overlapping subdomains
1363 
1364    Note:
1365    Presently PCAMSCreateSubdomains2d() is valid only for sequential
1366    preconditioners.  More general related routines are
1367    PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains().
1368 
1369    Level: advanced
1370 
1371 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid
1372 
1373 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
1374           PCGASMSetOverlap()
1375 @*/
1376 PetscErrorCode  PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
1377 {
1378   PetscErrorCode ierr;
1379   PetscMPIInt size, rank;
1380   PetscInt i, j;
1381   PetscInt maxheight, maxwidth;
1382   PetscInt xstart, xleft, xright, xleft_loc, xright_loc;
1383   PetscInt ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1384   PetscInt x[2][2], y[2][2], n[2];
1385   PetscInt first, last;
1386   PetscInt nidx, *idx;
1387   PetscInt ii,jj,s,q,d;
1388   PetscInt k,kk;
1389   PetscMPIInt color;
1390   MPI_Comm comm, subcomm;
1391 
1392   IS **iis;
1393   PetscFunctionBegin;
1394   ierr = PetscObjectGetComm((PetscObject)pc, &comm);     CHKERRQ(ierr);
1395   ierr = MPI_Comm_size(comm, &size);                     CHKERRQ(ierr);
1396   ierr = MPI_Comm_rank(comm, &rank);                     CHKERRQ(ierr);
1397   ierr = MatGetOwnershipRange(pc->pmat, &first, &last);  CHKERRQ(ierr);
1398   if(first%dof || last%dof) {
1399     SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1400 	     "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1401   }
1402   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1403   s = 0;
1404   ystart = 0;
1405   for (j=0; j<Ndomains; ++j) {
1406     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1407     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1408     /* Vertical domain limits with an overlap. */
1409     ylow = PetscMax(ystart - overlap,0);
1410     yhigh = PetscMin(ystart + maxheight + overlap,N);
1411     xstart = 0;
1412     for (i=0; i<Mdomains; ++i) {
1413       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1414       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1415       /* Horizontal domain limits with an overlap. */
1416       xleft   = PetscMax(xstart - overlap,0);
1417       xright  = PetscMin(xstart + maxwidth + overlap,M);
1418       /*
1419 	 Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1420       */
1421       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1422       if(nidx) {
1423         ++s;
1424       }
1425       xstart += maxwidth;
1426     }/* for(i = 0; i < Mdomains; ++i) */
1427     ystart += maxheight;
1428   }/* for(j = 0; j < Ndomains; ++j) */
1429   /* Now we can allocate the necessary number of ISs. */
1430   *nsub = s;
1431   ierr = PetscMalloc((*nsub)*sizeof(IS*),is);         CHKERRQ(ierr);
1432   ierr = PetscMalloc((*nsub)*sizeof(IS*),is_local);   CHKERRQ(ierr);
1433   s = 0;
1434   ystart = 0;
1435   for (j=0; j<Ndomains; ++j) {
1436     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1437     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1438     /* Vertical domain limits with an overlap. */
1439     y[0][0] = PetscMax(ystart - overlap,0);
1440     y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1441     /* Vertical domain limits without an overlap. */
1442     y[1][0] = ystart;
1443     y[1][1] = PetscMin(ystart + maxheight,N);
1444     xstart = 0;
1445     for (i=0; i<Mdomains; ++i) {
1446       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1447       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1448       /* Horizontal domain limits with an overlap. */
1449       x[0][0]  = PetscMax(xstart - overlap,0);
1450       x[0][1]  = PetscMin(xstart + maxwidth + overlap,M);
1451       /* Horizontal domain limits without an overlap. */
1452       x[1][0] = xstart;
1453       x[1][1] = PetscMin(xstart+maxwidth,M);
1454       /*
1455 	 Determine whether this domain intersects this processor's ownership range of pc->pmat.
1456 	 Do this twice: first for the domains with overlaps, and once without.
1457 	 During the first pass create the subcommunicators, and use them on the second pass as well.
1458       */
1459       for(q = 0; q < 2; ++q) {
1460 	/*
1461 	  domain limits, (xleft, xright) and (ylow, yheigh) are adjusted
1462 	  according to whether the domain with an overlap or without is considered.
1463 	*/
1464 	xleft = x[q][0]; xright = x[q][1];
1465 	ylow  = y[q][0]; yhigh  = y[q][1];
1466         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1467 	nidx *= dof;
1468         n[q] = nidx;
1469         /*
1470          Based on the counted number of indices in the local domain *with an overlap*,
1471          construct a subcommunicator of all the processors supporting this domain.
1472          Observe that a domain with an overlap might have nontrivial local support,
1473          while the domain without an overlap might not.  Hence, the decision to participate
1474          in the subcommunicator must be based on the domain with an overlap.
1475          */
1476 	if(q == 0) {
1477 	  if(nidx) {
1478 	    color = 1;
1479 	  }
1480 	  else {
1481 	    color = MPI_UNDEFINED;
1482 	  }
1483 	  ierr = MPI_Comm_split(comm, color, rank, &subcomm); CHKERRQ(ierr);
1484 	}
1485         /*
1486          Proceed only if the number of local indices *with an overlap* is nonzero.
1487          */
1488         if(n[0]) {
1489           if(q == 0) {
1490             iis = is;
1491           }
1492           if(q == 1) {
1493             /*
1494              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1495              Moreover, if the overlap is zero, the two ISs are identical.
1496              */
1497             if(overlap == 0) {
1498               (*is_local)[s] = (*is)[s];
1499               ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
1500               continue;
1501             }
1502             else {
1503               iis = is_local;
1504               subcomm = ((PetscObject)(*is)[s])->comm;
1505             }
1506           }/* if(q == 1) */
1507           idx = PETSC_NULL;
1508 	  ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
1509           if(nidx) {
1510             k    = 0;
1511             for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1512               PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft;
1513               PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright;
1514               kk = dof*(M*jj + x0);
1515               for (ii=x0; ii<x1; ++ii) {
1516                 for(d = 0; d < dof; ++d) {
1517                   idx[k++] = kk++;
1518                 }
1519               }
1520             }
1521           }
1522 	  ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr);
1523 	}/* if(n[0]) */
1524       }/* for(q = 0; q < 2; ++q) */
1525       if(n[0]) {
1526         ++s;
1527       }
1528       xstart += maxwidth;
1529     }/* for(i = 0; i < Mdomains; ++i) */
1530     ystart += maxheight;
1531   }/* for(j = 0; j < Ndomains; ++j) */
1532   PetscFunctionReturn(0);
1533 }
1534 
1535 #undef __FUNCT__
1536 #define __FUNCT__ "PCGASMGetLocalSubdomains"
1537 /*@C
1538     PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor
1539     only) for the additive Schwarz preconditioner.
1540 
1541     Not Collective
1542 
1543     Input Parameter:
1544 .   pc - the preconditioner context
1545 
1546     Output Parameters:
1547 +   n - the number of subdomains for this processor (default value = 1)
1548 .   is - the index sets that define the subdomains for this processor
1549 -   is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)
1550 
1551 
1552     Notes:
1553     The IS numbering is in the parallel, global numbering of the vector.
1554 
1555     Level: advanced
1556 
1557 .keywords: PC, GASM, set, local, subdomains, additive Schwarz
1558 
1559 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1560           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices()
1561 @*/
1562 PetscErrorCode  PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1563 {
1564   PC_GASM         *osm;
1565   PetscErrorCode ierr;
1566   PetscBool      match;
1567 
1568   PetscFunctionBegin;
1569   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1570   PetscValidIntPointer(n,2);
1571   if (is) PetscValidPointer(is,3);
1572   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1573   if (!match) {
1574     if (n)  *n  = 0;
1575     if (is) *is = PETSC_NULL;
1576   } else {
1577     osm = (PC_GASM*)pc->data;
1578     if (n)  *n  = osm->n;
1579     if (is) *is = osm->is;
1580     if (is_local) *is_local = osm->is_local;
1581   }
1582   PetscFunctionReturn(0);
1583 }
1584 
1585 #undef __FUNCT__
1586 #define __FUNCT__ "PCGASMGetLocalSubmatrices"
1587 /*@C
1588     PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1589     only) for the additive Schwarz preconditioner.
1590 
1591     Not Collective
1592 
1593     Input Parameter:
1594 .   pc - the preconditioner context
1595 
1596     Output Parameters:
1597 +   n - the number of matrices for this processor (default value = 1)
1598 -   mat - the matrices
1599 
1600 
1601     Level: advanced
1602 
1603 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi
1604 
1605 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1606           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains()
1607 @*/
1608 PetscErrorCode  PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1609 {
1610   PC_GASM         *osm;
1611   PetscErrorCode ierr;
1612   PetscBool      match;
1613 
1614   PetscFunctionBegin;
1615   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1616   PetscValidIntPointer(n,2);
1617   if (mat) PetscValidPointer(mat,3);
1618   if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp().");
1619   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1620   if (!match) {
1621     if (n)   *n   = 0;
1622     if (mat) *mat = PETSC_NULL;
1623   } else {
1624     osm = (PC_GASM*)pc->data;
1625     if (n)   *n   = osm->n;
1626     if (mat) *mat = osm->pmat;
1627   }
1628   PetscFunctionReturn(0);
1629 }
1630