xref: /petsc/src/ksp/pc/impls/gasm/gasm.c (revision 4827ddca42da2d0ec0d1a0d9b26e4c5ddc271c44)
1 
2 /*
  This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
4   In this version each processor may have any number of subdomains and a subdomain may live on multiple
5   processors.
6 
7        N    - total number of subdomains on all processors
8        n    - actual number of subdomains on this processor
9        nmax - maximum number of subdomains per processor
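
       For example (purely illustrative numbers): with 3 processors owning 2, 1, and 3
       subdomains respectively, N = 6, n is 2, 1, or 3 depending on the processor, and
       nmax = 3.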
10 */
11 #include <private/pcimpl.h>     /*I "petscpc.h" I*/
12 
13 typedef struct {
14   PetscInt   N,n,nmax;
15   PetscInt   overlap;             /* overlap requested by user */
16   KSP        *ksp;                /* linear solvers for each block */
17   Vec        gx,gy;               /* Merged work vectors */
  Vec        *x,*y;               /* Split work vectors; their storage aliases pieces of the storage of the merged vectors above. */
19   IS         gis, gis_local;      /* merged ISs */
20   VecScatter grestriction;        /* merged restriction */
21   VecScatter gprolongation;       /* merged prolongation */
22   IS         *is;                 /* index set that defines each overlapping subdomain */
23   IS         *is_local;           /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */
24   Mat        *pmat;               /* subdomain block matrices */
25   PCGASMType  type;               /* use reduced interpolation, restriction or both */
26   PetscBool  type_set;            /* if user set this value (so won't change it for symmetric problems) */
  PetscBool  same_local_solves;   /* flag indicating whether all local solvers are the same */
28   PetscBool  sort_indices;        /* flag to sort subdomain indices */
29 } PC_GASM;
30 
31 #undef __FUNCT__
32 #define __FUNCT__ "PCGASMPrintSubdomains"
33 static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
34 {
35   PC_GASM         *osm  = (PC_GASM*)pc->data;
36   const char     *prefix;
37   char           fname[PETSC_MAX_PATH_LEN+1];
38   PetscViewer    viewer, sviewer;
39   PetscInt       i,j,nidx;
40   const PetscInt *idx;
41   char           *cidx;
42   PetscBool      found;
43   PetscMPIInt    size, rank;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr);
48   ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr);
49   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
50   ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr);
  if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }
52   for (i=0;i<osm->n;++i) {
53     ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
54     ierr = ISGetLocalSize(osm->is[i], &nidx);CHKERRQ(ierr);
55     /*
56      No more than 15 characters per index plus a space.
57      PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
58      in case nidx == 0. That will take care of the space for the trailing '\0' as well.
     For nidx == 0, the whole string consists of 16 '\0' characters.
60      */
61     ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr);
62     ierr = ISGetIndices(osm->is[i], &idx);CHKERRQ(ierr);
63     ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr);
    for (j = 0; j < nidx; ++j) {
65       ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
66     }
67     ierr = PetscViewerDestroy(sviewer);CHKERRQ(ierr);
68     ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr);
69 
70     ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n");CHKERRQ(ierr);
71     ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
72     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
73     ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
74     ierr = PetscFree(cidx);CHKERRQ(ierr);
75     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
76     if (osm->is_local) {
77       ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
78       ierr = ISGetLocalSize(osm->is_local[i], &nidx);CHKERRQ(ierr);
79       /*
80        No more than 15 characters per index plus a space.
81        PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
82        in case nidx == 0. That will take care of the space for the trailing '\0' as well.
       For nidx == 0, the whole string consists of 16 '\0' characters.
84        */
85       ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr);
86       ierr = ISGetIndices(osm->is_local[i], &idx);CHKERRQ(ierr);
87       ierr = PetscViewerStringOpen(((PetscObject)(osm->is_local[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr);
      for (j = 0; j < nidx; ++j) {
89         ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
90       }
91       ierr = PetscViewerDestroy(sviewer);CHKERRQ(ierr);
92       ierr = ISRestoreIndices(osm->is_local[i],&idx);CHKERRQ(ierr);
93 
94       ierr = PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n");CHKERRQ(ierr);
95       ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
96       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
97       ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
98       ierr = PetscFree(cidx);CHKERRQ(ierr);
99       ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
100     }
101   }
102   PetscFunctionReturn(0);
103 }
104 
105 
106 #undef __FUNCT__
107 #define __FUNCT__ "PCView_GASM"
108 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
109 {
110   PC_GASM         *osm = (PC_GASM*)pc->data;
111   const char     *prefix;
112   PetscErrorCode ierr;
113   PetscMPIInt    rank, size;
114   PetscInt       i,bsz;
115   PetscBool      iascii,isstring, print_subdomains;
116   PetscViewer    sviewer;
117 
118 
119   PetscFunctionBegin;
120   ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr);
121   ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr);
122   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
123   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
124   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  print_subdomains = PETSC_FALSE;
  ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr);
126   if (iascii) {
127     char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set";
128     if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);}
129     if (osm->nmax > 0)     {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);}
130     ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr);
131     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
132     ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr);
133     ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
134     ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
135     if (osm->same_local_solves) {
136       if (osm->ksp) {
137         ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr);
138         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
139         if (!rank) {
140           ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
141           ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
142           ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
143         }
144         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
145       }
146     } else {
147       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
148       ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr);
149       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
150       ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
151       for (i=0; i<osm->nmax; i++) {
152         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
153         if (i < osm->n) {
154           ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
155           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr);
156           ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
157           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
158         }
159         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
160       }
161       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
162       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
163     }
164   } else if (isstring) {
165     ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr);
166     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
167     if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
168     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
169   } else {
170     SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name);
171   }
172   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  if (print_subdomains) {
174     ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr);
175   }
176   PetscFunctionReturn(0);
177 }
178 
179 
180 
181 
182 
183 #undef __FUNCT__
184 #define __FUNCT__ "PCSetUp_GASM"
185 static PetscErrorCode PCSetUp_GASM(PC pc)
186 {
187   PC_GASM         *osm  = (PC_GASM*)pc->data;
188   PetscErrorCode ierr;
189   PetscBool      symset,flg;
190   PetscInt       i;
191   PetscMPIInt    rank, size;
192   MatReuse       scall = MAT_REUSE_MATRIX;
193   KSP            ksp;
194   PC             subpc;
195   const char     *prefix,*pprefix;
196   PetscInt       dn;       /* Number of indices in a single subdomain assigned to this processor. */
197   const PetscInt *didx;    /* Indices from a single subdomain assigned to this processor. */
198   PetscInt       ddn;      /* Number of indices in all subdomains assigned to this processor. */
199   PetscInt       *ddidx;   /* Indices of all subdomains assigned to this processor. */
200   IS             gid;      /* Identity IS of the size of all subdomains assigned to this processor. */
201   Vec            x,y;
202   PetscScalar    *gxarray, *gyarray;
203   PetscInt       gfirst, glast;
204 
205   PetscFunctionBegin;
206   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
207   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
208   if (!pc->setupcalled) {
209 
210     if (!osm->type_set) {
211       ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
212       if (symset && flg) { osm->type = PC_GASM_BASIC; }
213     }
214 
215     if (osm->N == PETSC_DECIDE && osm->n < 1) {
216       /* no subdomains given, use one per processor */
217       osm->nmax = osm->n = 1;
218       ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
219       osm->N = size;
220     } else if (osm->N == PETSC_DECIDE) {
221       PetscInt inwork[2], outwork[2];
222       /* determine global number of subdomains and the max number of local subdomains */
223       inwork[0] = inwork[1] = osm->n;
224       ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
225       osm->nmax = outwork[0];
226       osm->N    = outwork[1];
227     }
228     if (!osm->is){ /* create the index sets */
229       ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr);
230     }
231     if (!osm->is_local) {
      /*
         This indicates that osm->is should define a nonoverlapping decomposition
         (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetLocalSubdomains,
          but the assumption is that either the user does the right thing, or subdomains in osm->is have been created
          via PCGASMCreateSubdomains, which guarantees a nonoverlapping decomposition).
         Therefore, osm->is will be used to define osm->is_local.
         If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap,
         so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping.
         Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is.
      */
242       ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
243       for (i=0; i<osm->n; i++) {
244         if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
245           ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr);
246           ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr);
247         } else {
248           ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr);
249           osm->is_local[i] = osm->is[i];
250         }
251       }
252     }
253     /* Beyond this point osm->is_local is not null. */
254     if (osm->overlap > 0) {
255       /* Extend the "overlapping" regions by a number of steps */
256       ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr);
257     }
258     ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
259     flg  = PETSC_FALSE;
260     ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
261     if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); }
262 
263     if (osm->sort_indices) {
264       for (i=0; i<osm->n; i++) {
265         ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
266 	ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
267       }
268     }
269     /* Merge the ISs, create merged vectors and scatter contexts. */
270     /* Restriction ISs. */
271     ddn = 0;
272     for (i=0; i<osm->n; i++) {
273       ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
274       ddn += dn;
275     }
276     ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx);CHKERRQ(ierr);
277     ddn = 0;
278     for (i=0; i<osm->n; i++) {
279       ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
280       ierr = ISGetIndices(osm->is[i],&didx);CHKERRQ(ierr);
281       ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn);CHKERRQ(ierr);
282       ierr = ISRestoreIndices(osm->is[i], &didx);CHKERRQ(ierr);
283       ddn += dn;
284     }
285     ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis);CHKERRQ(ierr);
286     ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr);
287     ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx);CHKERRQ(ierr);
288     ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
289     ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
290     ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid);CHKERRQ(ierr);
291     ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction));CHKERRQ(ierr);
292     ierr = ISDestroy(gid);CHKERRQ(ierr);
293     /* Prolongation ISs */
294     { PetscInt       dn_local;       /* Number of indices in the local part of a single domain assigned to this processor. */
295       const PetscInt *didx_local;    /* Global indices from the local part of a single domain assigned to this processor. */
      PetscInt       ddn_local;      /* Number of indices in the local part of the disjoint union of all domains assigned to this processor. */
297       PetscInt       *ddidx_local;   /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */
298       /**/
299       ISLocalToGlobalMapping ltog;          /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */
300       PetscInt              *ddidx_llocal;  /* Mapped local indices of the disjoint union of local parts of subdomains. */
301       PetscInt               ddn_llocal;    /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */
302       IS                     gis_llocal;    /* IS with ddidx_llocal indices. */
303       PetscInt               j;
304       ddn_local = 0;
305       for (i=0; i<osm->n; i++) {
306 	ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr);
307 	ddn_local += dn_local;
308       }
309       ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local);CHKERRQ(ierr);
310       ddn_local = 0;
311       for (i=0; i<osm->n; i++) {
312 	ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr);
313 	ierr = ISGetIndices(osm->is_local[i],&didx_local);CHKERRQ(ierr);
314 	ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local);CHKERRQ(ierr);
315 	ierr = ISRestoreIndices(osm->is_local[i], &didx_local);CHKERRQ(ierr);
316 	ddn_local += dn_local;
317       }
318       ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal);CHKERRQ(ierr);
319       ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local));CHKERRQ(ierr);
320       ierr = ISLocalToGlobalMappingCreateIS(osm->gis,&ltog);CHKERRQ(ierr);
321       ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr);
322       ierr = ISLocalToGlobalMappingDestroy(ltog);CHKERRQ(ierr);
      if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_local - ddn_llocal);
324       /* Now convert these localized indices into the global indices into the merged output vector. */
325       ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast);CHKERRQ(ierr);
      for (j=0; j < ddn_llocal; ++j) {
327 	ddidx_llocal[j] += gfirst;
328       }
329       ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal);CHKERRQ(ierr);
330       ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);CHKERRQ(ierr);
331       ierr = ISDestroy(gis_llocal);CHKERRQ(ierr);
332     }
333     /* Create the subdomain work vectors. */
334     ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr);
335     ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr);
336     ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
337     ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr);
338     ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr);
339     for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) {
340       PetscInt dN;
341       ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
342       ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr);
343       ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr);
344       ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr);
345     }
346     ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr);
347     ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr);
348     ierr = VecDestroy(x);CHKERRQ(ierr);
349     ierr = VecDestroy(y);CHKERRQ(ierr);
350     /* Create the local solvers */
    ierr = PetscMalloc(osm->n*sizeof(KSP),&osm->ksp);CHKERRQ(ierr);
352     for (i=0; i<osm->n; i++) { /* KSPs are local */
353       ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr);
354       ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
355       ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
356       ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
357       ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
358       ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
359       ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
360       ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
361       osm->ksp[i] = ksp;
362     }
363     scall = MAT_INITIAL_MATRIX;
364 
365   } else {
366     /*
367        Destroy the blocks from the previous iteration
368     */
369     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
370       ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
371       scall = MAT_INITIAL_MATRIX;
372     }
373   }
374 
375   /*
376      Extract out the submatrices.
377   */
  if (size > 1) {
    ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  }
384   if (scall == MAT_INITIAL_MATRIX) {
385     ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
386     for (i=0; i<osm->n; i++) {
387       ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
388       ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
389     }
390   }
391 
392   /* Return control to the user so that the submatrices can be modified (e.g., to apply
393      different boundary conditions for the submatrices than for the global problem) */
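  /* A hypothetical sketch of such a user callback (the name ModifySubMatrices and its
     body are illustrative, not part of this file), registered beforehand with
     PCSetModifySubMatrices(pc,ModifySubMatrices,ctx):

       PetscErrorCode ModifySubMatrices(PC pc,PetscInt n,const IS row[],const IS col[],Mat submat[],void *ctx)
       {
         PetscInt i;
         PetscFunctionBegin;
         for (i=0; i<n; i++) {
           ... adjust submat[i] here, e.g., with MatSetValues() or MatZeroRows() ...
         }
         PetscFunctionReturn(0);
       }
  */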
394   ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);
395 
396   /*
397      Loop over submatrices putting them into local ksp
398   */
399   for (i=0; i<osm->n; i++) {
400     ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
401     if (!pc->setupcalled) {
402       ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
403     }
404   }
405 
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "PCSetUpOnBlocks_GASM"
411 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
412 {
413   PC_GASM         *osm = (PC_GASM*)pc->data;
414   PetscErrorCode ierr;
415   PetscInt       i;
416 
417   PetscFunctionBegin;
418   for (i=0; i<osm->n; i++) {
419     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
420   }
421   PetscFunctionReturn(0);
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "PCApply_GASM"
426 static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y)
427 {
428   PC_GASM         *osm = (PC_GASM*)pc->data;
429   PetscErrorCode ierr;
430   PetscInt       i;
431   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
432 
433   PetscFunctionBegin;
434   /*
435      Support for limiting the restriction or interpolation to only local
436      subdomain values (leaving the other values 0).
437   */
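  /*
     In operator form (an illustrative formula, not a literal transcription of the code):
     with R_i the restriction onto overlapping subdomain i and A_i the corresponding
     subdomain matrix, the basic method applies y = sum_i R_i^T A_i^{-1} R_i x.
     The flags below only control whether the forward (restriction) and reverse
     (interpolation) scatters exchange off-process values; dropping one or the other
     yields the restricted variants of the method.
  */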
438   if (!(osm->type & PC_GASM_RESTRICT)) {
439     forward = SCATTER_FORWARD_LOCAL;
440     /* have to zero the work RHS since scatter may leave some slots empty */
441     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
442   }
443   if (!(osm->type & PC_GASM_INTERPOLATE)) {
444     reverse = SCATTER_REVERSE_LOCAL;
445   }
446 
447   ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
448   ierr = VecZeroEntries(y);CHKERRQ(ierr);
449   ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
450   /* do the subdomain solves */
451   for (i=0; i<osm->n; ++i) {
452     ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
453   }
454   ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
455   ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
456   PetscFunctionReturn(0);
457 }
458 
459 #undef __FUNCT__
460 #define __FUNCT__ "PCApplyTranspose_GASM"
461 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y)
462 {
463   PC_GASM         *osm = (PC_GASM*)pc->data;
464   PetscErrorCode ierr;
465   PetscInt       i;
466   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
467 
468   PetscFunctionBegin;
469   /*
470      Support for limiting the restriction or interpolation to only local
471      subdomain values (leaving the other values 0).
472 
473      Note: these are reversed from the PCApply_GASM() because we are applying the
474      transpose of the three terms
475   */
476   if (!(osm->type & PC_GASM_INTERPOLATE)) {
477     forward = SCATTER_FORWARD_LOCAL;
478     /* have to zero the work RHS since scatter may leave some slots empty */
479     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
480   }
481   if (!(osm->type & PC_GASM_RESTRICT)) {
482     reverse = SCATTER_REVERSE_LOCAL;
483   }
484 
485   ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
486   ierr = VecZeroEntries(y);CHKERRQ(ierr);
487   ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
488   /* do the local solves */
489   for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
490     ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
491   }
492   ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
493   ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
494   PetscFunctionReturn(0);
495 }
496 
497 #undef __FUNCT__
498 #define __FUNCT__ "PCReset_GASM"
499 static PetscErrorCode PCReset_GASM(PC pc)
500 {
501   PC_GASM        *osm = (PC_GASM*)pc->data;
502   PetscErrorCode ierr;
503   PetscInt       i;
504 
505   PetscFunctionBegin;
506   if (osm->ksp) {
507     for (i=0; i<osm->n; i++) {
508       ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
509     }
510   }
511   if (osm->pmat) {
512     if (osm->n > 0) {
513       ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
514     }
515   }
516   if (osm->x) {
517     for (i=0; i<osm->n; i++) {
518       if (osm->x[i]) {ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr);}
519       if (osm->y[i]) {ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr);}
520     }
521   }
522   if (osm->gx) {ierr = VecDestroy(osm->gx);CHKERRQ(ierr);}
523   if (osm->gy) {ierr = VecDestroy(osm->gy);CHKERRQ(ierr);}
524 
525   if (osm->grestriction) {ierr = VecScatterDestroy(osm->grestriction);CHKERRQ(ierr);}
526   if (osm->gprolongation) {ierr = VecScatterDestroy(osm->gprolongation);CHKERRQ(ierr);}
  if (osm->is) {ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); osm->is = 0; osm->is_local = 0;}
528   if (osm->gis) {ierr = ISDestroy(osm->gis);CHKERRQ(ierr);}
529   if (osm->gis_local) {ierr = ISDestroy(osm->gis_local);CHKERRQ(ierr);}
530   PetscFunctionReturn(0);
531 }
532 
533 #undef __FUNCT__
534 #define __FUNCT__ "PCDestroy_GASM"
535 static PetscErrorCode PCDestroy_GASM(PC pc)
536 {
537   PC_GASM         *osm = (PC_GASM*)pc->data;
538   PetscErrorCode ierr;
539   PetscInt       i;
540 
541   PetscFunctionBegin;
542   ierr = PCReset_GASM(pc);CHKERRQ(ierr);
543   if (osm->ksp) {
544     for (i=0; i<osm->n; i++) {
545       ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr);
546     }
547     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
548   }
549   ierr = PetscFree(osm->x);CHKERRQ(ierr);
550   ierr = PetscFree(osm->y);CHKERRQ(ierr);
551   ierr = PetscFree(pc->data);CHKERRQ(ierr);
552   PetscFunctionReturn(0);
553 }
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "PCSetFromOptions_GASM"
static PetscErrorCode PCSetFromOptions_GASM(PC pc)
{
558   PC_GASM         *osm = (PC_GASM*)pc->data;
559   PetscErrorCode ierr;
560   PetscInt       blocks,ovl;
561   PetscBool      symset,flg;
562   PCGASMType      gasmtype;
563 
564   PetscFunctionBegin;
565   /* set the type to symmetric if matrix is symmetric */
566   if (!osm->type_set && pc->pmat) {
567     ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
568     if (symset && flg) { osm->type = PC_GASM_BASIC; }
569   }
570   ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr);
571     ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
572     if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); }
573     ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
574     if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
575     flg  = PETSC_FALSE;
576     ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
577     if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); }
578   ierr = PetscOptionsTail();CHKERRQ(ierr);
579   PetscFunctionReturn(0);
580 }
581 
582 /*------------------------------------------------------------------------------------*/
583 
584 EXTERN_C_BEGIN
585 #undef __FUNCT__
586 #define __FUNCT__ "PCGASMSetLocalSubdomains_GASM"
587 PetscErrorCode  PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[])
588 {
589   PC_GASM         *osm = (PC_GASM*)pc->data;
590   PetscErrorCode ierr;
591   PetscInt       i;
592 
593   PetscFunctionBegin;
594   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
595   if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp().");
596 
597   if (!pc->setupcalled) {
598     osm->n            = n;
599     osm->is           = 0;
600     osm->is_local     = 0;
601     if (is) {
602       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
603     }
604     if (is_local) {
605       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);}
606     }
607     if (osm->is) {
608       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
609     }
610     if (is) {
611       ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr);
612       for (i=0; i<n; i++) { osm->is[i] = is[i]; }
613       /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */
614       osm->overlap = -1;
615     }
616     if (is_local) {
617       ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
618       for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
619     }
620   }
621   PetscFunctionReturn(0);
622 }
623 EXTERN_C_END
624 
625 EXTERN_C_BEGIN
626 #undef __FUNCT__
627 #define __FUNCT__ "PCGASMSetTotalSubdomains_GASM"
PetscErrorCode  PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N)
{
629   PC_GASM         *osm = (PC_GASM*)pc->data;
630   PetscErrorCode ierr;
631   PetscMPIInt    rank,size;
632   PetscInt       n;
633 
634   PetscFunctionBegin;
635   if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
636 
637   /*
638      Split the subdomains equally among all processors
639   */
640   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
641   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
642   n = N/size + ((N % size) > rank);
643   if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
644   if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp().");
645   if (!pc->setupcalled) {
646     if (osm->is) {
647       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
648     }
649     osm->N            = N;
650     osm->n            = n;
651     osm->is           = 0;
652     osm->is_local     = 0;
653   }
654   PetscFunctionReturn(0);
655 }/* PCGASMSetTotalSubdomains_GASM() */
656 EXTERN_C_END
657 
658 EXTERN_C_BEGIN
659 #undef __FUNCT__
660 #define __FUNCT__ "PCGASMSetOverlap_GASM"
661 PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
662 {
663   PC_GASM *osm = (PC_GASM*)pc->data;
664 
665   PetscFunctionBegin;
666   if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
667   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
668   if (!pc->setupcalled) {
669     osm->overlap = ovl;
670   }
671   PetscFunctionReturn(0);
672 }
673 EXTERN_C_END
674 
675 EXTERN_C_BEGIN
676 #undef __FUNCT__
677 #define __FUNCT__ "PCGASMSetType_GASM"
678 PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
679 {
680   PC_GASM *osm = (PC_GASM*)pc->data;
681 
682   PetscFunctionBegin;
683   osm->type     = type;
684   osm->type_set = PETSC_TRUE;
685   PetscFunctionReturn(0);
686 }
687 EXTERN_C_END
688 
689 EXTERN_C_BEGIN
690 #undef __FUNCT__
691 #define __FUNCT__ "PCGASMSetSortIndices_GASM"
692 PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool  doSort)
693 {
694   PC_GASM *osm = (PC_GASM*)pc->data;
695 
696   PetscFunctionBegin;
697   osm->sort_indices = doSort;
698   PetscFunctionReturn(0);
699 }
700 EXTERN_C_END
701 
702 EXTERN_C_BEGIN
703 #undef __FUNCT__
704 #define __FUNCT__ "PCGASMGetSubKSP_GASM"
705 /*
706    FIX: This routine might need to be modified once multiple ranks per subdomain are allowed.
707         In particular, it would upset the global subdomain number calculation.
708 */
709 PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
710 {
711   PC_GASM         *osm = (PC_GASM*)pc->data;
712   PetscErrorCode ierr;
713 
714   PetscFunctionBegin;
  if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
716 
717   if (n) {
718     *n = osm->n;
719   }
720   if (first) {
721     ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
722     *first -= osm->n;
723   }
724   if (ksp) {
725     /* Assume that local solves are now different; not necessarily
726        true though!  This flag is used only for PCView_GASM() */
727     *ksp                   = osm->ksp;
728     osm->same_local_solves = PETSC_FALSE;
729   }
730   PetscFunctionReturn(0);
731 }/* PCGASMGetSubKSP_GASM() */
732 EXTERN_C_END
733 
734 
735 #undef __FUNCT__
736 #define __FUNCT__ "PCGASMSetLocalSubdomains"
737 /*@C
738     PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor
739     only) for the additive Schwarz preconditioner.
740 
741     Collective on PC
742 
743     Input Parameters:
744 +   pc - the preconditioner context
745 .   n - the number of subdomains for this processor (default value = 1)
.   is - the index sets that define the subdomains for this processor
         (or PETSC_NULL for PETSc to determine the subdomains)
-   is_local - the index sets that define the local (nonoverlapping) part of the subdomains for this processor
         (or PETSC_NULL to have PETSc derive them from is)
750 
751     Notes:
752     The IS numbering is in the parallel, global numbering of the vector.
753 
754     By default the GASM preconditioner uses 1 block per processor.
755 
756     Use PCGASMSetTotalSubdomains() to set the subdomains for all processors.
757 
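    A minimal usage sketch (nidx, idx, nidx_local, and idx_local stand for application-provided
    sizes and global index arrays; they are illustrative, not defined by PETSc):
.vb
      IS is, is_local;
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nidx,idx,PETSC_COPY_VALUES,&is);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nidx_local,idx_local,PETSC_COPY_VALUES,&is_local);CHKERRQ(ierr);
      ierr = PCGASMSetLocalSubdomains(pc,1,&is,&is_local);CHKERRQ(ierr);
.ve
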
758     Level: advanced
759 
760 .keywords: PC, GASM, set, local, subdomains, additive Schwarz
761 
762 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
763           PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
764 @*/
765 PetscErrorCode  PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
766 {
767   PetscErrorCode ierr;
768 
769   PetscFunctionBegin;
770   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
771   ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr);
772   PetscFunctionReturn(0);
773 }
774 
775 #undef __FUNCT__
776 #define __FUNCT__ "PCGASMSetTotalSubdomains"
777 /*@C
    PCGASMSetTotalSubdomains - Sets the total number of subdomains to use in the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine, with the same value of N.
781 
782     Collective on PC
783 
784     Input Parameters:
785 +   pc - the preconditioner context
-   N - the total number of subdomains across all processors
791 
792     Options Database Key:
    To set the total number of subdomain blocks from the options database, use
795 .    -pc_gasm_blocks <blks> - Sets total blocks
796 
797     Notes:
    Currently you cannot use this routine to set the actual subdomains; the index
    sets are determined internally by PETSc, which splits the N subdomains evenly
    across the processors.  Use PCGASMSetLocalSubdomains() to set subdomains explicitly.

    By default the GASM preconditioner uses 1 block per processor.
806 
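    A usage sketch (assuming pc has been obtained from the solver with KSPGetPC()):
.vb
      ierr = PCSetType(pc,PCGASM);CHKERRQ(ierr);
      ierr = PCGASMSetTotalSubdomains(pc,16);CHKERRQ(ierr);  /* 16 subdomains split evenly across the processors */
.ve
    or, equivalently, the command-line options -pc_type gasm -pc_gasm_blocks 16.
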
807     Level: advanced
808 
809 .keywords: PC, GASM, set, total, global, subdomains, additive Schwarz
810 
811 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
812           PCGASMCreateSubdomains2D()
813 @*/
814 PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
815 {
816   PetscErrorCode ierr;
817 
818   PetscFunctionBegin;
819   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
820   ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr);
821   PetscFunctionReturn(0);
822 }
823 
824 #undef __FUNCT__
825 #define __FUNCT__ "PCGASMSetOverlap"
826 /*@
827     PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
828     additive Schwarz preconditioner.  Either all or no processors in the
829     PC communicator must call this routine.
830 
831     Logically Collective on PC
832 
833     Input Parameters:
834 +   pc  - the preconditioner context
835 -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)
836 
837     Options Database Key:
838 .   -pc_gasm_overlap <ovl> - Sets overlap
839 
840     Notes:
841     By default the GASM preconditioner uses 1 block per processor.  To use
    multiple blocks per processor, see PCGASMSetTotalSubdomains() and
843     PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>).
844 
845     The overlap defaults to 1, so if one desires that no additional
846     overlap be computed beyond what may have been set with a call to
847     PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl
    must be set to 0.  In particular, if one does not explicitly set
    the subdomains in an application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in a GASM
851     variant that is equivalent to the block Jacobi preconditioner.
852 
853     Note that one can define initial index sets with any overlap via
854     PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine
855     PCGASMSetOverlap() merely allows PETSc to extend that overlap further
856     if desired.
857 
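    For example, the block Jacobi limit described above can be requested as follows
    (a sketch; pc is assumed to be a PCGASM with no user-supplied subdomains):
.vb
      ierr = PCGASMSetOverlap(pc,0);CHKERRQ(ierr);  /* no extra overlap: equivalent to block Jacobi */
.ve
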
858     Level: intermediate
859 
860 .keywords: PC, GASM, set, overlap
861 
862 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
863           PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
864 @*/
865 PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
866 {
867   PetscErrorCode ierr;
868 
869   PetscFunctionBegin;
870   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
871   PetscValidLogicalCollectiveInt(pc,ovl,2);
872   ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
873   PetscFunctionReturn(0);
874 }
875 
876 #undef __FUNCT__
877 #define __FUNCT__ "PCGASMSetType"
878 /*@
879     PCGASMSetType - Sets the type of restriction and interpolation used
880     for local problems in the additive Schwarz method.
881 
882     Logically Collective on PC
883 
884     Input Parameters:
885 +   pc  - the preconditioner context
886 -   type - variant of GASM, one of
887 .vb
888       PC_GASM_BASIC       - full interpolation and restriction
889       PC_GASM_RESTRICT    - full restriction, local processor interpolation
890       PC_GASM_INTERPOLATE - full interpolation, local processor restriction
891       PC_GASM_NONE        - local processor restriction and interpolation
892 .ve
893 
894     Options Database Key:
895 .   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
896 
897     Level: intermediate
898 
899 .keywords: PC, GASM, set, type
900 
.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
902           PCGASMCreateSubdomains2D()
903 @*/
904 PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
905 {
906   PetscErrorCode ierr;
907 
908   PetscFunctionBegin;
909   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
910   PetscValidLogicalCollectiveEnum(pc,type,2);
911   ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
912   PetscFunctionReturn(0);
913 }
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "PCGASMSetSortIndices"
917 /*@
918     PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
919 
920     Logically Collective on PC
921 
922     Input Parameters:
923 +   pc  - the preconditioner context
924 -   doSort - sort the subdomain indices
925 
926     Level: intermediate
927 
928 .keywords: PC, GASM, set, type
929 
930 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
931           PCGASMCreateSubdomains2D()
932 @*/
933 PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool  doSort)
934 {
935   PetscErrorCode ierr;
936 
937   PetscFunctionBegin;
938   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
939   PetscValidLogicalCollectiveBool(pc,doSort,2);
940   ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
941   PetscFunctionReturn(0);
942 }
943 
944 #undef __FUNCT__
945 #define __FUNCT__ "PCGASMGetSubKSP"
946 /*@C
947    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
948    this processor.
949 
950    Collective on PC iff first_local is requested
951 
952    Input Parameter:
953 .  pc - the preconditioner context
954 
955    Output Parameters:
956 +  n_local - the number of blocks on this processor or PETSC_NULL
957 .  first_local - the global number of the first block on this processor or PETSC_NULL,
958                  all processors must request or all must pass PETSC_NULL
959 -  ksp - the array of KSP contexts
960 
961    Note:
   After PCGASMGetSubKSP() the array of KSP contexts is not to be freed
963 
964    Currently for some matrix implementations only 1 block per processor
965    is supported.
966 
967    You must call KSPSetUp() before calling PCGASMGetSubKSP().
968 
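   A usage sketch (assuming ksp is the outer KSP of the linear solve and its PC has
   been set to PCGASM):
.vb
      KSP      *subksp;
      PC       pc,subpc;
      PetscInt i,nlocal,first;
      ierr = KSPSetUp(ksp);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCGASMGetSubKSP(pc,&nlocal,&first,&subksp);CHKERRQ(ierr);
      for (i=0; i<nlocal; i++) {
        ierr = KSPGetPC(subksp[i],&subpc);CHKERRQ(ierr);
        ierr = PCSetType(subpc,PCILU);CHKERRQ(ierr);  /* e.g., ILU on every block */
      }
.ve
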
969    Level: advanced
970 
971 .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context
972 
.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMSetOverlap(),
          PCGASMCreateSubdomains2D()
975 @*/
976 PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
977 {
978   PetscErrorCode ierr;
979 
980   PetscFunctionBegin;
981   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
982   ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
983   PetscFunctionReturn(0);
984 }
985 
986 /* -------------------------------------------------------------------------------------*/
987 /*MC
988    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
989            its own KSP object.
990 
991    Options Database Keys:
+  -pc_gasm_truelocal - Activates PCGASMSetUseTrueLocal()
993 .  -pc_gasm_blocks <blks> - Sets total blocks
994 .  -pc_gasm_overlap <ovl> - Sets overlap
995 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
996 
     IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors, you
      will get different convergence rates due to the default option of -pc_gasm_type restrict. Use
      -pc_gasm_type basic to use the standard GASM.
1000 
   Notes: Each processor can have one or more blocks, and (unlike in PCASM) a block may live
     on more than one processor, as described at the top of this file. Defaults to one block per processor.
1003 
     To set options on the solvers for each block, append -sub_ to all the KSP and PC
        options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

     To set the options on the solvers separately for each block, call PCGASMGetSubKSP()
         and set the options directly on the resulting KSP object (you can access its PC
         with KSPGetPC())
1010 
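     For example (an illustrative command line; the executable name is assumed):
.vb
      mpiexec -n 4 ./myapp -ksp_type gmres -pc_type gasm -pc_gasm_blocks 8 -pc_gasm_overlap 2 -sub_ksp_type preonly -sub_pc_type lu
.ve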
1011 
1012    Level: beginner
1013 
1014    Concepts: additive Schwarz method
1015 
1016     References:
1017     An additive variant of the Schwarz alternating method for the case of many subregions
1018     M Dryja, OB Widlund - Courant Institute, New York University Technical report
1019 
1020     Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1021     Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
1022 
1023 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1024            PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(),
           PCGASMSetTotalSubdomains(), PCSetModifySubMatrices(), PCGASMSetOverlap(), PCGASMSetType()
1026 
1027 M*/
1028 
1029 EXTERN_C_BEGIN
1030 #undef __FUNCT__
1031 #define __FUNCT__ "PCCreate_GASM"
1032 PetscErrorCode  PCCreate_GASM(PC pc)
1033 {
1034   PetscErrorCode ierr;
1035   PC_GASM         *osm;
1036 
1037   PetscFunctionBegin;
1038   ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr);
1039   osm->N                 = PETSC_DECIDE;
1040   osm->n                 = 0;
1041   osm->nmax              = 0;
1042   osm->overlap           = 1;
1043   osm->ksp               = 0;
1044   osm->grestriction      = 0;
1045   osm->gprolongation     = 0;
1046   osm->gx                = 0;
1047   osm->gy                = 0;
1048   osm->x                 = 0;
1049   osm->y                 = 0;
1050   osm->is                = 0;
1051   osm->is_local          = 0;
1052   osm->pmat              = 0;
1053   osm->type              = PC_GASM_RESTRICT;
1054   osm->same_local_solves = PETSC_TRUE;
1055   osm->sort_indices      = PETSC_TRUE;
1056 
1057   pc->data                   = (void*)osm;
1058   pc->ops->apply             = PCApply_GASM;
1059   pc->ops->applytranspose    = PCApplyTranspose_GASM;
1060   pc->ops->setup             = PCSetUp_GASM;
1061   pc->ops->reset             = PCReset_GASM;
1062   pc->ops->destroy           = PCDestroy_GASM;
1063   pc->ops->setfromoptions    = PCSetFromOptions_GASM;
1064   pc->ops->setuponblocks     = PCSetUpOnBlocks_GASM;
1065   pc->ops->view              = PCView_GASM;
1066   pc->ops->applyrichardson   = 0;
1067 
1068   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM",
1069                     PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr);
1070   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM",
1071                     PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr);
1072   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM",
1073                     PCGASMSetOverlap_GASM);CHKERRQ(ierr);
1074   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM",
1075                     PCGASMSetType_GASM);CHKERRQ(ierr);
1076   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM",
1077                     PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
1078   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM",
1079                     PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
1080   PetscFunctionReturn(0);
1081 }
1082 EXTERN_C_END
1083 
1084 
1085 #undef __FUNCT__
1086 #define __FUNCT__ "PCGASMCreateSubdomains"
1087 /*@C
1088    PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
   preconditioner for any problem on a general grid.
1090 
1091    Collective
1092 
1093    Input Parameters:
1094 +  A - The global matrix operator
1095 -  n - the number of local blocks
1096 
   Output Parameter:
1098 .  outis - the array of index sets defining the subdomains
1099 
1100    Level: advanced
1101 
   Note: this generates nonoverlapping subdomains; PCGASM will generate the overlap
    from these if you pass them to PCGASMSetLocalSubdomains() and request a positive
    overlap with PCGASMSetOverlap()

    In the Fortran version, you must provide the array outis[] already allocated with length n.
1106 
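    A usage sketch (assuming A is the assembled operator and pc is a PCGASM):
.vb
      IS *is;
      ierr = PCGASMCreateSubdomains(A,4,&is);CHKERRQ(ierr);              /* 4 nonoverlapping local subdomains */
      ierr = PCGASMSetLocalSubdomains(pc,4,is,PETSC_NULL);CHKERRQ(ierr);
      /* ... solve with the KSP that owns pc ... */
      ierr = PCGASMDestroySubdomains(4,is,PETSC_NULL);CHKERRQ(ierr);
.ve
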
1107 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1108 
1109 .seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains()
1110 @*/
1111 PetscErrorCode  PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
1112 {
1113   MatPartitioning           mpart;
1114   const char                *prefix;
1115   PetscErrorCode            (*f)(Mat,PetscBool *,MatReuse,Mat*);
1116   PetscMPIInt               size;
1117   PetscInt                  i,j,rstart,rend,bs;
1118   PetscBool                 iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1119   Mat                       Ad = PETSC_NULL, adj;
1120   IS                        ispart,isnumb,*is;
1121   PetscErrorCode            ierr;
1122 
1123   PetscFunctionBegin;
1124   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1125   PetscValidPointer(outis,3);
1126   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);
1127 
1128   /* Get prefix, row distribution, and block size */
1129   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1130   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1131   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1132   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1133 
1134   /* Get diagonal block from matrix if possible */
1135   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1136   ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
1137   if (f) {
1138     ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr);
1139   } else if (size == 1) {
1140     iscopy = PETSC_FALSE; Ad = A;
1141   } else {
1142     iscopy = PETSC_FALSE; Ad = PETSC_NULL;
1143   }
1144   if (Ad) {
1145     ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1146     if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1147   }
1148   if (Ad && n > 1) {
1149     PetscBool  match,done;
1150     /* Try to setup a good matrix partitioning if available */
1151     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1152     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1153     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1154     ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
1155     if (!match) {
1156       ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
1157     }
1158     if (!match) { /* assume a "good" partitioner is available */
1159       PetscInt na,*ia,*ja;
1160       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1161       if (done) {
1162 	/* Build adjacency matrix by hand. Unfortunately a call to
1163 	   MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1164 	   remove the block-aij structure and we cannot expect
1165 	   MatPartitioning to split vertices as we need */
1166 	PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
1167 	nnz = 0;
1168 	for (i=0; i<na; i++) { /* count number of nonzeros */
1169 	  len = ia[i+1] - ia[i];
1170 	  row = ja + ia[i];
1171 	  for (j=0; j<len; j++) {
1172 	    if (row[j] == i) { /* don't count diagonal */
1173 	      len--; break;
1174 	    }
1175 	  }
1176 	  nnz += len;
1177 	}
1178 	ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
1179 	ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
1180 	nnz    = 0;
1181 	iia[0] = 0;
1182 	for (i=0; i<na; i++) { /* fill adjacency */
1183 	  cnt = 0;
1184 	  len = ia[i+1] - ia[i];
1185 	  row = ja + ia[i];
1186 	  for (j=0; j<len; j++) {
1187 	    if (row[j] != i) { /* if not diagonal */
1188 	      jja[nnz+cnt++] = row[j];
1189 	    }
1190 	  }
1191 	  nnz += cnt;
1192 	  iia[i+1] = nnz;
1193 	}
1194 	/* Partitioning of the adjacency matrix */
1195 	ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
1196 	ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1197 	ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
1198 	ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1199 	ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1200 	ierr = MatDestroy(adj);CHKERRQ(ierr);
1201 	foundpart = PETSC_TRUE;
1202       }
1203       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1204     }
1205     ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr);
1206   }
1207   if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);}
1208 
1209   ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
1210   *outis = is;
1211 
1212   if (!foundpart) {
1213 
1214     /* Partitioning by contiguous chunks of rows */
1215 
1216     PetscInt mbs   = (rend-rstart)/bs;
1217     PetscInt start = rstart;
1218     for (i=0; i<n; i++) {
1219       PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1220       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1221       start += count;
1222     }
1223 
1224   } else {
1225 
1226     /* Partitioning by adjacency of diagonal block  */
1227 
1228     const PetscInt *numbering;
1229     PetscInt       *count,nidx,*indices,*newidx,start=0;
1230     /* Get node count in each partition */
1231     ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
1232     ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
1233     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1234       for (i=0; i<n; i++) count[i] *= bs;
1235     }
1236     /* Build indices from node numbering */
1237     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1238     ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
1239     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1240     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1241     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1242     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1243     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1244       ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
1245       for (i=0; i<nidx; i++)
1246 	for (j=0; j<bs; j++)
1247 	  newidx[i*bs+j] = indices[i]*bs + j;
1248       ierr = PetscFree(indices);CHKERRQ(ierr);
1249       nidx   *= bs;
1250       indices = newidx;
1251     }
1252     /* Shift to get global indices */
1253     for (i=0; i<nidx; i++) indices[i] += rstart;
1254 
1255     /* Build the index sets for each block */
1256     for (i=0; i<n; i++) {
1257       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
1258       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1259       start += count[i];
1260     }
1261 
    ierr = PetscFree(count);CHKERRQ(ierr);
    ierr = PetscFree(indices);CHKERRQ(ierr);
1264     ierr = ISDestroy(isnumb);CHKERRQ(ierr);
1265     ierr = ISDestroy(ispart);CHKERRQ(ierr);
1266   }
1267 
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "PCGASMDestroySubdomains"
1273 /*@C
1274    PCGASMDestroySubdomains - Destroys the index sets created with
1275    PCGASMCreateSubdomains(). Should be called after setting subdomains
1276    with PCGASMSetLocalSubdomains().
1277 
1278    Collective
1279 
1280    Input Parameters:
1281 +  n - the number of index sets
1282 .  is - the array of index sets
1283 -  is_local - the array of local index sets, can be PETSC_NULL
1284 
1285    Level: advanced
1286 
1287 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1288 
1289 .seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains()
1290 @*/
1291 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1292 {
1293   PetscInt       i;
1294   PetscErrorCode ierr;
1295   PetscFunctionBegin;
1296   if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
1297   PetscValidPointer(is,2);
1298   for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); }
1299   ierr = PetscFree(is);CHKERRQ(ierr);
1300   if (is_local) {
1301     PetscValidPointer(is_local,3);
1302     for (i=0; i<n; i++) { ierr = ISDestroy(is_local[i]);CHKERRQ(ierr); }
1303     ierr = PetscFree(is_local);CHKERRQ(ierr);
1304   }
1305   PetscFunctionReturn(0);
1306 }
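/*
   Typical usage (a sketch, not part of the library; it assumes the subdomains were
   obtained from PCGASMCreateSubdomains() and handed to the PC with
   PCGASMSetLocalSubdomains(), with argument orders as documented on their man pages):

     PetscInt n = 4;
     IS       *is;
     ierr = PCGASMCreateSubdomains(A,n,&is);CHKERRQ(ierr);
     ierr = PCGASMSetLocalSubdomains(pc,n,is,PETSC_NULL);CHKERRQ(ierr);
     ... solve with the KSP that owns pc ...
     ierr = PCGASMDestroySubdomains(n,is,PETSC_NULL);CHKERRQ(ierr);
*/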
1307 
1308 
1309 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1310 {                                                                                                       \
1311  PetscInt first_row = first/M, last_row = last/M+1;                                                     \
1312   /*                                                                                                    \
1313    Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
1314    of the bounding box of the intersection of the subdomain with the local ownership range (local       \
1315    subdomain).                                                                                          \
1316    Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
1317    of the intersection.                                                                                 \
1318   */                                                                                                    \
1319 	  /* ylow_loc is the grid row containing the first element of the local subdomain */                    \
1320   *ylow_loc = PetscMax(first_row,ylow);                                                                    \
1321   /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1322   *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft;                                                                            \
1323   /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
1324   *yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
1325 	  /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */   \
1326   *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright;                                                                          \
1327   /* Now compute the size of the local subdomain n. */ \
1328   *n = 0;                                               \
1329   if(*ylow_loc < *yhigh_loc) {                           \
1330     PetscInt width = xright-xleft;                     \
1331     *n += width*(*yhigh_loc-*ylow_loc-1);                 \
1332     *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
1333     *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
1334   }\
1335 }
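/*
   Worked example (illustration only, not from the source): on a 10x10 grid (M = N = 10)
   with local ownership range first = 23, last = 57 and the whole grid as the subdomain
   (xleft = ylow = 0, xright = yhigh = 10), the macro computes ylow_loc = 2, xleft_loc = 3,
   yhigh_loc = 6, xright_loc = 7 and n = 10*(6-2-1) + 7 - 3 = 34, matching the 34 locally
   owned indices 23..56.
*/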
1336 
1337 
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "PCGASMCreateSubdomains2D"
1341 /*@
1342    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1343    preconditioner for a two-dimensional problem on a regular grid.
1344 
1345    Collective
1346 
1347    Input Parameters:
1348 +  M, N - the global number of mesh points in the x and y directions
1349 .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1350 .  dof - degrees of freedom per node
1351 -  overlap - overlap in mesh lines
1352 
1353    Output Parameters:
1354 +  nsub - the number of local subdomains created
1355 .  is - array of index sets defining overlapping (if overlap > 0) subdomains
1356 -  is_local - array of index sets defining non-overlapping subdomains
1357 
1358    Note:
1359    Presently PCGASMCreateSubdomains2D() is valid only for sequential
1360    preconditioners.  More general related routines are
1361    PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains().
1362 
1363    Level: advanced
1364 
1365 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid
1366 
1367 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
1368           PCGASMSetOverlap()
1369 @*/
1370 PetscErrorCode  PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
1371 {
1372   PetscErrorCode ierr;
1373   PetscMPIInt    size, rank;
1374   PetscInt       i, j;
1375   PetscInt       maxheight, maxwidth;
1376   PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
1377   PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1378   PetscInt       x[2][2], y[2][2], n[2];
1379   PetscInt       first, last;
1380   PetscInt       nidx, *idx;
1381   PetscInt       ii,jj,s,q,d;
1382   PetscInt       k,kk;
1383   PetscMPIInt    color;
1384   MPI_Comm       comm, subcomm;
1385   IS             **iis = 0;
1386 
1387   PetscFunctionBegin;
1388   ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
1389   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1390   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1391   ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
1392   if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1393 	     "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1394 
1395   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1396   s = 0;
1397   ystart = 0;
1398   for (j=0; j<Ndomains; ++j) {
1399     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1400 	    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1401     /* Vertical domain limits with an overlap. */
1402     ylow = PetscMax(ystart - overlap,0);
1403     yhigh = PetscMin(ystart + maxheight + overlap,N);
1404     xstart = 0;
1405     for (i=0; i<Mdomains; ++i) {
1406       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1407       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1408       /* Horizontal domain limits with an overlap. */
1409       xleft   = PetscMax(xstart - overlap,0);
1410       xright  = PetscMin(xstart + maxwidth + overlap,M);
1411       /*
1412 	 Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1413       */
1414       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1415       if(nidx) {
1416         ++s;
1417       }
1418       xstart += maxwidth;
1419     }/* for(i = 0; i < Mdomains; ++i) */
1420     ystart += maxheight;
1421   }/* for(j = 0; j < Ndomains; ++j) */
1422   /* Now we can allocate the necessary number of ISs. */
1423   *nsub = s;
1424   ierr = PetscMalloc((*nsub)*sizeof(IS),is);CHKERRQ(ierr);
1425   ierr = PetscMalloc((*nsub)*sizeof(IS),is_local);CHKERRQ(ierr);
1426   s = 0;
1427   ystart = 0;
1428   for (j=0; j<Ndomains; ++j) {
1429     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1430 	    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1431     /* Vertical domain limits with an overlap. */
1432     y[0][0] = PetscMax(ystart - overlap,0);
1433     y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1434     /* Vertical domain limits without an overlap. */
1435     y[1][0] = ystart;
1436     y[1][1] = PetscMin(ystart + maxheight,N);
1437     xstart = 0;
1438     for (i=0; i<Mdomains; ++i) {
1439       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1440       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1441       /* Horizontal domain limits with an overlap. */
1442       x[0][0]  = PetscMax(xstart - overlap,0);
1443       x[0][1]  = PetscMin(xstart + maxwidth + overlap,M);
1444       /* Horizontal domain limits without an overlap. */
1445       x[1][0] = xstart;
1446       x[1][1] = PetscMin(xstart+maxwidth,M);
1447       /*
1448 	 Determine whether this domain intersects this processor's ownership range of pc->pmat.
1449 	 Do this twice: first for the domains with overlaps, and once without.
1450 	 During the first pass create the subcommunicators, and use them on the second pass as well.
1451       */
1452       for(q = 0; q < 2; ++q) {
1453 	/*
1454 	  The domain limits (xleft, xright) and (ylow, yhigh) are adjusted
1455 	  according to whether the domain with or without the overlap is being considered.
1456 	*/
1457 	xleft = x[q][0]; xright = x[q][1];
1458 	ylow  = y[q][0]; yhigh  = y[q][1];
1459         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1460 	nidx *= dof;
1461         n[q] = nidx;
1462         /*
1463          Based on the counted number of indices in the local domain *with an overlap*,
1464          construct a subcommunicator of all the processors supporting this domain.
1465          Observe that a domain with an overlap might have nontrivial local support,
1466          while the domain without an overlap might not.  Hence, the decision to participate
1467          in the subcommunicator must be based on the domain with an overlap.
1468          */
1469 	if (q == 0) {
1470 	  if(nidx) {
1471 	    color = 1;
1472 	  } else {
1473 	    color = MPI_UNDEFINED;
1474 	  }
1475 	  ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
1476 	}
1477         /*
1478          Proceed only if the number of local indices *with an overlap* is nonzero.
1479          */
1480         if (n[0]) {
1481           if(q == 0) {
1482             iis = is;
1483           }
1484           if (q == 1) {
1485             /*
1486              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1487              Moreover, if the overlap is zero, the two ISs are identical.
1488              */
1489             if (overlap == 0) {
1490               (*is_local)[s] = (*is)[s];
1491               ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
1492               continue;
1493             } else {
1494               iis = is_local;
1495               subcomm = ((PetscObject)(*is)[s])->comm;
1496             }
1497           }/* if(q == 1) */
1498           idx = PETSC_NULL;
1499 	  ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
1500           if(nidx) {
1501             k    = 0;
1502             for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1503               PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft;
1504               PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright;
1505               kk = dof*(M*jj + x0);
1506               for (ii=x0; ii<x1; ++ii) {
1507                 for(d = 0; d < dof; ++d) {
1508                   idx[k++] = kk++;
1509                 }
1510               }
1511             }
1512           }
1513 	  ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr);
1514 	}/* if(n[0]) */
1515       }/* for(q = 0; q < 2; ++q) */
1516       if(n[0]) {
1517         ++s;
1518       }
1519       xstart += maxwidth;
1520     }/* for(i = 0; i < Mdomains; ++i) */
1521     ystart += maxheight;
1522   }/* for(j = 0; j < Ndomains; ++j) */
1523   PetscFunctionReturn(0);
1524 }
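/*
   Typical usage (a sketch; the argument order of PCGASMSetLocalSubdomains() is an
   assumption here, per its man page): decompose a 64x64 scalar grid into a 4x4 array
   of subdomains with an overlap of one mesh line:

     PetscInt nsub;
     IS       *is,*is_local;
     ierr = PCGASMCreateSubdomains2D(pc,64,64,4,4,1,1,&nsub,&is,&is_local);CHKERRQ(ierr);
     ierr = PCGASMSetLocalSubdomains(pc,nsub,is,is_local);CHKERRQ(ierr);
*/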
1525 
1526 #undef __FUNCT__
1527 #define __FUNCT__ "PCGASMGetLocalSubdomains"
1528 /*@C
1529     PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor
1530     only) for the additive Schwarz preconditioner.
1531 
1532     Not Collective
1533 
1534     Input Parameter:
1535 .   pc - the preconditioner context
1536 
1537     Output Parameters:
1538 +   n - the number of subdomains for this processor (default value = 1)
1539 .   is - the index sets that define the subdomains for this processor
1540 -   is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)
1541 
1542 
1543     Notes:
1544     The IS numbering is in the parallel, global numbering of the vector.
1545 
1546     Level: advanced
1547 
1548 .keywords: PC, GASM, set, local, subdomains, additive Schwarz
1549 
1550 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1551           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices()
1552 @*/
1553 PetscErrorCode  PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1554 {
1555   PC_GASM         *osm;
1556   PetscErrorCode ierr;
1557   PetscBool      match;
1558 
1559   PetscFunctionBegin;
1560   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1561   PetscValidIntPointer(n,2);
1562   if (is) PetscValidPointer(is,3);
1563   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1564   if (!match) {
1565     if (n)  *n  = 0;
1566     if (is) *is = PETSC_NULL;
1567   } else {
1568     osm = (PC_GASM*)pc->data;
1569     if (n)  *n  = osm->n;
1570     if (is) *is = osm->is;
1571     if (is_local) *is_local = osm->is_local;
1572   }
1573   PetscFunctionReturn(0);
1574 }
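/*
   Typical usage (a sketch, not part of the library): after PCSetUp(), retrieve and,
   say, view the subdomain index sets on their own communicators:

     PetscInt n,i;
     IS       *is,*is_local;
     ierr = PCGASMGetLocalSubdomains(pc,&n,&is,&is_local);CHKERRQ(ierr);
     for (i=0; i<n; i++) {
       ierr = ISView(is[i],PETSC_VIEWER_STDOUT_(((PetscObject)is[i])->comm));CHKERRQ(ierr);
     }
*/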
1575 
1576 #undef __FUNCT__
1577 #define __FUNCT__ "PCGASMGetLocalSubmatrices"
1578 /*@C
1579     PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1580     only) for the additive Schwarz preconditioner.
1581 
1582     Not Collective
1583 
1584     Input Parameter:
1585 .   pc - the preconditioner context
1586 
1587     Output Parameters:
1588 +   n - the number of matrices for this processor (default value = 1)
1589 -   mat - the matrices
1590 
1591 
1592     Level: advanced
1593 
1594 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi
1595 
1596 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1597           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains()
1598 @*/
1599 PetscErrorCode  PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1600 {
1601   PC_GASM         *osm;
1602   PetscErrorCode ierr;
1603   PetscBool      match;
1604 
1605   PetscFunctionBegin;
1606   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1607   PetscValidIntPointer(n,2);
1608   if (mat) PetscValidPointer(mat,3);
1609   if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must be called after KSPSetUp() or PCSetUp().");
1610   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1611   if (!match) {
1612     if (n)   *n   = 0;
1613     if (mat) *mat = PETSC_NULL;
1614   } else {
1615     osm = (PC_GASM*)pc->data;
1616     if (n)   *n   = osm->n;
1617     if (mat) *mat = osm->pmat;
1618   }
1619   PetscFunctionReturn(0);
1620 }
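
/*
   Typical usage (a sketch, not part of the library): after KSPSetUp(), inspect the
   subdomain matrices, e.g. report their sizes:

     PetscInt n,i,m;
     Mat      *mat;
     ierr = PCGASMGetLocalSubmatrices(pc,&n,&mat);CHKERRQ(ierr);
     for (i=0; i<n; i++) {
       ierr = MatGetSize(mat[i],&m,PETSC_NULL);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_SELF,"Submatrix %D has %D rows.\n",i,m);CHKERRQ(ierr);
     }
*/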
1621