xref: /petsc/src/ksp/pc/impls/gasm/gasm.c (revision 18d3aa3f5b7b6cf3b58215079fab4e2a5c7adae3)
1 #define PETSCKSP_DLL
2 
3 /*
4   This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
5   In this version each processor may have any number of subdomains and a subdomain may live on multiple
6   processors.
7 
8        N    - total number of subdomains on all processors
9        n    - actual number of subdomains on this processor
10        nmax - maximum number of subdomains per processor
11 */
12 #include "private/pcimpl.h"     /*I "petscpc.h" I*/
13 
14 typedef struct {
15   PetscInt   N,n,nmax;
16   PetscInt   overlap;             /* overlap requested by user */
17   KSP        *ksp;                /* linear solvers for each block */
18   Vec        gx,gy;               /* Merged work vectors */
19   Vec        *x,*y;               /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
20   IS         gis, gis_local;      /* merged ISs */
21   VecScatter grestriction;        /* merged restriction */
22   VecScatter gprolongation;       /* merged prolongation */
23   IS         *is;                 /* index set that defines each overlapping subdomain */
24   IS         *is_local;           /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */
25   Mat        *pmat;               /* subdomain block matrices */
26   PCGASMType  type;               /* use reduced interpolation, restriction or both */
27   PetscBool  type_set;            /* if user set this value (so won't change it for symmetric problems) */
28   PetscBool  same_local_solves;   /* flag indicating whether all local solvers are the same */
29   PetscBool  sort_indices;        /* flag to sort subdomain indices */
30 } PC_GASM;
31 
32 #undef __FUNCT__
33 #define __FUNCT__ "PCGASMPrintSubdomains"
34 static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
35 {
36   PC_GASM         *osm  = (PC_GASM*)pc->data;
37   const char     *prefix;
38   char           fname[PETSC_MAX_PATH_LEN+1];
39   PetscViewer    viewer, sviewer;
40   PetscInt       i,j,nidx;
41   const PetscInt *idx;
42   char           *cidx;
43   PetscBool      found;
44   PetscMPIInt    size, rank;
45   PetscErrorCode ierr;
46 
47   PetscFunctionBegin;
48   ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size); CHKERRQ(ierr);
49   ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank); CHKERRQ(ierr);
50   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
51   ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr);
52   if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
53   for (i=0;i<osm->n;++i) {
54     ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
55     ierr = ISGetLocalSize(osm->is[i], &nidx); CHKERRQ(ierr);
56     /*
57      No more than 15 characters per index plus a space.
58      PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
59      in case nidx == 0. That will take care of the space for the trailing '\0' as well.
60      For nidx == 0, the whole string consists of 16 '\0' characters.
61      */
62     ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx); CHKERRQ(ierr);
63     ierr = ISGetIndices(osm->is[i], &idx);    CHKERRQ(ierr);
64     ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*(nidx+1), &sviewer); CHKERRQ(ierr);
65     for(j = 0; j < nidx; ++j) {
66       ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]); CHKERRQ(ierr);
67     }
68     ierr = PetscViewerDestroy(sviewer); CHKERRQ(ierr);
69     ierr = ISRestoreIndices(osm->is[i],&idx); CHKERRQ(ierr);
70 
71     ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n"); CHKERRQ(ierr);
72     ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx); CHKERRQ(ierr);
73     ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
74     ierr = PetscViewerASCIIPrintf(viewer, "\n"); CHKERRQ(ierr);
75     ierr = PetscFree(cidx); CHKERRQ(ierr);
76     ierr = PetscViewerDestroy(viewer); CHKERRQ(ierr);
77     if (osm->is_local) {
78       ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
79       ierr = ISGetLocalSize(osm->is_local[i], &nidx); CHKERRQ(ierr);
80       /*
81        No more than 15 characters per index plus a space.
82        PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
83        in case nidx == 0. That will take care of the space for the trailing '\0' as well.
84       For nidx == 0, the whole string consists of 16 '\0' characters.
85        */
86       ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx); CHKERRQ(ierr);
87       ierr = ISGetIndices(osm->is_local[i], &idx);    CHKERRQ(ierr);
88       ierr = PetscViewerStringOpen(((PetscObject)(osm->is_local[i]))->comm, cidx, 16*(nidx+1), &sviewer); CHKERRQ(ierr);
89       for(j = 0; j < nidx; ++j) {
90         ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]); CHKERRQ(ierr);
91       }
92       ierr = PetscViewerDestroy(sviewer); CHKERRQ(ierr);
93       ierr = ISRestoreIndices(osm->is_local[i],&idx); CHKERRQ(ierr);
94 
95       ierr = PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n"); CHKERRQ(ierr);
96       ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx); CHKERRQ(ierr);
97       ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
98       ierr = PetscViewerASCIIPrintf(viewer, "\n"); CHKERRQ(ierr);
99       ierr = PetscFree(cidx); CHKERRQ(ierr);
100       ierr = PetscViewerDestroy(viewer); CHKERRQ(ierr);
101     }
102   }
103   PetscFunctionReturn(0);
104 }
105 
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "PCView_GASM"
109 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
110 {
111   PC_GASM         *osm = (PC_GASM*)pc->data;
112   const char     *prefix;
113   PetscErrorCode ierr;
114   PetscMPIInt    rank, size;
115   PetscInt       i,bsz;
116   PetscBool      iascii,isstring, print_subdomains;
117   PetscViewer    sviewer;
118 
119 
120   PetscFunctionBegin;
121   ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size); CHKERRQ(ierr);
122   ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank); CHKERRQ(ierr);
123   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
124   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
125   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
126   print_subdomains = PETSC_FALSE;
      ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr);
127   if (iascii) {
128     char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set";
129     if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);}
130     if (osm->nmax > 0)     {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);}
131     ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr);
132     ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
133     ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr);
134     ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
135     ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
136     if (osm->same_local_solves) {
137       if (osm->ksp) {
138         ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr);
139         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
140         if (!rank) {
141           ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
142           ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
143           ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
144         }
145         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
146       }
147     } else {
148       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
149       ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr);
150       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
151       ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
152       for (i=0; i<osm->nmax; i++) {
153         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
154         if (i < osm->n) {
155           ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
156           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr);
157           ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
158           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
159         }
160         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
161       }
162       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
163       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
164     }
165   } else if (isstring) {
166     ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr);
167     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
168     if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
169     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
170   } else {
171     SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name);
172   }
173   ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
174   if(print_subdomains) {
175     ierr = PCGASMPrintSubdomains(pc); CHKERRQ(ierr);
176   }
177   PetscFunctionReturn(0);
178 }
179 
180 
181 
182 
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "PCSetUp_GASM"
186 static PetscErrorCode PCSetUp_GASM(PC pc)
187 {
188   PC_GASM         *osm  = (PC_GASM*)pc->data;
189   PetscErrorCode ierr;
190   PetscBool      symset,flg;
191   PetscInt       i;
192   PetscMPIInt    rank, size;
193   MatReuse       scall = MAT_REUSE_MATRIX;
194   KSP            ksp;
195   PC             subpc;
196   const char     *prefix,*pprefix;
197   PetscInt       dn;       /* Number of indices in a single subdomain assigned to this processor. */
198   const PetscInt *didx;    /* Indices from a single subdomain assigned to this processor. */
199   PetscInt       ddn;      /* Number of indices in all subdomains assigned to this processor. */
200   PetscInt       *ddidx;   /* Indices of all subdomains assigned to this processor. */
201   IS             gid;      /* Identity IS of the size of all subdomains assigned to this processor. */
202   Vec            x,y;
203   PetscScalar    *gxarray, *gyarray;
204   PetscInt       gfirst, glast;
205 
206   PetscFunctionBegin;
207   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
208   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
209   if (!pc->setupcalled) {
210 
211     if (!osm->type_set) {
212       ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
213       if (symset && flg) { osm->type = PC_GASM_BASIC; }
214     }
215 
216     if (osm->N == PETSC_DECIDE && osm->n < 1) {
217       /* no subdomains given, use one per processor */
218       osm->nmax = osm->n = 1;
219       ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
220       osm->N = size;
221     } else if (osm->N == PETSC_DECIDE) {
222       PetscInt inwork[2], outwork[2];
223       /* determine global number of subdomains and the max number of local subdomains */
224       inwork[0] = inwork[1] = osm->n;
225       ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
226       osm->nmax = outwork[0];
227       osm->N    = outwork[1];
228     }
229     if (!osm->is){ /* create the index sets */
230       ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr);
231     }
232     if (!osm->is_local) {
233       /*
234 	 This indicates that osm->is should define a nonoverlapping decomposition
235 	 (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetLocalSubdomains,
236 	  but the assumption is that either the user does the right thing, or subdomains in osm->is have been created
237 	  via PCGASMCreateSubdomains, which guarantees a nonoverlapping decomposition).
238 	 Therefore, osm->is will be used to define osm->is_local.
239 	 If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap,
240 	 so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping.
241 	 Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is.
242       */
243       ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
244       for (i=0; i<osm->n; i++) {
245         if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
246           ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr);
247           ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr);
248         } else {
249           ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr);
250           osm->is_local[i] = osm->is[i];
251         }
252       }
253     }
254     /* Beyond this point osm->is_local is not null. */
255     if (osm->overlap > 0) {
256       /* Extend the "overlapping" regions by a number of steps */
257       ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr);
258     }
259     ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
260     flg  = PETSC_FALSE;
261     ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
262     if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); }
263 
264     if (osm->sort_indices) {
265       for (i=0; i<osm->n; i++) {
266         ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
267 	ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
268       }
269     }
270     /* Merge the ISs, create merged vectors and scatter contexts. */
271     /* Restriction ISs. */
272     ddn = 0;
273     for (i=0; i<osm->n; i++) {
274       ierr = ISGetLocalSize(osm->is[i],&dn);          CHKERRQ(ierr);
275       ddn += dn;
276     }
277     ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx); CHKERRQ(ierr);
278     ddn = 0;
279     for (i=0; i<osm->n; i++) {
280       ierr = ISGetLocalSize(osm->is[i],&dn); CHKERRQ(ierr);
281       ierr = ISGetIndices(osm->is[i],&didx); CHKERRQ(ierr);
282       ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn); CHKERRQ(ierr);
283       ierr = ISRestoreIndices(osm->is[i], &didx); CHKERRQ(ierr);
284       ddn += dn;
285     }
286     ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis); CHKERRQ(ierr);
287     ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr);
288     ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx); CHKERRQ(ierr);
289     ierr = VecDuplicate(osm->gx,&osm->gy);                                     CHKERRQ(ierr);
290     ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);                     CHKERRQ(ierr);
291     ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid);              CHKERRQ(ierr);
292     ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction));     CHKERRQ(ierr);
293     ierr = ISDestroy(gid);                                                     CHKERRQ(ierr);
294     /* Prolongation ISs */
295     { PetscInt       dn_local;       /* Number of indices in the local part of a single domain assigned to this processor. */
296       const PetscInt *didx_local;    /* Global indices from the local part of a single domain assigned to this processor. */
297       PetscInt       ddn_local;      /* Number of indices in the local part of the disjoint union of all domains assigned to this processor. */
298       PetscInt       *ddidx_local;   /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */
299       /**/
300       ISLocalToGlobalMapping ltog;          /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */
301       PetscInt              *ddidx_llocal;  /* Mapped local indices of the disjoint union of local parts of subdomains. */
302       PetscInt               ddn_llocal;    /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */
303       IS                     gis_llocal;    /* IS with ddidx_llocal indices. */
304       PetscInt               j;
305       ddn_local = 0;
306       for (i=0; i<osm->n; i++) {
307 	ierr = ISGetLocalSize(osm->is_local[i],&dn_local); CHKERRQ(ierr);
308 	ddn_local += dn_local;
309       }
310       ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local); CHKERRQ(ierr);
311       ddn_local = 0;
312       for (i=0; i<osm->n; i++) {
313 	ierr = ISGetLocalSize(osm->is_local[i],&dn_local); CHKERRQ(ierr);
314 	ierr = ISGetIndices(osm->is_local[i],&didx_local); CHKERRQ(ierr);
315 	ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local); CHKERRQ(ierr);
316 	ierr = ISRestoreIndices(osm->is_local[i], &didx_local); CHKERRQ(ierr);
317 	ddn_local += dn_local;
318       }
319       ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal); CHKERRQ(ierr);
320       ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local)); CHKERRQ(ierr);
321       ierr = ISLocalToGlobalMappingCreateIS(osm->gis,&ltog);CHKERRQ(ierr);
322       ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr);
323       ierr = ISLocalToGlobalMappingDestroy(ltog);CHKERRQ(ierr);
324       if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local);
325       /* Now convert these localized indices into the global indices into the merged output vector. */
326       ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast); CHKERRQ(ierr);
327       for(j=0; j < ddn_llocal; ++j) {
328 	ddidx_llocal[j] += gfirst;
329       }
330       ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal); CHKERRQ(ierr);
331       ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);              CHKERRQ(ierr);
332       ierr = ISDestroy(gis_llocal);CHKERRQ(ierr);
333     }
334     /* Create the subdomain work vectors. */
335     ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr);
336     ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr);
337     ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
338     ierr = VecGetArray(osm->gx, &gxarray);                                     CHKERRQ(ierr);
339     ierr = VecGetArray(osm->gy, &gyarray);                                     CHKERRQ(ierr);
340     for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) {
341       PetscInt dN;
342       ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
343       ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr);
344       ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr);
345       ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr);
346     }
347     ierr = VecRestoreArray(osm->gx, &gxarray); CHKERRQ(ierr);
348     ierr = VecRestoreArray(osm->gy, &gyarray); CHKERRQ(ierr);
349     ierr = VecDestroy(x); CHKERRQ(ierr);
350     ierr = VecDestroy(y); CHKERRQ(ierr);
351     /* Create the local solvers */
352     ierr = PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr);
353     for (i=0; i<osm->n; i++) { /* KSPs are local */
354       ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr);
355       ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
356       ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
357       ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
358       ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
359       ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
360       ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
361       ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
362       osm->ksp[i] = ksp;
363     }
364     scall = MAT_INITIAL_MATRIX;
365 
366   } else {
367     /*
368        Destroy the blocks from the previous iteration
369     */
370     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
371       ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
372       scall = MAT_INITIAL_MATRIX;
373     }
374   }
375 
376   /*
377      Extract out the submatrices.
378   */
379   if(size > 1) {
380     ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
381   }
382   else {
383     ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
384   }
385   if (scall == MAT_INITIAL_MATRIX) {
386     ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
387     for (i=0; i<osm->n; i++) {
388       ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
389       ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
390     }
391   }
392 
393   /* Return control to the user so that the submatrices can be modified (e.g., to apply
394      different boundary conditions for the submatrices than for the global problem) */
395   ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);
396 
397   /*
398      Loop over submatrices putting them into local ksp
399   */
400   for (i=0; i<osm->n; i++) {
401     ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
402     if (!pc->setupcalled) {
403       ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
404     }
405   }
406 
407   PetscFunctionReturn(0);
408 }
409 
410 #undef __FUNCT__
411 #define __FUNCT__ "PCSetUpOnBlocks_GASM"
412 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
413 {
414   PC_GASM         *osm = (PC_GASM*)pc->data;
415   PetscErrorCode ierr;
416   PetscInt       i;
417 
418   PetscFunctionBegin;
419   for (i=0; i<osm->n; i++) {
420     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
421   }
422   PetscFunctionReturn(0);
423 }
424 
425 #undef __FUNCT__
426 #define __FUNCT__ "PCApply_GASM"
427 static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y)
428 {
429   PC_GASM         *osm = (PC_GASM*)pc->data;
430   PetscErrorCode ierr;
431   PetscInt       i;
432   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
433 
434   PetscFunctionBegin;
435   /*
436      Support for limiting the restriction or interpolation to only local
437      subdomain values (leaving the other values 0).
438   */
439   if (!(osm->type & PC_GASM_RESTRICT)) {
440     forward = SCATTER_FORWARD_LOCAL;
441     /* have to zero the work RHS since scatter may leave some slots empty */
442     ierr = VecZeroEntries(osm->gx); CHKERRQ(ierr);
443   }
444   if (!(osm->type & PC_GASM_INTERPOLATE)) {
445     reverse = SCATTER_REVERSE_LOCAL;
446   }
447 
448   ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
449   ierr = VecZeroEntries(y);CHKERRQ(ierr);
450   ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
451   /* do the subdomain solves */
452   for (i=0; i<osm->n; ++i) {
453     ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
454   }
455   ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
456   ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "PCApplyTranspose_GASM"
462 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y)
463 {
464   PC_GASM         *osm = (PC_GASM*)pc->data;
465   PetscErrorCode ierr;
466   PetscInt       i;
467   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
468 
469   PetscFunctionBegin;
470   /*
471      Support for limiting the restriction or interpolation to only local
472      subdomain values (leaving the other values 0).
473 
474      Note: the roles of forward and reverse are reversed from those in PCApply_GASM() because we are
475      applying the transpose of the three operator factors (interpolation, subdomain solves, restriction).
476   */
477   if (!(osm->type & PC_GASM_INTERPOLATE)) {
478     forward = SCATTER_FORWARD_LOCAL;
479     /* have to zero the work RHS since scatter may leave some slots empty */
480     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
481   }
482   if (!(osm->type & PC_GASM_RESTRICT)) {
483     reverse = SCATTER_REVERSE_LOCAL;
484   }
485 
486   ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
487   ierr = VecZeroEntries(y);CHKERRQ(ierr);
488   ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
489   /* do the local solves */
490   for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
491     ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
492   }
493   ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
494   ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
495   PetscFunctionReturn(0);
496 }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "PCDestroy_GASM"
500 static PetscErrorCode PCDestroy_GASM(PC pc)
501 {
502   PC_GASM         *osm = (PC_GASM*)pc->data;
503   PetscErrorCode ierr;
504   PetscInt       i;
505 
506   PetscFunctionBegin;
507   if (osm->ksp) {
508     for (i=0; i<osm->n; i++) {
509       ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr);
510     }
511     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
512   }
513   if (osm->pmat) {
514     if (osm->n > 0) {
515       ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
516     }
517   }
518   for (i=0; i<osm->n; i++) {
519     ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr);
520     ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr);
521   }
522   ierr = PetscFree(osm->x);CHKERRQ(ierr);
523   ierr = PetscFree(osm->y);CHKERRQ(ierr);
524   ierr = VecDestroy(osm->gx); CHKERRQ(ierr);
525   ierr = VecDestroy(osm->gy); CHKERRQ(ierr);
526 
527   ierr = VecScatterDestroy(osm->grestriction); CHKERRQ(ierr);
528   ierr = VecScatterDestroy(osm->gprolongation); CHKERRQ(ierr);
529 
530   if (osm->is) {
531     ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
532   }
533 
534   if(osm->gis) {
535     ierr = ISDestroy(osm->gis); CHKERRQ(ierr);
536   }
537   if(osm->gis_local) {
538     ierr = ISDestroy(osm->gis_local); CHKERRQ(ierr);
539   }
540   ierr = PetscFree(pc->data);CHKERRQ(ierr);
541   PetscFunctionReturn(0);
542 }
543 
544 #undef __FUNCT__
545 #define __FUNCT__ "PCSetFromOptions_GASM"
546 static PetscErrorCode PCSetFromOptions_GASM(PC pc) {
547   PC_GASM         *osm = (PC_GASM*)pc->data;
548   PetscErrorCode ierr;
549   PetscInt       blocks,ovl;
550   PetscBool      symset,flg;
551   PCGASMType      gasmtype;
552 
553   PetscFunctionBegin;
554   /* set the type to symmetric if matrix is symmetric */
555   if (!osm->type_set && pc->pmat) {
556     ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
557     if (symset && flg) { osm->type = PC_GASM_BASIC; }
558   }
559   ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr);
560     ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
561     if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); }
562     ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
563     if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
564     flg  = PETSC_FALSE;
565     ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
566     if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); }
567   ierr = PetscOptionsTail();CHKERRQ(ierr);
568   PetscFunctionReturn(0);
569 }
570 
571 /*------------------------------------------------------------------------------------*/
572 
573 EXTERN_C_BEGIN
574 #undef __FUNCT__
575 #define __FUNCT__ "PCGASMSetLocalSubdomains_GASM"
576 PetscErrorCode  PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[])
577 {
578   PC_GASM         *osm = (PC_GASM*)pc->data;
579   PetscErrorCode ierr;
580   PetscInt       i;
581 
582   PetscFunctionBegin;
583   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
584   if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp().");
585 
586   if (!pc->setupcalled) {
587     osm->n            = n;
588     osm->is           = 0;
589     osm->is_local     = 0;
590     if (is) {
591       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
592     }
593     if (is_local) {
594       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);}
595     }
596     if (osm->is) {
597       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
598     }
599     if (is) {
600       ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr);
601       for (i=0; i<n; i++) { osm->is[i] = is[i]; }
602       /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */
603       osm->overlap = -1;
604     }
605     if (is_local) {
606       ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
607       for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
608     }
609   }
610   PetscFunctionReturn(0);
611 }
612 EXTERN_C_END
613 
614 EXTERN_C_BEGIN
615 #undef __FUNCT__
616 #define __FUNCT__ "PCGASMSetTotalSubdomains_GASM"
617 PetscErrorCode  PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N) {
618   PC_GASM         *osm = (PC_GASM*)pc->data;
619   PetscErrorCode ierr;
620   PetscMPIInt    rank,size;
621   PetscInt       n;
622 
623   PetscFunctionBegin;
624   if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
625 
626   /*
627      Split the subdomains equally among all processors
628   */
629   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
630   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
631   n = N/size + ((N % size) > rank);
632   if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
633   if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp().");
634   if (!pc->setupcalled) {
635     if (osm->is) {
636       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
637     }
638     osm->N            = N;
639     osm->n            = n;
640     osm->is           = 0;
641     osm->is_local     = 0;
642   }
643   PetscFunctionReturn(0);
644 }/* PCGASMSetTotalSubdomains_GASM() */
645 EXTERN_C_END
646 
647 EXTERN_C_BEGIN
648 #undef __FUNCT__
649 #define __FUNCT__ "PCGASMSetOverlap_GASM"
650 PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
651 {
652   PC_GASM *osm = (PC_GASM*)pc->data;
653 
654   PetscFunctionBegin;
655   if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
656   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
657   if (!pc->setupcalled) {
658     osm->overlap = ovl;
659   }
660   PetscFunctionReturn(0);
661 }
662 EXTERN_C_END
663 
664 EXTERN_C_BEGIN
665 #undef __FUNCT__
666 #define __FUNCT__ "PCGASMSetType_GASM"
667 PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
668 {
669   PC_GASM *osm = (PC_GASM*)pc->data;
670 
671   PetscFunctionBegin;
672   osm->type     = type;
673   osm->type_set = PETSC_TRUE;
674   PetscFunctionReturn(0);
675 }
676 EXTERN_C_END
677 
678 EXTERN_C_BEGIN
679 #undef __FUNCT__
680 #define __FUNCT__ "PCGASMSetSortIndices_GASM"
681 PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool  doSort)
682 {
683   PC_GASM *osm = (PC_GASM*)pc->data;
684 
685   PetscFunctionBegin;
686   osm->sort_indices = doSort;
687   PetscFunctionReturn(0);
688 }
689 EXTERN_C_END
690 
691 EXTERN_C_BEGIN
692 #undef __FUNCT__
693 #define __FUNCT__ "PCGASMGetSubKSP_GASM"
694 /*
695    FIX: This routine might need to be modified once multiple ranks per subdomain are allowed.
696         In particular, it would upset the global subdomain number calculation.
697 */
698 PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
699 {
700   PC_GASM         *osm = (PC_GASM*)pc->data;
701   PetscErrorCode ierr;
702 
703   PetscFunctionBegin;
704   if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
705 
706   if (n) {
707     *n = osm->n;
708   }
709   if (first) {
710     ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
711     *first -= osm->n;
712   }
713   if (ksp) {
714     /* Assume that local solves are now different; not necessarily
715        true though!  This flag is used only for PCView_GASM() */
716     *ksp                   = osm->ksp;
717     osm->same_local_solves = PETSC_FALSE;
718   }
719   PetscFunctionReturn(0);
720 }/* PCGASMGetSubKSP_GASM() */
721 EXTERN_C_END
722 
723 
724 #undef __FUNCT__
725 #define __FUNCT__ "PCGASMSetLocalSubdomains"
726 /*@C
727     PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor
728     only) for the additive Schwarz preconditioner.
729 
730     Collective on PC
731 
732     Input Parameters:
733 +   pc - the preconditioner context
734 .   n - the number of subdomains for this processor (default value = 1)
735 .   is - the index sets that define the subdomains for this processor
736          (or PETSC_NULL for PETSc to determine subdomains)
737 -   is_local - the index sets that define the local part of the subdomains for this processor
738          (or PETSC_NULL to have PCGASM derive the local parts from is)
739 
740     Notes:
741     The IS numbering is in the parallel, global numbering of the vector.
742 
743     By default the GASM preconditioner uses 1 block per processor.
744 
745     Use PCGASMSetTotalSubdomains() to set the subdomains for all processors.
746 
747     Level: advanced
748 
749 .keywords: PC, GASM, set, local, subdomains, additive Schwarz
750 
751 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
752           PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
753 @*/
754 PetscErrorCode  PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
755 {
756   PetscErrorCode ierr;
757 
758   PetscFunctionBegin;
759   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
760   ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr);
761   PetscFunctionReturn(0);
762 }
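
/*
   Illustrative sketch (not part of the library) of calling PCGASMSetLocalSubdomains() with two
   process-local subdomains.  The arrays idx0/idx1, the lengths n0/n1, and pc are placeholders
   assumed to be supplied by the application; the arrays hold global row indices:

       IS is[2];
       ierr = ISCreateGeneral(PETSC_COMM_SELF,n0,idx0,PETSC_COPY_VALUES,&is[0]);CHKERRQ(ierr);
       ierr = ISCreateGeneral(PETSC_COMM_SELF,n1,idx1,PETSC_COPY_VALUES,&is[1]);CHKERRQ(ierr);
       // Passing PETSC_NULL for is_local lets PCGASM derive the nonoverlapping local parts.
       ierr = PCGASMSetLocalSubdomains(pc,2,is,PETSC_NULL);CHKERRQ(ierr);
*/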
763 
764 #undef __FUNCT__
765 #define __FUNCT__ "PCGASMSetTotalSubdomains"
766 /*@C
767     PCGASMSetTotalSubdomains - Sets the total number of subdomains to use across all
768     processors for the additive Schwarz preconditioner.  Either all or no processors in the
769     PC communicator must call this routine, with the same value of N.
770 
771     Collective on PC
772 
773     Input Parameters:
774 +   pc - the preconditioner context
775 -   N - the total number of subdomains over all processors
776 
777     Options Database Key:
778 .   -pc_gasm_blocks <blks> - Sets total blocks
779 
780     Notes:
781     This routine sets only the number of subdomains; the subdomains themselves (and the
782     index sets that define them) are then determined by PETSc and split as evenly as
783     possible among the processors.
784 
785     By default the GASM preconditioner uses 1 block per processor.
786 
787     Use PCGASMSetLocalSubdomains() to set the subdomain index sets explicitly.
795 
796     Level: advanced
797 
798 .keywords: PC, GASM, set, total, global, subdomains, additive Schwarz
799 
800 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
801           PCGASMCreateSubdomains2D()
802 @*/
803 PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
804 {
805   PetscErrorCode ierr;
806 
807   PetscFunctionBegin;
808   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
809   ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr);
810   PetscFunctionReturn(0);
811 }
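
/*
   Illustrative sketch (not part of the library): requesting 16 subdomains in total and letting
   PETSc split them across the processes of the PC communicator (pc is assumed to come from the
   application):

       ierr = PCSetType(pc,PCGASM);CHKERRQ(ierr);
       ierr = PCGASMSetTotalSubdomains(pc,16);CHKERRQ(ierr);   // equivalent to -pc_gasm_blocks 16
*/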
812 
813 #undef __FUNCT__
814 #define __FUNCT__ "PCGASMSetOverlap"
815 /*@
816     PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
817     additive Schwarz preconditioner.  Either all or no processors in the
818     PC communicator must call this routine.
819 
820     Logically Collective on PC
821 
822     Input Parameters:
823 +   pc  - the preconditioner context
824 -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)
825 
826     Options Database Key:
827 .   -pc_gasm_overlap <ovl> - Sets overlap
828 
829     Notes:
830     By default the GASM preconditioner uses 1 block per processor.  To use
831     multiple blocks per processor, see PCGASMSetTotalSubdomains() and
832     PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>).
833 
834     The overlap defaults to 1, so if one desires that no additional
835     overlap be computed beyond what may have been set with a call to
836     PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl
837     must be set to 0.  In particular, if one does not explicitly set
838     the subdomains in an application code, then all overlap is computed
839     internally by PETSc, and using an overlap of 0 results in a GASM
840     variant that is equivalent to the block Jacobi preconditioner.
841 
842     Note that one can define initial index sets with any overlap via
843     PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine
844     PCGASMSetOverlap() merely allows PETSc to extend that overlap further
845     if desired.
846 
847     Level: intermediate
848 
849 .keywords: PC, GASM, set, overlap
850 
851 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
852           PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
853 @*/
854 PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
855 {
856   PetscErrorCode ierr;
857 
858   PetscFunctionBegin;
859   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
860   PetscValidLogicalCollectiveInt(pc,ovl,2);
861   ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
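
/*
   Illustrative sketch (not part of the library): requesting two layers of additional overlap
   around each subdomain (pc is assumed to come from the application):

       ierr = PCGASMSetOverlap(pc,2);CHKERRQ(ierr);   // equivalent to -pc_gasm_overlap 2
       // With internally generated subdomains, an overlap of 0 gives a method equivalent
       // to the block Jacobi preconditioner, as noted above.
*/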
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "PCGASMSetType"
867 /*@
868     PCGASMSetType - Sets the type of restriction and interpolation used
869     for local problems in the additive Schwarz method.
870 
871     Logically Collective on PC
872 
873     Input Parameters:
874 +   pc  - the preconditioner context
875 -   type - variant of GASM, one of
876 .vb
877       PC_GASM_BASIC       - full interpolation and restriction
878       PC_GASM_RESTRICT    - full restriction, local processor interpolation
879       PC_GASM_INTERPOLATE - full interpolation, local processor restriction
880       PC_GASM_NONE        - local processor restriction and interpolation
881 .ve
882 
883     Options Database Key:
884 .   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
885 
886     Level: intermediate
887 
888 .keywords: PC, GASM, set, type
889 
890 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
891           PCGASMCreateSubdomains2D()
892 @*/
893 PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
894 {
895   PetscErrorCode ierr;
896 
897   PetscFunctionBegin;
898   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
899   PetscValidLogicalCollectiveEnum(pc,type,2);
900   ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
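
/*
   Illustrative sketch (not part of the library): selecting the standard (non-restricted)
   additive Schwarz variant instead of the default PC_GASM_RESTRICT (pc is assumed to come
   from the application):

       ierr = PCGASMSetType(pc,PC_GASM_BASIC);CHKERRQ(ierr);   // equivalent to -pc_gasm_type basic
*/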
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "PCGASMSetSortIndices"
906 /*@
907     PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
908 
909     Logically Collective on PC
910 
911     Input Parameters:
912 +   pc  - the preconditioner context
913 -   doSort - sort the subdomain indices
914 
915     Level: intermediate
916 
917 .keywords: PC, GASM, set, type
918 
919 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
920           PCGASMCreateSubdomains2D()
921 @*/
922 PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool  doSort)
923 {
924   PetscErrorCode ierr;
925 
926   PetscFunctionBegin;
927   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
928   PetscValidLogicalCollectiveBool(pc,doSort,2);
929   ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
930   PetscFunctionReturn(0);
931 }
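
/*
   Illustrative sketch (not part of the library): turning off the sorting of subdomain indices,
   which may be useful when the ordering of user-supplied index sets should be preserved in the
   subdomain matrices (pc is assumed to come from the application):

       ierr = PCGASMSetSortIndices(pc,PETSC_FALSE);CHKERRQ(ierr);
*/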
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "PCGASMGetSubKSP"
935 /*@C
936    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
937    this processor.
938 
939    Collective on PC iff first_local is requested
940 
941    Input Parameter:
942 .  pc - the preconditioner context
943 
944    Output Parameters:
945 +  n_local - the number of blocks on this processor or PETSC_NULL
946 .  first_local - the global number of the first block on this processor or PETSC_NULL,
947                  all processors must request or all must pass PETSC_NULL
948 -  ksp - the array of KSP contexts
949 
950    Note:
951    After PCGASMGetSubKSP() the array of KSPs is not to be freed
952 
953    Currently for some matrix implementations only 1 block per processor
954    is supported.
955 
956    You must call KSPSetUp() before calling PCGASMGetSubKSP().
957 
958    Level: advanced
959 
960 .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context
961 
962 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMSetOverlap(),
963           PCGASMCreateSubdomains2D(),
964 @*/
965 PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
966 {
967   PetscErrorCode ierr;
968 
969   PetscFunctionBegin;
970   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
971   ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
972   PetscFunctionReturn(0);
973 }
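
/*
   Illustrative sketch (not part of the library): configuring each subdomain solver individually
   after the outer solver has been set up (pc is assumed to come from the application, and
   KSPSetUp() is assumed to have been called already, as required above):

       KSP      *subksp;
       PC        subpc;
       PetscInt  i,nlocal,first;
       ierr = PCGASMGetSubKSP(pc,&nlocal,&first,&subksp);CHKERRQ(ierr);
       for (i=0; i<nlocal; i++) {
         ierr = KSPGetPC(subksp[i],&subpc);CHKERRQ(ierr);
         ierr = PCSetType(subpc,PCILU);CHKERRQ(ierr);   // e.g., ILU on every local block
       }
       // The array subksp is owned by PCGASM and must not be freed by the caller.
*/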
974 
975 /* -------------------------------------------------------------------------------------*/
976 /*MC
977    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
978            its own KSP object.
979 
980    Options Database Keys:
981 +  -pc_gasm_truelocal - Activates PCGASMSetUseTrueLocal()
982 .  -pc_gasm_blocks <blks> - Sets total blocks
983 .  -pc_gasm_overlap <ovl> - Sets overlap
984 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
985 
986      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
987       will get different convergence rates due to the default option -pc_gasm_type restrict. Use
988       -pc_gasm_type basic to get the standard (non-restricted) GASM.
989 
990    Notes: Each processor can have one or more blocks, but a block cannot be shared by more
991      than one processor. Defaults to one block per processor.
992 
993      To set options on the solvers for each block append -sub_ to all the KSP and PC
994         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
995 
996      To set the options on the solvers separately for each block, call PCGASMGetSubKSP()
997          and set the options directly on the resulting KSP object (you can access its PC
998          with KSPGetPC())
999 
1000 
1001    Level: beginner
1002 
1003    Concepts: additive Schwarz method
1004 
1005     References:
1006     An additive variant of the Schwarz alternating method for the case of many subregions
1007     M Dryja, OB Widlund - Courant Institute, New York University Technical report
1008 
1009     Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1010     Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
1011 
1012 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1013            PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(),
1014            PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()
1015 
1016 M*/
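
/*
   Illustrative sketch (not part of the library): a typical command line combining the options
   listed above; the executable name and the outer Krylov method are placeholders chosen for
   the example:

       ./myapp -ksp_type gmres -pc_type gasm -pc_gasm_blocks 8 -pc_gasm_overlap 2 \
               -sub_ksp_type preonly -sub_pc_type ilu -sub_pc_factor_levels 1
*/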
1017 
1018 EXTERN_C_BEGIN
1019 #undef __FUNCT__
1020 #define __FUNCT__ "PCCreate_GASM"
1021 PetscErrorCode  PCCreate_GASM(PC pc)
1022 {
1023   PetscErrorCode ierr;
1024   PC_GASM         *osm;
1025 
1026   PetscFunctionBegin;
1027   ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr);
1028   osm->N                 = PETSC_DECIDE;
1029   osm->n                 = 0;
1030   osm->nmax              = 0;
1031   osm->overlap           = 1;
1032   osm->ksp               = 0;
1033   osm->grestriction      = 0;
1034   osm->gprolongation     = 0;
1035   osm->gx                = 0;
1036   osm->gy                = 0;
1037   osm->x                 = 0;
1038   osm->y                 = 0;
1039   osm->is                = 0;
1040   osm->is_local          = 0;
1041   osm->pmat              = 0;
1042   osm->type              = PC_GASM_RESTRICT;
1043   osm->same_local_solves = PETSC_TRUE;
1044   osm->sort_indices      = PETSC_TRUE;
1045 
1046   pc->data                   = (void*)osm;
1047   pc->ops->apply             = PCApply_GASM;
1048   pc->ops->applytranspose    = PCApplyTranspose_GASM;
1049   pc->ops->setup             = PCSetUp_GASM;
1050   pc->ops->destroy           = PCDestroy_GASM;
1051   pc->ops->setfromoptions    = PCSetFromOptions_GASM;
1052   pc->ops->setuponblocks     = PCSetUpOnBlocks_GASM;
1053   pc->ops->view              = PCView_GASM;
1054   pc->ops->applyrichardson   = 0;
1055 
1056   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM",
1057                     PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr);
1058   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM",
1059                     PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr);
1060   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM",
1061                     PCGASMSetOverlap_GASM);CHKERRQ(ierr);
1062   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM",
1063                     PCGASMSetType_GASM);CHKERRQ(ierr);
1064   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM",
1065                     PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
1066   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM",
1067                     PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
1068   PetscFunctionReturn(0);
1069 }
1070 EXTERN_C_END
1071 
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "PCGASMCreateSubdomains"
1075 /*@C
1076    PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
1077    preconditioner for any problem on a general grid.
1078 
1079    Collective
1080 
1081    Input Parameters:
1082 +  A - The global matrix operator
1083 -  n - the number of local blocks
1084 
1085    Output Parameters:
1086 .  outis - the array of index sets defining the subdomains
1087 
1088    Level: advanced
1089 
1090    Note: this generates nonoverlapping subdomains; PCGASM will generate the overlap
1091     from these if you use PCGASMSetLocalSubdomains() and request a nonzero overlap with PCGASMSetOverlap()
1092 
1093     In the Fortran version you must provide the array outis[] already allocated of length n.
1094 
1095 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1096 
1097 .seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains()
1098 @*/
1099 PetscErrorCode  PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
1100 {
1101   MatPartitioning           mpart;
1102   const char                *prefix;
1103   PetscErrorCode            (*f)(Mat,PetscBool *,MatReuse,Mat*);
1104   PetscMPIInt               size;
1105   PetscInt                  i,j,rstart,rend,bs;
1106   PetscBool                 iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1107   Mat                       Ad = PETSC_NULL, adj;
1108   IS                        ispart,isnumb,*is;
1109   PetscErrorCode            ierr;
1110 
1111   PetscFunctionBegin;
1112   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1113   PetscValidPointer(outis,3);
1114   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);
1115 
1116   /* Get prefix, row distribution, and block size */
1117   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1118   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1119   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1120   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1121 
1122   /* Get diagonal block from matrix if possible */
1123   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1124   ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
1125   if (f) {
1126     ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr);
1127   } else if (size == 1) {
1128     iscopy = PETSC_FALSE; Ad = A;
1129   } else {
1130     iscopy = PETSC_FALSE; Ad = PETSC_NULL;
1131   }
1132   if (Ad) {
1133     ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1134     if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1135   }
1136   if (Ad && n > 1) {
1137     PetscBool  match,done;
1138     /* Try to setup a good matrix partitioning if available */
1139     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1140     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1141     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1142     ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
1143     if (!match) {
1144       ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
1145     }
1146     if (!match) { /* assume a "good" partitioner is available */
1147       PetscInt na,*ia,*ja;
1148       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1149       if (done) {
1150 	/* Build adjacency matrix by hand. Unfortunately a call to
1151 	   MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1152 	   remove the block-aij structure and we cannot expect
1153 	   MatPartitioning to split vertices as we need */
1154 	PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
1155 	nnz = 0;
1156 	for (i=0; i<na; i++) { /* count number of nonzeros */
1157 	  len = ia[i+1] - ia[i];
1158 	  row = ja + ia[i];
1159 	  for (j=0; j<len; j++) {
1160 	    if (row[j] == i) { /* don't count diagonal */
1161 	      len--; break;
1162 	    }
1163 	  }
1164 	  nnz += len;
1165 	}
1166 	ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
1167 	ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
1168 	nnz    = 0;
1169 	iia[0] = 0;
1170 	for (i=0; i<na; i++) { /* fill adjacency */
1171 	  cnt = 0;
1172 	  len = ia[i+1] - ia[i];
1173 	  row = ja + ia[i];
1174 	  for (j=0; j<len; j++) {
1175 	    if (row[j] != i) { /* if not diagonal */
1176 	      jja[nnz+cnt++] = row[j];
1177 	    }
1178 	  }
1179 	  nnz += cnt;
1180 	  iia[i+1] = nnz;
1181 	}
1182 	/* Partitioning of the adjacency matrix */
1183 	ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
1184 	ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1185 	ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
1186 	ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1187 	ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1188 	ierr = MatDestroy(adj);CHKERRQ(ierr);
1189 	foundpart = PETSC_TRUE;
1190       }
1191       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1192     }
1193     ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr);
1194   }
1195   if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);}
1196 
1197   ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
1198   *outis = is;
1199 
1200   if (!foundpart) {
1201 
1202     /* Partitioning by contiguous chunks of rows */
1203 
1204     PetscInt mbs   = (rend-rstart)/bs;
1205     PetscInt start = rstart;
1206     for (i=0; i<n; i++) {
1207       PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1208       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1209       start += count;
1210     }
1211 
1212   } else {
1213 
1214     /* Partitioning by adjacency of diagonal block  */
1215 
1216     const PetscInt *numbering;
1217     PetscInt       *count,nidx,*indices,*newidx,start=0;
1218     /* Get node count in each partition */
1219     ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
1220     ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
1221     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1222       for (i=0; i<n; i++) count[i] *= bs;
1223     }
1224     /* Build indices from node numbering */
1225     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1226     ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
1227     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1228     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1229     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1230     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1231     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1232       ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
1233       for (i=0; i<nidx; i++)
1234 	for (j=0; j<bs; j++)
1235 	  newidx[i*bs+j] = indices[i]*bs + j;
1236       ierr = PetscFree(indices);CHKERRQ(ierr);
1237       nidx   *= bs;
1238       indices = newidx;
1239     }
1240     /* Shift to get global indices */
1241     for (i=0; i<nidx; i++) indices[i] += rstart;
1242 
1243     /* Build the index sets for each block */
1244     for (i=0; i<n; i++) {
1245       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
1246       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1247       start += count[i];
1248     }
1249 
1250     ierr = PetscFree(count);CHKERRQ(ierr);
1251     ierr = PetscFree(indices);CHKERRQ(ierr);
1252     ierr = ISDestroy(isnumb);CHKERRQ(ierr);
1253     ierr = ISDestroy(ispart);CHKERRQ(ierr);
1254   }
1255 
1256   PetscFunctionReturn(0);
1257 }
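
/*
   Illustrative sketch (not part of the library): letting PCGASMCreateSubdomains() build a
   nonoverlapping decomposition into 4 local blocks, handing it to the preconditioner, and
   destroying the index sets once the solves are complete (A and pc are assumed to come from
   the application):

       IS *is;
       ierr = PCGASMCreateSubdomains(A,4,&is);CHKERRQ(ierr);
       ierr = PCGASMSetLocalSubdomains(pc,4,is,PETSC_NULL);CHKERRQ(ierr);
       // ... KSPSolve() calls using pc ...
       ierr = PCGASMDestroySubdomains(4,is,PETSC_NULL);CHKERRQ(ierr);
*/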
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "PCGASMDestroySubdomains"
1261 /*@C
1262    PCGASMDestroySubdomains - Destroys the index sets created with
1263    PCGASMCreateSubdomains(). Should be called after setting subdomains
1264    with PCGASMSetLocalSubdomains().
1265 
1266    Collective
1267 
1268    Input Parameters:
1269 +  n - the number of index sets
1270 .  is - the array of index sets
1271 -  is_local - the array of local index sets, can be PETSC_NULL
1272 
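   Sample usage (a minimal sketch; assumes n, is and is_local were produced by
   PCGASMCreateSubdomains() and have already been passed to PCGASMSetLocalSubdomains()):
.vb
      ierr = PCGASMDestroySubdomains(n,is,is_local);CHKERRQ(ierr);
.ve
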
1273    Level: advanced
1274 
1275 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1276 
1277 .seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains()
1278 @*/
1279 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1280 {
1281   PetscInt       i;
1282   PetscErrorCode ierr;
1283   PetscFunctionBegin;
1284   if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
1285   PetscValidPointer(is,2);
1286   for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); }
1287   ierr = PetscFree(is);CHKERRQ(ierr);
1288   if (is_local) {
1289     PetscValidPointer(is_local,3);
1290     for (i=0; i<n; i++) { ierr = ISDestroy(is_local[i]);CHKERRQ(ierr); }
1291     ierr = PetscFree(is_local);CHKERRQ(ierr);
1292   }
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 
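/*
   PCGASMLocalSubdomainBounds2D: a helper macro for PCGASMCreateSubdomains2D().
   Given an M x N grid whose points are numbered row by row, and a subdomain
   spanning columns [xleft,xright) and rows [ylow,yhigh), it computes the part
   of the subdomain intersecting the range of grid points owned by this process
   (delimited by first and last): the bounding rows (*ylow_loc,*yhigh_loc), the
   column bounds on the first and last of those rows (*xleft_loc,*xright_loc),
   and the number *n of owned subdomain points.  The count is formed by taking
   every row but the last at full subdomain width, adding the owned portion of
   the last row, and subtracting the unowned leading portion of the first row.
*/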
1297 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1298 {                                                                                                       \
1299  PetscInt first_row = first/M, last_row = last/M+1;                                                     \
1300   /*                                                                                                    \
1301    Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
1302    of the bounding box of the intersection of the subdomain with the local ownership range (local       \
1303    subdomain).                                                                                          \
1304    Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
1305    of the intersection.                                                                                 \
1306   */                                                                                                    \
1307 	  /* ylow_loc is the grid row containing the first element of the local subdomain */                    \
1308   *ylow_loc = PetscMax(first_row,ylow);                                                                    \
1309 	  /* xleft_loc is the offset of the first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1310   *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft;                                                                            \
1311   /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
1312   *yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
1313 	  /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */        \
1314   *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright;                                                                          \
1315   /* Now compute the size of the local subdomain n. */ \
1316   *n = 0;                                               \
1317   if(*ylow_loc < *yhigh_loc) {                           \
1318     PetscInt width = xright-xleft;                     \
1319     *n += width*(*yhigh_loc-*ylow_loc-1);                 \
1320     *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
1321     *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
1322   }\
1323 }
1324 
1325 
1326 
1327 #undef __FUNCT__
1328 #define __FUNCT__ "PCGASMCreateSubdomains2D"
1329 /*@
1330    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1331    preconditioner for a two-dimensional problem on a regular grid.
1332 
1333    Collective
1334 
1335    Input Parameters:
+  pc - the preconditioner context
1336 .  M, N - the global number of mesh points in the x and y directions
1337 .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1338 .  dof - degrees of freedom per node
1339 -  overlap - overlap in mesh lines
1340 
1341    Output Parameters:
1342 +  nsub - the number of local subdomains created
1343 .  is - array of index sets defining overlapping (if overlap > 0) subdomains
1344 -  is_local - array of index sets defining non-overlapping subdomains
1345 
1346    Note:
1347    Presently PCGASMCreateSubdomains2D() is valid only for sequential
1348    preconditioners.  More general related routines are
1349    PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains().
1350 
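   Sample usage (a minimal sketch; the mesh and subdomain sizes are placeholders,
   and the created subdomains are assumed to be handed to PCGASMSetLocalSubdomains()
   with this calling sequence):
.vb
      PetscInt nsub;
      IS       *is, *is_local;
      ierr = PCGASMCreateSubdomains2D(pc,64,64,4,4,1,1,&nsub,&is,&is_local);CHKERRQ(ierr);
      ierr = PCGASMSetLocalSubdomains(pc,nsub,is,is_local);CHKERRQ(ierr);
.ve
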
1351    Level: advanced
1352 
1353 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid
1354 
1355 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
1356           PCGASMSetOverlap()
1357 @*/
1358 PetscErrorCode  PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
1359 {
1360   PetscErrorCode ierr;
1361   PetscMPIInt size, rank;
1362   PetscInt i, j;
1363   PetscInt maxheight, maxwidth;
1364   PetscInt xstart, xleft, xright, xleft_loc, xright_loc;
1365   PetscInt ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1366   PetscInt x[2][2], y[2][2], n[2];
1367   PetscInt first, last;
1368   PetscInt nidx, *idx;
1369   PetscInt ii,jj,s,q,d;
1370   PetscInt k,kk;
1371   PetscMPIInt color;
1372   MPI_Comm comm, subcomm;
1373 
1374   IS **iis;
1375   PetscFunctionBegin;
1376   ierr = PetscObjectGetComm((PetscObject)pc, &comm);     CHKERRQ(ierr);
1377   ierr = MPI_Comm_size(comm, &size);                     CHKERRQ(ierr);
1378   ierr = MPI_Comm_rank(comm, &rank);                     CHKERRQ(ierr);
1379   ierr = MatGetOwnershipRange(pc->pmat, &first, &last);  CHKERRQ(ierr);
1380   if(first%dof || last%dof) {
1381     SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1382 	     "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1383   }
1384   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1385   s = 0;
1386   ystart = 0;
1387   for (j=0; j<Ndomains; ++j) {
1388     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1389 	    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1390     /* Vertical domain limits with an overlap. */
1391     ylow = PetscMax(ystart - overlap,0);
1392     yhigh = PetscMin(ystart + maxheight + overlap,N);
1393     xstart = 0;
1394     for (i=0; i<Mdomains; ++i) {
1395       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1396       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1397       /* Horizontal domain limits with an overlap. */
1398       xleft   = PetscMax(xstart - overlap,0);
1399       xright  = PetscMin(xstart + maxwidth + overlap,M);
1400       /*
1401 	 Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1402       */
1403       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1404       if(nidx) {
1405         ++s;
1406       }
1407       xstart += maxwidth;
1408     }/* for(i = 0; i < Mdomains; ++i) */
1409     ystart += maxheight;
1410   }/* for(j = 0; j < Ndomains; ++j) */
1411   /* Now we can allocate the necessary number of ISs. */
1412   *nsub = s;
1413 	  ierr = PetscMalloc((*nsub)*sizeof(IS),is);         CHKERRQ(ierr);
1414 	  ierr = PetscMalloc((*nsub)*sizeof(IS),is_local);   CHKERRQ(ierr);
1415   s = 0;
1416   ystart = 0;
1417   for (j=0; j<Ndomains; ++j) {
1418     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1419 	    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1420     /* Vertical domain limits with an overlap. */
1421     y[0][0] = PetscMax(ystart - overlap,0);
1422     y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1423     /* Vertical domain limits without an overlap. */
1424     y[1][0] = ystart;
1425     y[1][1] = PetscMin(ystart + maxheight,N);
1426     xstart = 0;
1427     for (i=0; i<Mdomains; ++i) {
1428       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1429       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1430       /* Horizontal domain limits with an overlap. */
1431       x[0][0]  = PetscMax(xstart - overlap,0);
1432       x[0][1]  = PetscMin(xstart + maxwidth + overlap,M);
1433       /* Horizontal domain limits without an overlap. */
1434       x[1][0] = xstart;
1435       x[1][1] = PetscMin(xstart+maxwidth,M);
1436       /*
1437 	 Determine whether this domain intersects this processor's ownership range of pc->pmat.
1438 	 Do this twice: first for the domains with overlaps, and once without.
1439 	 During the first pass create the subcommunicators, and use them on the second pass as well.
1440       */
1441       for(q = 0; q < 2; ++q) {
1442 	/*
1443 	  The domain limits (xleft, xright) and (ylow, yhigh) are set
1444 	  according to whether the domain with or without the overlap is being considered.
1445 	*/
1446 	xleft = x[q][0]; xright = x[q][1];
1447 	ylow  = y[q][0]; yhigh  = y[q][1];
1448         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1449 	nidx *= dof;
1450         n[q] = nidx;
1451         /*
1452          Based on the counted number of indices in the local domain *with an overlap*,
1453          construct a subcommunicator of all the processors supporting this domain.
1454          Observe that a domain with an overlap might have nontrivial local support,
1455          while the domain without an overlap might not.  Hence, the decision to participate
1456          in the subcommunicator must be based on the domain with an overlap.
1457          */
1458 	if(q == 0) {
1459 	  if(nidx) {
1460 	    color = 1;
1461 	  }
1462 	  else {
1463 	    color = MPI_UNDEFINED;
1464 	  }
1465 	  ierr = MPI_Comm_split(comm, color, rank, &subcomm); CHKERRQ(ierr);
1466 	}
1467         /*
1468          Proceed only if the number of local indices *with an overlap* is nonzero.
1469          */
1470         if(n[0]) {
1471           if(q == 0) {
1472             iis = is;
1473           }
1474           if(q == 1) {
1475             /*
1476              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1477              Moreover, if the overlap is zero, the two ISs are identical.
1478              */
1479             if(overlap == 0) {
1480               (*is_local)[s] = (*is)[s];
1481               ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
1482               continue;
1483             }
1484             else {
1485               iis = is_local;
1486               subcomm = ((PetscObject)(*is)[s])->comm;
1487             }
1488           }/* if(q == 1) */
1489           idx = PETSC_NULL;
1490 	  ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
1491           if(nidx) {
1492             k    = 0;
1493             for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1494               PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft;
1495               PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright;
1496               kk = dof*(M*jj + x0);
1497               for (ii=x0; ii<x1; ++ii) {
1498                 for(d = 0; d < dof; ++d) {
1499                   idx[k++] = kk++;
1500                 }
1501               }
1502             }
1503           }
1504 	  ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr);
1505 	}/* if(n[0]) */
1506       }/* for(q = 0; q < 2; ++q) */
1507       if(n[0]) {
1508         ++s;
1509       }
1510       xstart += maxwidth;
1511     }/* for(i = 0; i < Mdomains; ++i) */
1512     ystart += maxheight;
1513   }/* for(j = 0; j < Ndomains; ++j) */
1514   PetscFunctionReturn(0);
1515 }
1516 
1517 #undef __FUNCT__
1518 #define __FUNCT__ "PCGASMGetLocalSubdomains"
1519 /*@C
1520     PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor
1521     only) for the additive Schwarz preconditioner.
1522 
1523     Not Collective
1524 
1525     Input Parameter:
1526 .   pc - the preconditioner context
1527 
1528     Output Parameters:
1529 +   n - the number of subdomains for this processor (default value = 1)
1530 .   is - the index sets that define the subdomains for this processor
1531 -   is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)
1532 
1533 
1534     Notes:
1535     The IS numbering is in the parallel, global numbering of the vector.
1536 
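    Sample usage (a minimal sketch; assumes pc is a PCGASM preconditioner):
.vb
      PetscInt n;
      IS       *is, *is_local;
      ierr = PCGASMGetLocalSubdomains(pc,&n,&is,&is_local);CHKERRQ(ierr);
.ve
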
1537     Level: advanced
1538 
1539 .keywords: PC, GASM, set, local, subdomains, additive Schwarz
1540 
1541 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1542           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices()
1543 @*/
1544 PetscErrorCode  PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1545 {
1546   PC_GASM         *osm;
1547   PetscErrorCode ierr;
1548   PetscBool      match;
1549 
1550   PetscFunctionBegin;
1551   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1552   PetscValidIntPointer(n,2);
1553   if (is) PetscValidPointer(is,3);
1554   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1555   if (!match) {
1556     if (n)  *n  = 0;
1557     if (is) *is = PETSC_NULL;
1558   } else {
1559     osm = (PC_GASM*)pc->data;
1560     if (n)  *n  = osm->n;
1561     if (is) *is = osm->is;
1562     if (is_local) *is_local = osm->is_local;
1563   }
1564   PetscFunctionReturn(0);
1565 }
1566 
1567 #undef __FUNCT__
1568 #define __FUNCT__ "PCGASMGetLocalSubmatrices"
1569 /*@C
1570     PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1571     only) for the additive Schwarz preconditioner.
1572 
1573     Not Collective
1574 
1575     Input Parameter:
1576 .   pc - the preconditioner context
1577 
1578     Output Parameters:
1579 +   n - the number of matrices for this processor (default value = 1)
1580 -   mat - the matrices
1581 
1582 
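    Sample usage (a minimal sketch; the routine itself requires that KSPSetUp()
    or PCSetUp() has already been called on pc):
.vb
      PetscInt n;
      Mat      *mat;
      ierr = PCGASMGetLocalSubmatrices(pc,&n,&mat);CHKERRQ(ierr);
.ve
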
1583     Level: advanced
1584 
1585 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi
1586 
1587 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1588           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains()
1589 @*/
1590 PetscErrorCode  PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1591 {
1592   PC_GASM         *osm;
1593   PetscErrorCode ierr;
1594   PetscBool      match;
1595 
1596   PetscFunctionBegin;
1597   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1598   PetscValidIntPointer(n,2);
1599   if (mat) PetscValidPointer(mat,3);
1600 	  if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1601   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1602   if (!match) {
1603     if (n)   *n   = 0;
1604     if (mat) *mat = PETSC_NULL;
1605   } else {
1606     osm = (PC_GASM*)pc->data;
1607     if (n)   *n   = osm->n;
1608     if (mat) *mat = osm->pmat;
1609   }
1610   PetscFunctionReturn(0);
1611 }
1612