xref: /petsc/src/ksp/pc/impls/gasm/gasm.c (revision 7d8118323a4b084c8e4ece7cc84f635667bda1e6)
1 
2 /*
  This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
4   In this version each processor may have any number of subdomains and a subdomain may live on multiple
5   processors.
6 
7        N    - total number of subdomains on all processors
8        n    - actual number of subdomains on this processor
9        nmax - maximum number of subdomains per processor
10 */
11 #include <private/pcimpl.h>     /*I "petscpc.h" I*/
12 
/* Private context for the GASM preconditioner: subdomain bookkeeping, merged
   work vectors/scatters, and the per-subdomain solvers. */
typedef struct {
  PetscInt   N,n,nmax;            /* subdomain counts: N = total over all processors, n = on this processor, nmax = max per processor */
  PetscInt   overlap;             /* overlap requested by user; -1 indicates user-supplied (already overlapping) subdomains, so it is never increased */
  KSP        *ksp;                /* linear solvers for each block */
  Vec        gx,gy;               /* Merged work vectors */
  Vec        *x,*y;               /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
  IS         gis, gis_local;      /* merged ISs (disjoint union of osm->is and osm->is_local, respectively) */
  VecScatter grestriction;        /* merged restriction: global vector -> gx */
  VecScatter gprolongation;       /* merged prolongation: gy -> global vector */
  IS         *is;                 /* index set that defines each overlapping subdomain */
  IS         *is_local;           /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */
  Mat        *pmat;               /* subdomain block matrices */
  PCGASMType  type;               /* use reduced interpolation, restriction or both */
  PetscBool  type_set;            /* if user set this value (so won't change it for symmetric problems) */
  PetscBool  same_local_solves;   /* flag indicating whether all local solvers are same */
  PetscBool  sort_indices;        /* flag to sort subdomain indices */
} PC_GASM;
30 
31 #undef __FUNCT__
32 #define __FUNCT__ "PCGASMPrintSubdomains"
33 static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
34 {
35   PC_GASM         *osm  = (PC_GASM*)pc->data;
36   const char     *prefix;
37   char           fname[PETSC_MAX_PATH_LEN+1];
38   PetscViewer    viewer, sviewer;
39   PetscInt       i,j,nidx;
40   const PetscInt *idx;
41   char           *cidx;
42   PetscBool      found;
43   PetscMPIInt    size, rank;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr);
48   ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr);
49   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
50   ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr);
51   if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
52   for (i=0;i<osm->n;++i) {
53     ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
54     ierr = ISGetLocalSize(osm->is[i], &nidx);CHKERRQ(ierr);
55     /*
56      No more than 15 characters per index plus a space.
57      PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
58      in case nidx == 0. That will take care of the space for the trailing '\0' as well.
59      For nidx == 0, the whole string 16 '\0'.
60      */
61     ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr);
62     ierr = ISGetIndices(osm->is[i], &idx);CHKERRQ(ierr);
63     ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr);
64     for(j = 0; j < nidx; ++j) {
65       ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
66     }
67     ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
68     ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr);
69 
70     ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
71     ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n");CHKERRQ(ierr);
72     ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
73     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
74     ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
75     ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
76     ierr = PetscFree(cidx);CHKERRQ(ierr);
77     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
78     if (osm->is_local) {
79       ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
80       ierr = ISGetLocalSize(osm->is_local[i], &nidx);CHKERRQ(ierr);
81       /*
82        No more than 15 characters per index plus a space.
83        PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
84        in case nidx == 0. That will take care of the space for the trailing '\0' as well.
85        For nidx == 0, the whole string 16 '\0'.
86        */
87       ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr);
88       ierr = ISGetIndices(osm->is_local[i], &idx);CHKERRQ(ierr);
89       ierr = PetscViewerStringOpen(((PetscObject)(osm->is_local[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr);
90       for(j = 0; j < nidx; ++j) {
91         ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
92       }
93       ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
94       ierr = ISRestoreIndices(osm->is_local[i],&idx);CHKERRQ(ierr);
95 
96       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
97       ierr = PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n");CHKERRQ(ierr);
98       ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
99       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
100       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
101       ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
102       ierr = PetscFree(cidx);CHKERRQ(ierr);
103       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
104     }
105   }
106   PetscFunctionReturn(0);
107 }
108 
109 
110 #undef __FUNCT__
111 #define __FUNCT__ "PCView_GASM"
112 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
113 {
114   PC_GASM         *osm = (PC_GASM*)pc->data;
115   const char     *prefix;
116   PetscErrorCode ierr;
117   PetscMPIInt    rank, size;
118   PetscInt       i,bsz;
119   PetscBool      iascii,isstring, print_subdomains;
120   PetscViewer    sviewer;
121 
122 
123   PetscFunctionBegin;
124   ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr);
125   ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr);
126   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
127   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
128   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
129   ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr);
130   if (iascii) {
131     char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set";
132     if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);}
133     if (osm->nmax > 0)     {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);}
134     ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
135     ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr);
136     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
137     ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
138     ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr);
139     ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
140     ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
141     if (osm->same_local_solves) {
142       if (osm->ksp) {
143         ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr);
144         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
145         if (!rank) {
146           ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
147           ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
148           ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
149         }
150         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
151       }
152     } else {
153       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
154       ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr);
155       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
156       ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
157       for (i=0; i<osm->nmax; i++) {
158         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
159         if (i < osm->n) {
160           ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
161           ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_TRUE);CHKERRQ(ierr);
162           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr);
163           ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
164           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
165           ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
166           ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_FALSE);CHKERRQ(ierr);
167         }
168         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
169       }
170       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
171       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
172     }
173   } else if (isstring) {
174     ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr);
175     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
176     if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
177     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
178   } else {
179     SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name);
180   }
181   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
182   if(print_subdomains) {
183     ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr);
184   }
185   PetscFunctionReturn(0);
186 }
187 
188 
189 
190 
191 
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_GASM"
/*
   PCSetUp_GASM - Sets up the generalized additive Schwarz preconditioner.

   On first setup:
     - decides how many subdomains exist (globally, locally, and the maximum per processor);
     - creates/duplicates the subdomain index sets (osm->is, osm->is_local) and, if requested,
       increases their overlap;
     - merges all local subdomain ISs into a single IS (osm->gis) and builds merged work vectors
       (osm->gx, osm->gy) plus the restriction and prolongation scatters;
     - creates per-subdomain work vectors whose storage aliases slices of the merged vectors;
     - creates one KSP per local subdomain with options prefix "<pc_prefix>sub_".
   On subsequent setups only the subdomain matrices are re-extracted (and re-created when the
   nonzero pattern changed).
*/
static PetscErrorCode PCSetUp_GASM(PC pc)
{
  PC_GASM         *osm  = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscBool      symset,flg;
  PetscInt       i;
  PetscMPIInt    rank, size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  PetscInt       dn;       /* Number of indices in a single subdomain assigned to this processor. */
  const PetscInt *didx;    /* Indices from a single subdomain assigned to this processor. */
  PetscInt       ddn;      /* Number of indices in all subdomains assigned to this processor. */
  PetscInt       *ddidx;   /* Indices of all subdomains assigned to this processor. */
  IS             gid;      /* Identity IS of the size of all subdomains assigned to this processor. */
  Vec            x,y;
  PetscScalar    *gxarray, *gyarray;
  PetscInt       gfirst, glast;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  if (!pc->setupcalled) {

    /* For a known-symmetric matrix, default to the cheaper symmetric (BASIC) variant
       unless the user explicitly chose a type. */
    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) { osm->type = PC_GASM_BASIC; }
    }

    if (osm->N == PETSC_DECIDE && osm->n < 1) {
      /* no subdomains given, use one per processor */
      osm->nmax = osm->n = 1;
      ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
      osm->N = size;
    } else if (osm->N == PETSC_DECIDE) {
      PetscInt inwork[2], outwork[2];
      /* determine global number of subdomains and the max number of local subdomains */
      inwork[0] = inwork[1] = osm->n;
      ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
      osm->nmax = outwork[0];
      osm->N    = outwork[1];
    }
    if (!osm->is){ /* create the index sets */
      ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr);
    }
    if (!osm->is_local) {
      /*
         This indicates that osm->is should define a nonoverlapping decomposition
         (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetLocalSubdomains,
          but the assumption is that either the user does the right thing, or subdomains in ossm->is have been created
          via PCGASMCreateSubdomains, which guarantees a nonoverlapping decomposition).
         Therefore, osm->is will be used to define osm->is_local.
         If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap,
         so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping.
         Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is.
      */
      ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
      for (i=0; i<osm->n; i++) {
        if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
          ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr);
          ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr);
        } else {
          ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr);
          osm->is_local[i] = osm->is[i];
        }
      }
    }
    /* Beyond this point osm->is_local is not null. */
    if (osm->overlap > 0) {
      /* Extend the "overlapping" regions by a number of steps */
      ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr);
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); }

    if (osm->sort_indices) {
      for (i=0; i<osm->n; i++) {
        ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
        ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
      }
    }
    /* Merge the ISs, create merged vectors and scatter contexts. */
    /* Restriction ISs. */
    /* First pass: total local size of the disjoint union of all subdomains. */
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ddn += dn;
    }
    ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx);CHKERRQ(ierr);
    /* Second pass: concatenate the subdomain indices into ddidx. */
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ierr = ISGetIndices(osm->is[i],&didx);CHKERRQ(ierr);
      ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn);CHKERRQ(ierr);
      ierr = ISRestoreIndices(osm->is[i], &didx);CHKERRQ(ierr);
      ddn += dn;
    }
    /* PETSC_OWN_POINTER: osm->gis takes ownership of ddidx. */
    ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis);CHKERRQ(ierr);
    ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr);
    ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx);CHKERRQ(ierr);
    ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
    /* gid enumerates this processor's slots of the merged vector; the restriction
       scatter moves global-vector entries listed in gis into those slots. */
    ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid);CHKERRQ(ierr);
    ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction));CHKERRQ(ierr);
    ierr = ISDestroy(&gid);CHKERRQ(ierr);
    /* Prolongation ISs */
    { PetscInt       dn_local;       /* Number of indices in the local part of a single domain assigned to this processor. */
      const PetscInt *didx_local;    /* Global indices from the local part of a single domain assigned to this processor. */
      PetscInt       ddn_local;      /* Number of indices in the local part of the disjoint union all domains assigned to this processor. */
      PetscInt       *ddidx_local;   /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */
      /**/
      ISLocalToGlobalMapping ltog;          /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */
      PetscInt              *ddidx_llocal;  /* Mapped local indices of the disjoint union of local parts of subdomains. */
      PetscInt               ddn_llocal;    /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */
      IS                     gis_llocal;    /* IS with ddidx_llocal indices. */
      PetscInt               j;
      /* Same two-pass concatenation as above, this time over the non-overlapping local ISs. */
      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr);
        ddn_local += dn_local;
      }
      ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local);CHKERRQ(ierr);
      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr);
        ierr = ISGetIndices(osm->is_local[i],&didx_local);CHKERRQ(ierr);
        ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local);CHKERRQ(ierr);
        ierr = ISRestoreIndices(osm->is_local[i], &didx_local);CHKERRQ(ierr);
        ddn_local += dn_local;
      }
      ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal);CHKERRQ(ierr);
      ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local));CHKERRQ(ierr);
      /* Locate each local-IS index within gis; IS_GTOLM_DROP discards indices not
         present in gis, which is then detected as an error below. */
      ierr = ISLocalToGlobalMappingCreateIS(osm->gis,&ltog);CHKERRQ(ierr);
      ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
      if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local);
      /* Now convert these localized indices into the global indices into the merged output vector. */
      ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast);CHKERRQ(ierr);
      for(j=0; j < ddn_llocal; ++j) {
        ddidx_llocal[j] += gfirst;
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal);CHKERRQ(ierr);
      ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);CHKERRQ(ierr);
      ierr = ISDestroy(&gis_llocal);CHKERRQ(ierr);
    }
    /* Create the subdomain work vectors; each aliases a contiguous slice of the merged vectors' storage. */
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr);
    /* ddn advances by dn, the size of the PREVIOUS subdomain (set inside the loop body). */
    for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) {
      PetscInt dN;
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr);
    }
    ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr);
    ierr = VecDestroy(&x);CHKERRQ(ierr);
    ierr = VecDestroy(&y);CHKERRQ(ierr);
    /* Create the local solvers */
    ierr = PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) { /* KSPs are local */
      ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    scall = MAT_INITIAL_MATRIX;

  } else {
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
  }

  /*
     Extract out the submatrices.
  */
  if(size > 1) {
    ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  }
  else {
    ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  }
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over submatrices putting them into local ksp
  */
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }

  PetscFunctionReturn(0);
}
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "PCSetUpOnBlocks_GASM"
420 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
421 {
422   PC_GASM         *osm = (PC_GASM*)pc->data;
423   PetscErrorCode ierr;
424   PetscInt       i;
425 
426   PetscFunctionBegin;
427   for (i=0; i<osm->n; i++) {
428     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
429   }
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "PCApply_GASM"
435 static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y)
436 {
437   PC_GASM         *osm = (PC_GASM*)pc->data;
438   PetscErrorCode ierr;
439   PetscInt       i;
440   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
441 
442   PetscFunctionBegin;
443   /*
444      Support for limiting the restriction or interpolation to only local
445      subdomain values (leaving the other values 0).
446   */
447   if (!(osm->type & PC_GASM_RESTRICT)) {
448     forward = SCATTER_FORWARD_LOCAL;
449     /* have to zero the work RHS since scatter may leave some slots empty */
450     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
451   }
452   if (!(osm->type & PC_GASM_INTERPOLATE)) {
453     reverse = SCATTER_REVERSE_LOCAL;
454   }
455 
456   ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
457   ierr = VecZeroEntries(y);CHKERRQ(ierr);
458   ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
459   /* do the subdomain solves */
460   for (i=0; i<osm->n; ++i) {
461     ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
462   }
463   ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
464   ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
465   PetscFunctionReturn(0);
466 }
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "PCApplyTranspose_GASM"
470 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y)
471 {
472   PC_GASM         *osm = (PC_GASM*)pc->data;
473   PetscErrorCode ierr;
474   PetscInt       i;
475   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
476 
477   PetscFunctionBegin;
478   /*
479      Support for limiting the restriction or interpolation to only local
480      subdomain values (leaving the other values 0).
481 
482      Note: these are reversed from the PCApply_GASM() because we are applying the
483      transpose of the three terms
484   */
485   if (!(osm->type & PC_GASM_INTERPOLATE)) {
486     forward = SCATTER_FORWARD_LOCAL;
487     /* have to zero the work RHS since scatter may leave some slots empty */
488     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
489   }
490   if (!(osm->type & PC_GASM_RESTRICT)) {
491     reverse = SCATTER_REVERSE_LOCAL;
492   }
493 
494   ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
495   ierr = VecZeroEntries(y);CHKERRQ(ierr);
496   ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
497   /* do the local solves */
498   for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
499     ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
500   }
501   ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
502   ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
503   PetscFunctionReturn(0);
504 }
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "PCReset_GASM"
508 static PetscErrorCode PCReset_GASM(PC pc)
509 {
510   PC_GASM        *osm = (PC_GASM*)pc->data;
511   PetscErrorCode ierr;
512   PetscInt       i;
513 
514   PetscFunctionBegin;
515   if (osm->ksp) {
516     for (i=0; i<osm->n; i++) {
517       ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
518     }
519   }
520   if (osm->pmat) {
521     if (osm->n > 0) {
522       ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
523     }
524   }
525   if (osm->x) {
526     for (i=0; i<osm->n; i++) {
527       ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
528       ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
529     }
530   }
531   ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
532   ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);
533 
534   ierr = VecScatterDestroy(&osm->grestriction);CHKERRQ(ierr);
535   ierr = VecScatterDestroy(&osm->gprolongation);CHKERRQ(ierr);
536   if (osm->is) {ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); osm->is = 0;}
537   ierr = ISDestroy(&osm->gis);CHKERRQ(ierr);
538   ierr = ISDestroy(&osm->gis_local);CHKERRQ(ierr);
539   PetscFunctionReturn(0);
540 }
541 
542 #undef __FUNCT__
543 #define __FUNCT__ "PCDestroy_GASM"
544 static PetscErrorCode PCDestroy_GASM(PC pc)
545 {
546   PC_GASM         *osm = (PC_GASM*)pc->data;
547   PetscErrorCode ierr;
548   PetscInt       i;
549 
550   PetscFunctionBegin;
551   ierr = PCReset_GASM(pc);CHKERRQ(ierr);
552   if (osm->ksp) {
553     for (i=0; i<osm->n; i++) {
554       ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr);
555     }
556     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
557   }
558   ierr = PetscFree(osm->x);CHKERRQ(ierr);
559   ierr = PetscFree(osm->y);CHKERRQ(ierr);
560   ierr = PetscFree(pc->data);CHKERRQ(ierr);
561   PetscFunctionReturn(0);
562 }
563 
564 #undef __FUNCT__
565 #define __FUNCT__ "PCSetFromOptions_GASM"
566 static PetscErrorCode PCSetFromOptions_GASM(PC pc) {
567   PC_GASM         *osm = (PC_GASM*)pc->data;
568   PetscErrorCode ierr;
569   PetscInt       blocks,ovl;
570   PetscBool      symset,flg;
571   PCGASMType      gasmtype;
572 
573   PetscFunctionBegin;
574   /* set the type to symmetric if matrix is symmetric */
575   if (!osm->type_set && pc->pmat) {
576     ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
577     if (symset && flg) { osm->type = PC_GASM_BASIC; }
578   }
579   ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr);
580     ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
581     if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); }
582     ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
583     if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
584     flg  = PETSC_FALSE;
585     ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
586     if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); }
587   ierr = PetscOptionsTail();CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
590 
591 /*------------------------------------------------------------------------------------*/
592 
593 EXTERN_C_BEGIN
594 #undef __FUNCT__
595 #define __FUNCT__ "PCGASMSetLocalSubdomains_GASM"
596 PetscErrorCode  PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[])
597 {
598   PC_GASM         *osm = (PC_GASM*)pc->data;
599   PetscErrorCode ierr;
600   PetscInt       i;
601 
602   PetscFunctionBegin;
603   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
604   if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp().");
605 
606   if (!pc->setupcalled) {
607     osm->n            = n;
608     osm->is           = 0;
609     osm->is_local     = 0;
610     if (is) {
611       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
612     }
613     if (is_local) {
614       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);}
615     }
616     if (osm->is) {
617       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
618     }
619     if (is) {
620       ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr);
621       for (i=0; i<n; i++) { osm->is[i] = is[i]; }
622       /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */
623       osm->overlap = -1;
624     }
625     if (is_local) {
626       ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
627       for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
628     }
629   }
630   PetscFunctionReturn(0);
631 }
632 EXTERN_C_END
633 
634 EXTERN_C_BEGIN
635 #undef __FUNCT__
636 #define __FUNCT__ "PCGASMSetTotalSubdomains_GASM"
637 PetscErrorCode  PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N) {
638   PC_GASM         *osm = (PC_GASM*)pc->data;
639   PetscErrorCode ierr;
640   PetscMPIInt    rank,size;
641   PetscInt       n;
642 
643   PetscFunctionBegin;
644   if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
645 
646   /*
647      Split the subdomains equally among all processors
648   */
649   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
650   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
651   n = N/size + ((N % size) > rank);
652   if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
653   if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp().");
654   if (!pc->setupcalled) {
655     if (osm->is) {
656       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
657     }
658     osm->N            = N;
659     osm->n            = n;
660     osm->is           = 0;
661     osm->is_local     = 0;
662   }
663   PetscFunctionReturn(0);
664 }/* PCGASMSetTotalSubdomains_GASM() */
665 EXTERN_C_END
666 
667 EXTERN_C_BEGIN
668 #undef __FUNCT__
669 #define __FUNCT__ "PCGASMSetOverlap_GASM"
670 PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
671 {
672   PC_GASM *osm = (PC_GASM*)pc->data;
673 
674   PetscFunctionBegin;
675   if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
676   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
677   if (!pc->setupcalled) {
678     osm->overlap = ovl;
679   }
680   PetscFunctionReturn(0);
681 }
682 EXTERN_C_END
683 
684 EXTERN_C_BEGIN
685 #undef __FUNCT__
686 #define __FUNCT__ "PCGASMSetType_GASM"
687 PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
688 {
689   PC_GASM *osm = (PC_GASM*)pc->data;
690 
691   PetscFunctionBegin;
692   osm->type     = type;
693   osm->type_set = PETSC_TRUE;
694   PetscFunctionReturn(0);
695 }
696 EXTERN_C_END
697 
698 EXTERN_C_BEGIN
699 #undef __FUNCT__
700 #define __FUNCT__ "PCGASMSetSortIndices_GASM"
701 PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool  doSort)
702 {
703   PC_GASM *osm = (PC_GASM*)pc->data;
704 
705   PetscFunctionBegin;
706   osm->sort_indices = doSort;
707   PetscFunctionReturn(0);
708 }
709 EXTERN_C_END
710 
711 EXTERN_C_BEGIN
712 #undef __FUNCT__
713 #define __FUNCT__ "PCGASMGetSubKSP_GASM"
714 /*
715    FIX: This routine might need to be modified once multiple ranks per subdomain are allowed.
716         In particular, it would upset the global subdomain number calculation.
717 */
718 PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
719 {
720   PC_GASM         *osm = (PC_GASM*)pc->data;
721   PetscErrorCode ierr;
722 
723   PetscFunctionBegin;
724   if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here");
725 
726   if (n) {
727     *n = osm->n;
728   }
729   if (first) {
730     ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
731     *first -= osm->n;
732   }
733   if (ksp) {
734     /* Assume that local solves are now different; not necessarily
735        true though!  This flag is used only for PCView_GASM() */
736     *ksp                   = osm->ksp;
737     osm->same_local_solves = PETSC_FALSE;
738   }
739   PetscFunctionReturn(0);
740 }/* PCGASMGetSubKSP_GASM() */
741 EXTERN_C_END
742 
743 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetLocalSubdomains"
/*@C
    PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor
    only) for the additive Schwarz preconditioner.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
.   n - the number of subdomains for this processor (default value = 1)
.   is - the index sets that define the subdomains for this processor
         (or PETSC_NULL for PETSc to determine subdomains)
-   is_local - the index sets that define the local part of the subdomains for this processor
         (or PETSC_NULL to use the default of 1 subdomain per process)

    Notes:
    The IS numbering is in the parallel, global numbering of the vector.

    By default the GASM preconditioner uses 1 block per processor.

    This routine must be called before PCSetUp().

    Use PCGASMSetTotalSubdomains() to set the subdomains for all processors.

    Level: advanced

.keywords: PC, GASM, set, local, subdomains, additive Schwarz

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode  PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
783 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetTotalSubdomains"
/*@C
    PCGASMSetTotalSubdomains - Sets the total number of subdomains to use across
    all processors for the additive Schwarz preconditioner.  The subdomains are
    split approximately evenly among the processors.  Either all or no
    processors in the PC communicator must call this routine, with the same N.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
-   N - the total number of subdomains for all processors

    Options Database Key:
.    -pc_gasm_blocks <blks> - Sets total blocks

    Notes:
    By default the GASM preconditioner uses 1 block per processor.

    This routine must be called before PCSetUp(); any subdomains set previously
    are released and regenerated during setup.

    Use PCGASMSetLocalSubdomains() to set the subdomain index sets explicitly,
    one processor at a time.

    Level: advanced

.keywords: PC, GASM, set, total, global, subdomains, additive Schwarz

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
832 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetOverlap"
/*@
    PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)

    Options Database Key:
.   -pc_gasm_overlap <ovl> - Sets overlap

    Notes:
    By default the GASM preconditioner uses 1 block per processor.  To use
    multiple blocks per processor, see PCGASMSetTotalSubdomains() and
    PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>).

    The overlap defaults to 1, so if one desires that no additional
    overlap be computed beyond what may have been set with a call to
    PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl
    must be set to be 0.  In particular, if one does not explicitly set
    the subdomains in an application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in a GASM
    variant that is equivalent to the block Jacobi preconditioner.

    Note that one can define initial index sets with any overlap via
    PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine
    PCGASMSetOverlap() merely allows PETSc to extend that overlap further
    if desired.

    Level: intermediate

.keywords: PC, GASM, set, overlap

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveInt(pc,ovl,2);
  ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
884 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetType"
/*@
    PCGASMSetType - Sets the type of restriction and interpolation used
    for local problems in the additive Schwarz method.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   type - variant of GASM, one of
.vb
      PC_GASM_BASIC       - full interpolation and restriction
      PC_GASM_RESTRICT    - full restriction, local processor interpolation
      PC_GASM_INTERPOLATE - full interpolation, local processor restriction
      PC_GASM_NONE        - local processor restriction and interpolation
.ve

    Options Database Key:
.   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

    Level: intermediate

.keywords: PC, GASM, set, type

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveEnum(pc,type,2);
  ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
923 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetSortIndices"
/*@
    PCGASMSetSortIndices - Determines whether subdomain indices are sorted.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   doSort - sort the subdomain indices

    Level: intermediate

.keywords: PC, GASM, set, sort, indices

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool  doSort)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,doSort,2);
  ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
952 
#undef __FUNCT__
#define __FUNCT__ "PCGASMGetSubKSP"
/*@C
   PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
   this processor.

   Collective on PC iff first_local is requested

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n_local - the number of blocks on this processor or PETSC_NULL
.  first_local - the global number of the first block on this processor or PETSC_NULL,
                 all processors must request or all must pass PETSC_NULL
-  ksp - the array of KSP contexts

   Note:
   After PCGASMGetSubKSP() the array of KSP contexts is not to be freed

   Currently for some matrix implementations only 1 block per processor
   is supported.

   You must call KSPSetUp() before calling PCGASMGetSubKSP().

   Level: advanced

.keywords: PC, GASM, additive Schwarz, get, sub, KSP, context

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMSetOverlap(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
994 
995 /* -------------------------------------------------------------------------------------*/
996 /*MC
997    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
998            its own KSP object.
999 
1000    Options Database Keys:
+  -pc_gasm_truelocal - Activates PCGASMSetUseTrueLocal()
1002 .  -pc_gasm_blocks <blks> - Sets total blocks
1003 .  -pc_gasm_overlap <ovl> - Sets overlap
1004 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1005 
1006      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1007       will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1008       -pc_gasm_type basic to use the standard GASM.
1009 
1010    Notes: Each processor can have one or more blocks, but a block cannot be shared by more
1011      than one processor. Defaults to one block per processor.
1012 
1013      To set options on the solvers for each block append -sub_ to all the KSP, and PC
1014         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1015 
1016      To set the options on the solvers separate for each block call PCGASMGetSubKSP()
1017          and set the options directly on the resulting KSP object (you can access its PC
1018          with KSPGetPC())
1019 
1020 
1021    Level: beginner
1022 
1023    Concepts: additive Schwarz method
1024 
1025     References:
1026     An additive variant of the Schwarz alternating method for the case of many subregions
1027     M Dryja, OB Widlund - Courant Institute, New York University Technical report
1028 
1029     Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1030     Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
1031 
1032 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1033            PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(),
1034            PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()
1035 
1036 M*/
1037 
1038 EXTERN_C_BEGIN
1039 #undef __FUNCT__
1040 #define __FUNCT__ "PCCreate_GASM"
1041 PetscErrorCode  PCCreate_GASM(PC pc)
1042 {
1043   PetscErrorCode ierr;
1044   PC_GASM         *osm;
1045 
1046   PetscFunctionBegin;
1047   ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr);
1048   osm->N                 = PETSC_DECIDE;
1049   osm->n                 = 0;
1050   osm->nmax              = 0;
1051   osm->overlap           = 1;
1052   osm->ksp               = 0;
1053   osm->grestriction      = 0;
1054   osm->gprolongation     = 0;
1055   osm->gx                = 0;
1056   osm->gy                = 0;
1057   osm->x                 = 0;
1058   osm->y                 = 0;
1059   osm->is                = 0;
1060   osm->is_local          = 0;
1061   osm->pmat              = 0;
1062   osm->type              = PC_GASM_RESTRICT;
1063   osm->same_local_solves = PETSC_TRUE;
1064   osm->sort_indices      = PETSC_TRUE;
1065 
1066   pc->data                   = (void*)osm;
1067   pc->ops->apply             = PCApply_GASM;
1068   pc->ops->applytranspose    = PCApplyTranspose_GASM;
1069   pc->ops->setup             = PCSetUp_GASM;
1070   pc->ops->reset             = PCReset_GASM;
1071   pc->ops->destroy           = PCDestroy_GASM;
1072   pc->ops->setfromoptions    = PCSetFromOptions_GASM;
1073   pc->ops->setuponblocks     = PCSetUpOnBlocks_GASM;
1074   pc->ops->view              = PCView_GASM;
1075   pc->ops->applyrichardson   = 0;
1076 
1077   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM",
1078                     PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr);
1079   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM",
1080                     PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr);
1081   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM",
1082                     PCGASMSetOverlap_GASM);CHKERRQ(ierr);
1083   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM",
1084                     PCGASMSetType_GASM);CHKERRQ(ierr);
1085   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM",
1086                     PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
1087   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM",
1088                     PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
1089   PetscFunctionReturn(0);
1090 }
1091 EXTERN_C_END
1092 
1093 
1094 #undef __FUNCT__
1095 #define __FUNCT__ "PCGASMCreateSubdomains"
1096 /*@C
1097    PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
1098    preconditioner for a any problem on a general grid.
1099 
1100    Collective
1101 
1102    Input Parameters:
1103 +  A - The global matrix operator
1104 -  n - the number of local blocks
1105 
1106    Output Parameters:
1107 .  outis - the array of index sets defining the subdomains
1108 
1109    Level: advanced
1110 
1111    Note: this generates nonoverlapping subdomains; the PCGASM will generate the overlap
1112     from these if you use PCGASMSetLocalSubdomains()
1113 
1114     In the Fortran version you must provide the array outis[] already allocated of length n.
1115 
1116 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1117 
1118 .seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains()
1119 @*/
1120 PetscErrorCode  PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
1121 {
1122   MatPartitioning           mpart;
1123   const char                *prefix;
1124   PetscErrorCode            (*f)(Mat,PetscBool *,MatReuse,Mat*);
1125   PetscMPIInt               size;
1126   PetscInt                  i,j,rstart,rend,bs;
1127   PetscBool                 iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1128   Mat                       Ad = PETSC_NULL, adj;
1129   IS                        ispart,isnumb,*is;
1130   PetscErrorCode            ierr;
1131 
1132   PetscFunctionBegin;
1133   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1134   PetscValidPointer(outis,3);
1135   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);
1136 
1137   /* Get prefix, row distribution, and block size */
1138   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1139   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1140   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1141   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1142 
1143   /* Get diagonal block from matrix if possible */
1144   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1145   ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
1146   if (f) {
1147     ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr);
1148   } else if (size == 1) {
1149     iscopy = PETSC_FALSE; Ad = A;
1150   } else {
1151     iscopy = PETSC_FALSE; Ad = PETSC_NULL;
1152   }
1153   if (Ad) {
1154     ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1155     if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1156   }
1157   if (Ad && n > 1) {
1158     PetscBool  match,done;
1159     /* Try to setup a good matrix partitioning if available */
1160     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1161     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1162     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1163     ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
1164     if (!match) {
1165       ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
1166     }
1167     if (!match) { /* assume a "good" partitioner is available */
1168       PetscInt na,*ia,*ja;
1169       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1170       if (done) {
1171 	/* Build adjacency matrix by hand. Unfortunately a call to
1172 	   MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1173 	   remove the block-aij structure and we cannot expect
1174 	   MatPartitioning to split vertices as we need */
1175 	PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
1176 	nnz = 0;
1177 	for (i=0; i<na; i++) { /* count number of nonzeros */
1178 	  len = ia[i+1] - ia[i];
1179 	  row = ja + ia[i];
1180 	  for (j=0; j<len; j++) {
1181 	    if (row[j] == i) { /* don't count diagonal */
1182 	      len--; break;
1183 	    }
1184 	  }
1185 	  nnz += len;
1186 	}
1187 	ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
1188 	ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
1189 	nnz    = 0;
1190 	iia[0] = 0;
1191 	for (i=0; i<na; i++) { /* fill adjacency */
1192 	  cnt = 0;
1193 	  len = ia[i+1] - ia[i];
1194 	  row = ja + ia[i];
1195 	  for (j=0; j<len; j++) {
1196 	    if (row[j] != i) { /* if not diagonal */
1197 	      jja[nnz+cnt++] = row[j];
1198 	    }
1199 	  }
1200 	  nnz += cnt;
1201 	  iia[i+1] = nnz;
1202 	}
1203 	/* Partitioning of the adjacency matrix */
1204 	ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
1205 	ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1206 	ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
1207 	ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1208 	ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1209 	ierr = MatDestroy(&adj);CHKERRQ(ierr);
1210 	foundpart = PETSC_TRUE;
1211       }
1212       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1213     }
1214     ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
1215   }
1216   if (iscopy) {ierr = MatDestroy(&Ad);CHKERRQ(ierr);}
1217 
1218   ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
1219   *outis = is;
1220 
1221   if (!foundpart) {
1222 
1223     /* Partitioning by contiguous chunks of rows */
1224 
1225     PetscInt mbs   = (rend-rstart)/bs;
1226     PetscInt start = rstart;
1227     for (i=0; i<n; i++) {
1228       PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1229       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1230       start += count;
1231     }
1232 
1233   } else {
1234 
1235     /* Partitioning by adjacency of diagonal block  */
1236 
1237     const PetscInt *numbering;
1238     PetscInt       *count,nidx,*indices,*newidx,start=0;
1239     /* Get node count in each partition */
1240     ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
1241     ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
1242     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1243       for (i=0; i<n; i++) count[i] *= bs;
1244     }
1245     /* Build indices from node numbering */
1246     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1247     ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
1248     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1249     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1250     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1251     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1252     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1253       ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
1254       for (i=0; i<nidx; i++)
1255 	for (j=0; j<bs; j++)
1256 	  newidx[i*bs+j] = indices[i]*bs + j;
1257       ierr = PetscFree(indices);CHKERRQ(ierr);
1258       nidx   *= bs;
1259       indices = newidx;
1260     }
1261     /* Shift to get global indices */
1262     for (i=0; i<nidx; i++) indices[i] += rstart;
1263 
1264     /* Build the index sets for each block */
1265     for (i=0; i<n; i++) {
1266       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
1267       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1268       start += count[i];
1269     }
1270 
1271     ierr = PetscFree(count);
1272     ierr = PetscFree(indices);
1273     ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
1274     ierr = ISDestroy(&ispart);CHKERRQ(ierr);
1275   }
1276 
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 #undef __FUNCT__
1281 #define __FUNCT__ "PCGASMDestroySubdomains"
1282 /*@C
1283    PCGASMDestroySubdomains - Destroys the index sets created with
1284    PCGASMCreateSubdomains(). Should be called after setting subdomains
1285    with PCGASMSetLocalSubdomains().
1286 
1287    Collective
1288 
1289    Input Parameters:
1290 +  n - the number of index sets
1291 .  is - the array of index sets
1292 -  is_local - the array of local index sets, can be PETSC_NULL
1293 
1294    Level: advanced
1295 
1296 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1297 
1298 .seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains()
1299 @*/
1300 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1301 {
1302   PetscInt       i;
1303   PetscErrorCode ierr;
1304   PetscFunctionBegin;
1305   if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
1306   PetscValidPointer(is,2);
1307   for (i=0; i<n; i++) { ierr = ISDestroy(&is[i]);CHKERRQ(ierr); }
1308   ierr = PetscFree(is);CHKERRQ(ierr);
1309   if (is_local) {
1310     PetscValidPointer(is_local,3);
1311     for (i=0; i<n; i++) { ierr = ISDestroy(&is_local[i]);CHKERRQ(ierr); }
1312     ierr = PetscFree(is_local);CHKERRQ(ierr);
1313   }
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 
/* Computes the intersection of the 2D subdomain [xleft,xright) x [ylow,yhigh) on an
   M x N grid with this process's ownership range [first,last), returning (through
   the pointer arguments) the bounding box (*xleft_loc,*ylow_loc)-(*xright_loc,*yhigh_loc)
   of the intersection and its size *n.
   FIX: wrapped in do { } while (0) so that the invocation followed by a semicolon
   parses as a single statement (safe as an unbraced if-branch followed by else). */
#define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
do {                                                                                                    \
  PetscInt first_row = first/M, last_row = last/M+1;                                                    \
  /*                                                                                                    \
   Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
   of the bounding box of the intersection of the subdomain with the local ownership range (local       \
   subdomain).                                                                                          \
   Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
   of the intersection.                                                                                 \
  */                                                                                                    \
  /* ylow_loc is the grid row containing the first element of the local subdomain */                    \
  *ylow_loc = PetscMax(first_row,ylow);                                                                 \
  /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft;                                      \
  /* yhigh_loc is the grid row above the last local subdomain element */                                \
  *yhigh_loc = PetscMin(last_row,yhigh);                                                                \
  /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright;                                    \
  /* Now compute the size of the local subdomain n. */                                                  \
  *n = 0;                                                                                               \
  if (*ylow_loc < *yhigh_loc) {                                                                         \
    PetscInt width = xright-xleft;                                                                      \
    *n += width*(*yhigh_loc-*ylow_loc-1);                                                               \
    *n += PetscMin(PetscMax(*xright_loc-xleft,0),width);                                                \
    *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width);                                                \
  }                                                                                                     \
} while (0)
1345 
1346 
1347 
1348 #undef __FUNCT__
1349 #define __FUNCT__ "PCGASMCreateSubdomains2D"
1350 /*@
1351    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1352    preconditioner for a two-dimensional problem on a regular grid.
1353 
1354    Collective
1355 
1356    Input Parameters:
1357 +  M, N - the global number of mesh points in the x and y directions
1358 .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1359 .  dof - degrees of freedom per node
1360 -  overlap - overlap in mesh lines
1361 
1362    Output Parameters:
1363 +  Nsub - the number of local subdomains created
1364 .  is - array of index sets defining overlapping (if overlap > 0) subdomains
1365 -  is_local - array of index sets defining non-overlapping subdomains
1366 
1367    Note:
1368    Presently PCAMSCreateSubdomains2d() is valid only for sequential
1369    preconditioners.  More general related routines are
1370    PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains().
1371 
1372    Level: advanced
1373 
1374 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid
1375 
1376 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
1377           PCGASMSetOverlap()
1378 @*/
1379 PetscErrorCode  PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
1380 {
1381   PetscErrorCode ierr;
1382   PetscMPIInt    size, rank;
1383   PetscInt       i, j;
1384   PetscInt       maxheight, maxwidth;
1385   PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
1386   PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1387   PetscInt       x[2][2], y[2][2], n[2];
1388   PetscInt       first, last;
1389   PetscInt       nidx, *idx;
1390   PetscInt       ii,jj,s,q,d;
1391   PetscInt       k,kk;
1392   PetscMPIInt    color;
1393   MPI_Comm       comm, subcomm;
1394   IS             **iis = 0;
1395 
1396   PetscFunctionBegin;
1397   ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
1398   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1399   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1400   ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
1401   if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1402 	     "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1403 
1404   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1405   s = 0;
1406   ystart = 0;
1407   for (j=0; j<Ndomains; ++j) {
1408     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1409     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1410     /* Vertical domain limits with an overlap. */
1411     ylow = PetscMax(ystart - overlap,0);
1412     yhigh = PetscMin(ystart + maxheight + overlap,N);
1413     xstart = 0;
1414     for (i=0; i<Mdomains; ++i) {
1415       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1416       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1417       /* Horizontal domain limits with an overlap. */
1418       xleft   = PetscMax(xstart - overlap,0);
1419       xright  = PetscMin(xstart + maxwidth + overlap,M);
1420       /*
1421 	 Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1422       */
1423       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1424       if(nidx) {
1425         ++s;
1426       }
1427       xstart += maxwidth;
1428     }/* for(i = 0; i < Mdomains; ++i) */
1429     ystart += maxheight;
1430   }/* for(j = 0; j < Ndomains; ++j) */
1431   /* Now we can allocate the necessary number of ISs. */
1432   *nsub = s;
1433   ierr = PetscMalloc((*nsub)*sizeof(IS*),is);CHKERRQ(ierr);
1434   ierr = PetscMalloc((*nsub)*sizeof(IS*),is_local);CHKERRQ(ierr);
1435   s = 0;
1436   ystart = 0;
1437   for (j=0; j<Ndomains; ++j) {
1438     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1439     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1440     /* Vertical domain limits with an overlap. */
1441     y[0][0] = PetscMax(ystart - overlap,0);
1442     y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1443     /* Vertical domain limits without an overlap. */
1444     y[1][0] = ystart;
1445     y[1][1] = PetscMin(ystart + maxheight,N);
1446     xstart = 0;
1447     for (i=0; i<Mdomains; ++i) {
1448       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1449       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1450       /* Horizontal domain limits with an overlap. */
1451       x[0][0]  = PetscMax(xstart - overlap,0);
1452       x[0][1]  = PetscMin(xstart + maxwidth + overlap,M);
1453       /* Horizontal domain limits without an overlap. */
1454       x[1][0] = xstart;
1455       x[1][1] = PetscMin(xstart+maxwidth,M);
1456       /*
1457 	 Determine whether this domain intersects this processor's ownership range of pc->pmat.
1458 	 Do this twice: first for the domains with overlaps, and once without.
1459 	 During the first pass create the subcommunicators, and use them on the second pass as well.
1460       */
1461       for(q = 0; q < 2; ++q) {
1462 	/*
1463 	  domain limits, (xleft, xright) and (ylow, yheigh) are adjusted
1464 	  according to whether the domain with an overlap or without is considered.
1465 	*/
1466 	xleft = x[q][0]; xright = x[q][1];
1467 	ylow  = y[q][0]; yhigh  = y[q][1];
1468         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1469 	nidx *= dof;
1470         n[q] = nidx;
1471         /*
1472          Based on the counted number of indices in the local domain *with an overlap*,
1473          construct a subcommunicator of all the processors supporting this domain.
1474          Observe that a domain with an overlap might have nontrivial local support,
1475          while the domain without an overlap might not.  Hence, the decision to participate
1476          in the subcommunicator must be based on the domain with an overlap.
1477          */
1478 	if (q == 0) {
1479 	  if(nidx) {
1480 	    color = 1;
1481 	  } else {
1482 	    color = MPI_UNDEFINED;
1483 	  }
1484 	  ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
1485 	}
1486         /*
1487          Proceed only if the number of local indices *with an overlap* is nonzero.
1488          */
1489         if (n[0]) {
1490           if(q == 0) {
1491             iis = is;
1492           }
1493           if (q == 1) {
1494             /*
1495              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1496              Moreover, if the overlap is zero, the two ISs are identical.
1497              */
1498             if (overlap == 0) {
1499               (*is_local)[s] = (*is)[s];
1500               ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
1501               continue;
1502             } else {
1503               iis = is_local;
1504               subcomm = ((PetscObject)(*is)[s])->comm;
1505             }
1506           }/* if(q == 1) */
1507           idx = PETSC_NULL;
1508 	  ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
1509           if(nidx) {
1510             k    = 0;
1511             for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1512               PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft;
1513               PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright;
1514               kk = dof*(M*jj + x0);
1515               for (ii=x0; ii<x1; ++ii) {
1516                 for(d = 0; d < dof; ++d) {
1517                   idx[k++] = kk++;
1518                 }
1519               }
1520             }
1521           }
1522 	  ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr);
1523 	}/* if(n[0]) */
1524       }/* for(q = 0; q < 2; ++q) */
1525       if(n[0]) {
1526         ++s;
1527       }
1528       xstart += maxwidth;
1529     }/* for(i = 0; i < Mdomains; ++i) */
1530     ystart += maxheight;
1531   }/* for(j = 0; j < Ndomains; ++j) */
1532   PetscFunctionReturn(0);
1533 }
1534 
1535 #undef __FUNCT__
1536 #define __FUNCT__ "PCGASMGetLocalSubdomains"
1537 /*@C
1538     PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor
1539     only) for the additive Schwarz preconditioner.
1540 
1541     Not Collective
1542 
1543     Input Parameter:
1544 .   pc - the preconditioner context
1545 
1546     Output Parameters:
1547 +   n - the number of subdomains for this processor (default value = 1)
1548 .   is - the index sets that define the subdomains for this processor
1549 -   is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)
1550 
1551 
1552     Notes:
1553     The IS numbering is in the parallel, global numbering of the vector.
1554 
1555     Level: advanced
1556 
1557 .keywords: PC, GASM, set, local, subdomains, additive Schwarz
1558 
1559 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1560           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices()
1561 @*/
1562 PetscErrorCode  PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1563 {
1564   PC_GASM         *osm;
1565   PetscErrorCode ierr;
1566   PetscBool      match;
1567 
1568   PetscFunctionBegin;
1569   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1570   PetscValidIntPointer(n,2);
1571   if (is) PetscValidPointer(is,3);
1572   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1573   if (!match) {
1574     if (n)  *n  = 0;
1575     if (is) *is = PETSC_NULL;
1576   } else {
1577     osm = (PC_GASM*)pc->data;
1578     if (n)  *n  = osm->n;
1579     if (is) *is = osm->is;
1580     if (is_local) *is_local = osm->is_local;
1581   }
1582   PetscFunctionReturn(0);
1583 }
1584 
1585 #undef __FUNCT__
1586 #define __FUNCT__ "PCGASMGetLocalSubmatrices"
1587 /*@C
1588     PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1589     only) for the additive Schwarz preconditioner.
1590 
1591     Not Collective
1592 
1593     Input Parameter:
1594 .   pc - the preconditioner context
1595 
1596     Output Parameters:
1597 +   n - the number of matrices for this processor (default value = 1)
1598 -   mat - the matrices
1599 
1600 
1601     Level: advanced
1602 
1603 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi
1604 
1605 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1606           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains()
1607 @*/
1608 PetscErrorCode  PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1609 {
1610   PC_GASM         *osm;
1611   PetscErrorCode ierr;
1612   PetscBool      match;
1613 
1614   PetscFunctionBegin;
1615   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1616   PetscValidIntPointer(n,2);
1617   if (mat) PetscValidPointer(mat,3);
1618   if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp().");
1619   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1620   if (!match) {
1621     if (n)   *n   = 0;
1622     if (mat) *mat = PETSC_NULL;
1623   } else {
1624     osm = (PC_GASM*)pc->data;
1625     if (n)   *n   = osm->n;
1626     if (mat) *mat = osm->pmat;
1627   }
1628   PetscFunctionReturn(0);
1629 }
1630