xref: /petsc/src/ksp/pc/impls/gasm/gasm.c (revision 86b96d36edd017f56cdf158a37b9718445325f8f)
1 #define PETSCKSP_DLL
2 
3 /*
4   This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
5   In this version each processor may have any number of subdomains and a subdomain may live on multiple
6   processors.
7 
8        N    - total number of subdomains on all processors
9        n    - actual number of subdomains on this processor
10        nmax - maximum number of subdomains per processor
11 */
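/*
  A minimal usage sketch (an illustration only, assuming the standard KSP/PC workflow;
  A, b, and x are a user-assembled Mat and Vecs, and ierr is a PetscErrorCode):

     KSP ksp;
     PC  pc;
     ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCGASM);CHKERRQ(ierr);
     ierr = PCGASMSetOverlap(pc,2);CHKERRQ(ierr);    (extend each subdomain by 2 layers)
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
     ierr = KSPDestroy(ksp);CHKERRQ(ierr);
*/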
12 #include "private/pcimpl.h"     /*I "petscpc.h" I*/
13 
14 typedef struct {
15   PetscInt   N,n,nmax;
16   PetscInt   overlap;             /* overlap requested by user */
17   KSP        *ksp;                /* linear solvers for each block */
18   Vec        gx,gy;               /* Merged work vectors */
19   Vec        *x,*y;               /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
20   IS         gis, gis_local;      /* merged ISs */
21   VecScatter grestriction;        /* merged restriction */
22   VecScatter gprolongation;       /* merged prolongation */
23   IS         *is;                 /* index set that defines each overlapping subdomain */
24   IS         *is_local;           /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */
25   Mat        *pmat;               /* subdomain block matrices */
26   PCGASMType  type;               /* use reduced interpolation, restriction or both */
27   PetscBool  type_set;            /* if user set this value (so won't change it for symmetric problems) */
28   PetscBool  same_local_solves;   /* flag indicating whether all local solvers are the same */
29   PetscBool  sort_indices;        /* flag to sort subdomain indices */
30 } PC_GASM;
31 
32 #undef __FUNCT__
33 #define __FUNCT__ "PCGASMPrintSubdomains"
34 static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
35 {
36   PC_GASM         *osm  = (PC_GASM*)pc->data;
37   const char     *prefix;
38   char           fname[PETSC_MAX_PATH_LEN+1];
39   PetscViewer    viewer, sviewer;
40   PetscInt       i,j,nidx;
41   const PetscInt *idx;
42   char           *cidx;
43   PetscBool      found;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
48   ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr);
49   if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }
50   for (i=0;i<osm->n;++i) {
51     ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
52     ierr = ISGetLocalSize(osm->is[i], &nidx); CHKERRQ(ierr);
53     ierr = ISGetIndices(osm->is[i], &idx);    CHKERRQ(ierr);
54     ierr = PetscMalloc(sizeof(char)*(16*nidx+1), &cidx); CHKERRQ(ierr);  /* no more than 15 characters per index plus a space */
55     ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*nidx, &sviewer); CHKERRQ(ierr);
56     for(j = 0; j < nidx; ++j) {
57       ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]); CHKERRQ(ierr);
58     }
59     ierr = PetscViewerDestroy(sviewer); CHKERRQ(ierr);
60     ierr = ISRestoreIndices(osm->is[i],&idx); CHKERRQ(ierr);
61     ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n"); CHKERRQ(ierr);
62     ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx); CHKERRQ(ierr);
63     ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
64     ierr = PetscViewerASCIIPrintf(viewer, "\n"); CHKERRQ(ierr);
65     ierr = PetscFree(cidx); CHKERRQ(ierr);
66     ierr = PetscViewerDestroy(viewer); CHKERRQ(ierr);
67     if (osm->is_local) {
68       ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
69       ierr = ISGetLocalSize(osm->is_local[i], &nidx); CHKERRQ(ierr);
70       ierr = ISGetIndices(osm->is_local[i], &idx);    CHKERRQ(ierr);
71       ierr = PetscMalloc(sizeof(char)*(16*nidx+1), &cidx); CHKERRQ(ierr);  /* no more than 15 characters per index plus a space */
72       ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*nidx, &sviewer); CHKERRQ(ierr);
73       for(j = 0; j < nidx; ++j) {
74 	ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]); CHKERRQ(ierr);
75       }
76       ierr = PetscViewerDestroy(sviewer); CHKERRQ(ierr);
77       ierr = ISRestoreIndices(osm->is_local[i],&idx); CHKERRQ(ierr);
78       ierr = PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n"); CHKERRQ(ierr);
79       ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx); CHKERRQ(ierr);
80       ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
81       ierr = PetscViewerASCIIPrintf(viewer, "\n"); CHKERRQ(ierr);
82       ierr = PetscFree(cidx); CHKERRQ(ierr);
83       ierr = PetscViewerDestroy(viewer); CHKERRQ(ierr);
84     }
85   }
86   PetscFunctionReturn(0);
87 }
88 
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "PCView_GASM"
92 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
93 {
94   PC_GASM         *osm = (PC_GASM*)pc->data;
95   const char     *prefix;
96   PetscErrorCode ierr;
97   PetscMPIInt    rank, size;
98   PetscInt       i,bsz;
99   PetscBool      iascii,isstring,print_subdomains = PETSC_FALSE; /* must be initialized: PetscOptionsGetBool() leaves it unchanged if the option is absent */
100   PetscViewer    sviewer;
101 
102 
103   PetscFunctionBegin;
104   ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size); CHKERRQ(ierr);
105   ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank); CHKERRQ(ierr);
106   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
107   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
108   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
109   ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr);
110   if (iascii) {
111     char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set";
112     if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);}
113     if (osm->nmax > 0)     {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);}
114     ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d:%d] number of local subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr);
115     ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
116     ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr);
117     ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
118     ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
119     if (osm->same_local_solves) {
120       if (osm->ksp) {
121         ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr);
122         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
123         if (!rank) {
124           ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
125           ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
126           ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
127         }
128         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
129       }
130     } else {
131       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
132       ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr);
133       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
134       ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
135       for (i=0; i<osm->nmax; i++) {
136         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
137         if (i < osm->n) {
138           ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
139           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr);
140           ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
141           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
142         }
143         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
144       }
145       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
146       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
147     }
148   } else if (isstring) {
149     ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr);
150     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
151     if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
152     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
153   } else {
154     SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name);
155   }
156   ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
157   if (print_subdomains) {
158     ierr = PCGASMPrintSubdomains(pc); CHKERRQ(ierr);
159   }
160   PetscFunctionReturn(0);
161 }
162 
163 
164 
165 
166 
167 #undef __FUNCT__
168 #define __FUNCT__ "PCSetUp_GASM"
169 static PetscErrorCode PCSetUp_GASM(PC pc)
170 {
171   PC_GASM         *osm  = (PC_GASM*)pc->data;
172   PetscErrorCode ierr;
173   PetscBool      symset,flg;
174   PetscInt       i;
175   PetscMPIInt    rank, size;
176   MatReuse       scall = MAT_REUSE_MATRIX;
177   KSP            ksp;
178   PC             subpc;
179   const char     *prefix,*pprefix;
180   PetscInt       dn;       /* Number of indices in a single subdomain assigned to this processor. */
181   const PetscInt *didx;    /* Indices from a single subdomain assigned to this processor. */
182   PetscInt       ddn;      /* Number of indices in all subdomains assigned to this processor. */
183   PetscInt       *ddidx;   /* Indices of all subdomains assigned to this processor. */
184   IS             gid;      /* Identity IS of the size of all subdomains assigned to this processor. */
185   Vec            x,y;
186   PetscScalar    *gxarray, *gyarray;
187   PetscInt       gfirst, glast;
188 
189   PetscFunctionBegin;
190   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
191   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
192   if (!pc->setupcalled) {
193 
194     if (!osm->type_set) {
195       ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
196       if (symset && flg) { osm->type = PC_GASM_BASIC; }
197     }
198 
199     if (osm->N == PETSC_DECIDE && osm->n < 1) {
200       /* no subdomains given, use one per processor */
201       osm->nmax = osm->n = 1;
202       ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
203       osm->N = size;
204     } else if (osm->N == PETSC_DECIDE) {
205       PetscInt inwork[2], outwork[2];
206       /* determine global number of subdomains and the max number of local subdomains */
207       inwork[0] = inwork[1] = osm->n;
208       ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
209       osm->nmax = outwork[0];
210       osm->N    = outwork[1];
211     }
212     if (!osm->is){ /* create the index sets */
213       ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr);
214     }
215     if (!osm->is_local) {
216       /*
217 	 This indicates that osm->is should define a nonoverlapping decomposition.
218 	 (There is no way to really guarantee that if the subdomains are set by the user through PCGASMSetLocalSubdomains(),
219 	  but the assumption is that either the user does the right thing, or the subdomains in osm->is have been created
220 	  via PCGASMCreateSubdomains(), which guarantees a nonoverlapping decomposition.)
221 	 Therefore, osm->is will be used to define osm->is_local.
222 	 If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap,
223 	 so osm->is_local should obtain a copy of osm->is while the latter is still (presumably) nonoverlapping.
224 	 Otherwise (no overlap has been requested), the entries of osm->is_local are simply aliases for those of osm->is.
225       */
226       ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
227       for (i=0; i<osm->n; i++) {
228         if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
229           ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr);
230           ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr);
231         } else {
232           ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr);
233           osm->is_local[i] = osm->is[i];
234         }
235       }
236     }
237     /* Beyond this point osm->is_local is not null. */
238     if (osm->overlap > 0) {
239       /* Extend the "overlapping" regions by a number of steps */
240       ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr);
241     }
242     ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
243     flg  = PETSC_FALSE;
244     ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
245     if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); }
246 
247     if (osm->sort_indices) {
248       for (i=0; i<osm->n; i++) {
249         ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
250 	ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
251       }
252     }
253     /* Merge the ISs, create merged vectors and scatter contexts. */
254     /* Restriction ISs. */
255     ddn = 0;
256     for (i=0; i<osm->n; i++) {
257       ierr = ISGetLocalSize(osm->is[i],&dn);          CHKERRQ(ierr);
258       ddn += dn;
259     }
260     ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx); CHKERRQ(ierr);
261     ddn = 0;
262     for (i=0; i<osm->n; i++) {
263       ierr = ISGetLocalSize(osm->is[i],&dn); CHKERRQ(ierr);
264       ierr = ISGetIndices(osm->is[i],&didx); CHKERRQ(ierr);
265       ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn); CHKERRQ(ierr);
266       ierr = ISRestoreIndices(osm->is[i], &didx); CHKERRQ(ierr);
267       ddn += dn;
268     }
269     ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis); CHKERRQ(ierr);
270     ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr);
271     ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx); CHKERRQ(ierr);
272     ierr = VecDuplicate(osm->gx,&osm->gy);                                     CHKERRQ(ierr);
273     ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);                     CHKERRQ(ierr);
274     ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid);              CHKERRQ(ierr);
275     ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction));     CHKERRQ(ierr);
276     ierr = ISDestroy(gid);                                                     CHKERRQ(ierr);
277     /* Prolongation ISs */
278     { PetscInt       dn_local;       /* Number of indices in the local part of a single subdomain assigned to this processor. */
279       const PetscInt *didx_local;    /* Global indices from the local part of a single subdomain assigned to this processor. */
280       PetscInt       ddn_local;      /* Number of indices in the local part of the disjoint union of all subdomains assigned to this processor. */
281       PetscInt       *ddidx_local;   /* Global indices of the local part of the disjoint union of all subdomains assigned to this processor. */
282       /**/
283       ISLocalToGlobalMapping ltog;          /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */
284       PetscInt              *ddidx_llocal;  /* Mapped local indices of the disjoint union of local parts of subdomains. */
285       PetscInt               ddn_llocal;    /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */
286       IS                     gis_llocal;    /* IS with ddidx_llocal indices. */
287       PetscInt               j;
288       ddn_local = 0;
289       for (i=0; i<osm->n; i++) {
290 	ierr = ISGetLocalSize(osm->is_local[i],&dn_local); CHKERRQ(ierr);
291 	ddn_local += dn_local;
292       }
293       ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local); CHKERRQ(ierr);
294       ddn_local = 0;
295       for (i=0; i<osm->n; i++) {
296 	ierr = ISGetLocalSize(osm->is_local[i],&dn_local); CHKERRQ(ierr);
297 	ierr = ISGetIndices(osm->is_local[i],&didx_local); CHKERRQ(ierr);
298 	ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local); CHKERRQ(ierr);
299 	ierr = ISRestoreIndices(osm->is_local[i], &didx_local); CHKERRQ(ierr);
300 	ddn_local += dn_local;
301       }
302       ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal); CHKERRQ(ierr);
303       ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local)); CHKERRQ(ierr);
304       ierr = ISLocalToGlobalMappingCreateIS(osm->gis,&ltog);CHKERRQ(ierr);
305       ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr);
306       ierr = ISLocalToGlobalMappingDestroy(ltog);CHKERRQ(ierr);
307       if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local);
308       /* Now convert these localized indices into the global indices into the merged output vector. */
309       ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast); CHKERRQ(ierr);
310       for(j=0; j < ddn_llocal; ++j) {
311 	ddidx_llocal[j] += gfirst;
312       }
313       ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal); CHKERRQ(ierr);
314       ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);              CHKERRQ(ierr);
315       ierr = ISDestroy(gis_llocal);CHKERRQ(ierr);
316     }
317     /* Create the subdomain work vectors. */
318     ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr);
319     ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr);
320     ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
321     ierr = VecGetArray(osm->gx, &gxarray);                                     CHKERRQ(ierr);
322     ierr = VecGetArray(osm->gy, &gyarray);                                     CHKERRQ(ierr);
323     for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) {
324       PetscInt dN;
325       ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
326       ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr);
327       ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr);
328       ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr);
329     }
330     ierr = VecRestoreArray(osm->gx, &gxarray); CHKERRQ(ierr);
331     ierr = VecRestoreArray(osm->gy, &gyarray); CHKERRQ(ierr);
332     ierr = VecDestroy(x); CHKERRQ(ierr);
333     ierr = VecDestroy(y); CHKERRQ(ierr);
334     /* Create the local solvers */
335     ierr = PetscMalloc(osm->n*sizeof(KSP),&osm->ksp);CHKERRQ(ierr);
336     for (i=0; i<osm->n; i++) { /* KSPs are local */
337       ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr);
338       ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
339       ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
340       ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
341       ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
342       ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
343       ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
344       ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
345       osm->ksp[i] = ksp;
346     }
347     scall = MAT_INITIAL_MATRIX;
348 
349   } else {
350     /*
351        Destroy the blocks from the previous iteration
352     */
353     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
354       ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
355       scall = MAT_INITIAL_MATRIX;
356     }
357   }
358 
359   /*
360      Extract out the submatrices.
361   */
362   if (size > 1) {
363     ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
364   }
365   else {
366     ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
367   }
368   if (scall == MAT_INITIAL_MATRIX) {
369     ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
370     for (i=0; i<osm->n; i++) {
371       ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
372       ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
373     }
374   }
375 
376   /* Return control to the user so that the submatrices can be modified (e.g., to apply
377      different boundary conditions for the submatrices than for the global problem) */
378   ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);
379 
380   /*
381      Loop over submatrices putting them into local ksp
382   */
383   for (i=0; i<osm->n; i++) {
384     ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
385     if (!pc->setupcalled) {
386       ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
387     }
388   }
389 
390   PetscFunctionReturn(0);
391 }
392 
393 #undef __FUNCT__
394 #define __FUNCT__ "PCSetUpOnBlocks_GASM"
395 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
396 {
397   PC_GASM         *osm = (PC_GASM*)pc->data;
398   PetscErrorCode ierr;
399   PetscInt       i;
400 
401   PetscFunctionBegin;
402   for (i=0; i<osm->n; i++) {
403     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "PCApply_GASM"
410 static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y)
411 {
412   PC_GASM         *osm = (PC_GASM*)pc->data;
413   PetscErrorCode ierr;
414   PetscInt       i;
415   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
416 
417   PetscFunctionBegin;
418   /*
419      Support for limiting the restriction or interpolation to only local
420      subdomain values (leaving the other values 0).
421   */
422   if (!(osm->type & PC_GASM_RESTRICT)) {
423     forward = SCATTER_FORWARD_LOCAL;
424     /* have to zero the work RHS since scatter may leave some slots empty */
425     ierr = VecZeroEntries(osm->gx); CHKERRQ(ierr);
426   }
427   if (!(osm->type & PC_GASM_INTERPOLATE)) {
428     reverse = SCATTER_REVERSE_LOCAL;
429   }
430 
431   ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
432   ierr = VecZeroEntries(y);CHKERRQ(ierr);
433   ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
434   /* do the subdomain solves */
435   for (i=0; i<osm->n; ++i) {
436     ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
437   }
438   ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
439   ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "PCApplyTranspose_GASM"
445 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y)
446 {
447   PC_GASM         *osm = (PC_GASM*)pc->data;
448   PetscErrorCode ierr;
449   PetscInt       i;
450   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
451 
452   PetscFunctionBegin;
453   /*
454      Support for limiting the restriction or interpolation to only local
455      subdomain values (leaving the other values 0).
456 
457      Note: these are reversed from the PCApply_GASM() because we are applying the
458      transpose of the three terms
459   */
460   if (!(osm->type & PC_GASM_INTERPOLATE)) {
461     forward = SCATTER_FORWARD_LOCAL;
462     /* have to zero the work RHS since scatter may leave some slots empty */
463     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
464   }
465   if (!(osm->type & PC_GASM_RESTRICT)) {
466     reverse = SCATTER_REVERSE_LOCAL;
467   }
468 
469   ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
470   ierr = VecZeroEntries(y);CHKERRQ(ierr);
471   ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
472   /* do the local solves */
473   for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
474     ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
475   }
476   ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
477   ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
478   PetscFunctionReturn(0);
479 }
480 
481 #undef __FUNCT__
482 #define __FUNCT__ "PCDestroy_GASM"
483 static PetscErrorCode PCDestroy_GASM(PC pc)
484 {
485   PC_GASM         *osm = (PC_GASM*)pc->data;
486   PetscErrorCode ierr;
487   PetscInt       i;
488 
489   PetscFunctionBegin;
490   if (osm->ksp) {
491     for (i=0; i<osm->n; i++) {
492       ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr);
493     }
494     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
495   }
496   if (osm->pmat) {
497     if (osm->n > 0) {
498       ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
499     }
500   }
501   for (i=0; i<osm->n; i++) {
502     ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr);
503     ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr);
504   }
505   ierr = PetscFree(osm->x);CHKERRQ(ierr);
506   ierr = PetscFree(osm->y);CHKERRQ(ierr);
507   ierr = VecDestroy(osm->gx); CHKERRQ(ierr);
508   ierr = VecDestroy(osm->gy); CHKERRQ(ierr);
509 
510   ierr = VecScatterDestroy(osm->grestriction); CHKERRQ(ierr);
511   ierr = VecScatterDestroy(osm->gprolongation); CHKERRQ(ierr);
512 
513   if (osm->is) {
514     ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
515   }
516   ierr = PetscFree(osm);CHKERRQ(ierr);
517   PetscFunctionReturn(0);
518 }
519 
520 #undef __FUNCT__
521 #define __FUNCT__ "PCSetFromOptions_GASM"
522 static PetscErrorCode PCSetFromOptions_GASM(PC pc) {
523   PC_GASM         *osm = (PC_GASM*)pc->data;
524   PetscErrorCode ierr;
525   PetscInt       blocks,ovl;
526   PetscBool      symset,flg;
527   PCGASMType      gasmtype;
528 
529   PetscFunctionBegin;
530   /* set the type to symmetric if matrix is symmetric */
531   if (!osm->type_set && pc->pmat) {
532     ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
533     if (symset && flg) { osm->type = PC_GASM_BASIC; }
534   }
535   ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr);
536     ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
537     if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); }
538     ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
539     if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
540     flg  = PETSC_FALSE;
541     ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
542     if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); }
543   ierr = PetscOptionsTail();CHKERRQ(ierr);
544   PetscFunctionReturn(0);
545 }
546 
547 /*------------------------------------------------------------------------------------*/
548 
549 EXTERN_C_BEGIN
550 #undef __FUNCT__
551 #define __FUNCT__ "PCGASMSetLocalSubdomains_GASM"
552 PetscErrorCode PETSCKSP_DLLEXPORT PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[])
553 {
554   PC_GASM         *osm = (PC_GASM*)pc->data;
555   PetscErrorCode ierr;
556   PetscInt       i;
557 
558   PetscFunctionBegin;
559   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
560   if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp().");
561 
562   if (!pc->setupcalled) {
563     osm->n            = n;
564     osm->is           = 0;
565     osm->is_local     = 0;
566     if (is) {
567       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
568     }
569     if (is_local) {
570       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);}
571     }
572     if (osm->is) {
573       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
574     }
575     if (is) {
576       ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr);
577       for (i=0; i<n; i++) { osm->is[i] = is[i]; }
578       /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */
579       osm->overlap = -1;
580     }
581     if (is_local) {
582       ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
583       for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
584     }
585   }
586   PetscFunctionReturn(0);
587 }
588 EXTERN_C_END
589 
590 EXTERN_C_BEGIN
591 #undef __FUNCT__
592 #define __FUNCT__ "PCGASMSetTotalSubdomains_GASM"
593 PetscErrorCode PETSCKSP_DLLEXPORT PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N) {
594   PC_GASM         *osm = (PC_GASM*)pc->data;
595   PetscErrorCode ierr;
596   PetscMPIInt    rank,size;
597   PetscInt       n;
598 
599   PetscFunctionBegin;
600   if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
601 
602   /*
603      Split the subdomains equally among all processors
604   */
605   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
606   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
607   n = N/size + ((N % size) > rank);
608   if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
609   if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp().");
610   if (!pc->setupcalled) {
611     if (osm->is) {
612       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
613     }
614     osm->N            = N;
615     osm->n            = n;
616     osm->is           = 0;
617     osm->is_local     = 0;
618   }
619   PetscFunctionReturn(0);
620 }/* PCGASMSetTotalSubdomains_GASM() */
621 EXTERN_C_END
622 
623 EXTERN_C_BEGIN
624 #undef __FUNCT__
625 #define __FUNCT__ "PCGASMSetOverlap_GASM"
626 PetscErrorCode PETSCKSP_DLLEXPORT PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
627 {
628   PC_GASM *osm = (PC_GASM*)pc->data;
629 
630   PetscFunctionBegin;
631   if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
632   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
633   if (!pc->setupcalled) {
634     osm->overlap = ovl;
635   }
636   PetscFunctionReturn(0);
637 }
638 EXTERN_C_END
639 
640 EXTERN_C_BEGIN
641 #undef __FUNCT__
642 #define __FUNCT__ "PCGASMSetType_GASM"
643 PetscErrorCode PETSCKSP_DLLEXPORT PCGASMSetType_GASM(PC pc,PCGASMType type)
644 {
645   PC_GASM *osm = (PC_GASM*)pc->data;
646 
647   PetscFunctionBegin;
648   osm->type     = type;
649   osm->type_set = PETSC_TRUE;
650   PetscFunctionReturn(0);
651 }
652 EXTERN_C_END
653 
654 EXTERN_C_BEGIN
655 #undef __FUNCT__
656 #define __FUNCT__ "PCGASMSetSortIndices_GASM"
657 PetscErrorCode PETSCKSP_DLLEXPORT PCGASMSetSortIndices_GASM(PC pc,PetscBool  doSort)
658 {
659   PC_GASM *osm = (PC_GASM*)pc->data;
660 
661   PetscFunctionBegin;
662   osm->sort_indices = doSort;
663   PetscFunctionReturn(0);
664 }
665 EXTERN_C_END
666 
667 EXTERN_C_BEGIN
668 #undef __FUNCT__
669 #define __FUNCT__ "PCGASMGetSubKSP_GASM"
670 /*
671    FIX: This routine might need to be modified once multiple ranks per subdomain are allowed.
672         In particular, it would upset the global subdomain number calculation.
673 */
674 PetscErrorCode PETSCKSP_DLLEXPORT PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
675 {
676   PC_GASM         *osm = (PC_GASM*)pc->data;
677   PetscErrorCode ierr;
678 
679   PetscFunctionBegin;
680   if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here");
681 
682   if (n) {
683     *n = osm->n;
684   }
685   if (first) {
686     ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
687     *first -= osm->n;
688   }
689   if (ksp) {
690     /* Assume that local solves are now different; not necessarily
691        true though!  This flag is used only for PCView_GASM() */
692     *ksp                   = osm->ksp;
693     osm->same_local_solves = PETSC_FALSE;
694   }
695   PetscFunctionReturn(0);
696 }/* PCGASMGetSubKSP_GASM() */
697 EXTERN_C_END
698 
699 
700 #undef __FUNCT__
701 #define __FUNCT__ "PCGASMSetLocalSubdomains"
702 /*@C
703     PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor
704     only) for the additive Schwarz preconditioner.
705 
706     Collective on PC
707 
708     Input Parameters:
709 +   pc - the preconditioner context
710 .   n - the number of subdomains for this processor (default value = 1)
711 .   is - the array of index sets that define the subdomains for this processor
712          (or PETSC_NULL for PETSc to determine the subdomains)
713 -   is_local - the array of index sets that define the nonoverlapping local parts of the subdomains
714          (or PETSC_NULL, in which case they are derived from is during PCSetUp())
715 
716     Notes:
717     The IS numbering is in the parallel, global numbering of the vector.
718 
719     By default the GASM preconditioner uses 1 block per processor.
720 
721     Use PCGASMSetTotalSubdomains() to set the subdomains for all processors.
722 
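    Example (a sketch; the index sets are assumed to have been created by the caller in the
    global numbering, e.g., with ISCreateGeneral()):
.vb
      IS is[2];
      /* ... fill is[0] and is[1] with the global indices of two subdomains ... */
      ierr = PCGASMSetLocalSubdomains(pc,2,is,PETSC_NULL);CHKERRQ(ierr);
.ve
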
723     Level: advanced
724 
725 .keywords: PC, GASM, set, local, subdomains, additive Schwarz
726 
727 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
728           PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
729 @*/
730 PetscErrorCode PETSCKSP_DLLEXPORT PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
731 {
732   PetscErrorCode ierr;
733 
734   PetscFunctionBegin;
735   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
736   ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "PCGASMSetTotalSubdomains"
742 /*@C
743     PCGASMSetTotalSubdomains - Sets the total number of subdomains for all processors
744     of the additive Schwarz preconditioner.  Either all or no processors in the
745     PC communicator must call this routine, with the same value of N.
746 
747     Collective on PC
748 
749     Input Parameters:
750 +   pc - the preconditioner context
751 -   N  - the total number of subdomains for all processors
752 
753     Options Database Key:
754     To set the total number of subdomain blocks rather than specify the
755     index sets, use the option
756 .    -pc_gasm_blocks <blks> - Sets total blocks
757 
758     Notes:
759     Currently you cannot use this routine to set the subdomain index sets directly;
760     the N subdomains are split approximately evenly among the processors, with one
761     or more subdomains per processor.
762 
763     By default the GASM preconditioner uses 1 block per processor.
764 
765     Any index sets created for the subdomains cannot be destroyed until after
766     completion of the linear solves for which the GASM preconditioner is being used.
767 
768     Use PCGASMSetLocalSubdomains() to set the subdomains (and, optionally, their
769     nonoverlapping local parts) explicitly on each processor.
770 
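    Example (a sketch; eight subdomains in total, split by PETSc across the processors):
.vb
      ierr = PCGASMSetTotalSubdomains(pc,8);CHKERRQ(ierr);
.ve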
771 
772     Level: advanced
773 
774 .keywords: PC, GASM, set, total, global, subdomains, additive Schwarz
775 
776 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
777           PCGASMCreateSubdomains2D()
778 @*/
779 PetscErrorCode PETSCKSP_DLLEXPORT PCGASMSetTotalSubdomains(PC pc,PetscInt N)
780 {
781   PetscErrorCode ierr;
782 
783   PetscFunctionBegin;
784   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
785   ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr);
786   PetscFunctionReturn(0);
787 }
788 
789 #undef __FUNCT__
790 #define __FUNCT__ "PCGASMSetOverlap"
791 /*@
792     PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
793     additive Schwarz preconditioner.  Either all or no processors in the
794     PC communicator must call this routine.
795 
796     Logically Collective on PC
797 
798     Input Parameters:
799 +   pc  - the preconditioner context
800 -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)
801 
802     Options Database Key:
803 .   -pc_gasm_overlap <ovl> - Sets overlap
804 
805     Notes:
806     By default the GASM preconditioner uses 1 block per processor.  To use
807     multiple blocks per processor, see PCGASMSetTotalSubdomains() and
808     PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>).
809 
810     The overlap defaults to 1, so if one desires that no additional
811     overlap be computed beyond what may have been set with a call to
812     PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl
813     must be set to 0.  In particular, if one does not explicitly set
814     the subdomains in an application code, then all overlap would be computed
815     internally by PETSc, and using an overlap of 0 would result in a GASM
816     variant that is equivalent to the block Jacobi preconditioner.
817 
818     Note that one can define initial index sets with any overlap via
819     PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine
820     PCGASMSetOverlap() merely allows PETSc to extend that overlap further
821     if desired.
822 
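    Example (a sketch; keeps user-supplied subdomains exactly as given, assuming is[] and
    is_local[] were set up as for PCGASMSetLocalSubdomains()):
.vb
      ierr = PCGASMSetLocalSubdomains(pc,n,is,is_local);CHKERRQ(ierr);
      ierr = PCGASMSetOverlap(pc,0);CHKERRQ(ierr);
.ve
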
823     Level: intermediate
824 
825 .keywords: PC, GASM, set, overlap
826 
827 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
828           PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
829 @*/
830 PetscErrorCode PETSCKSP_DLLEXPORT PCGASMSetOverlap(PC pc,PetscInt ovl)
831 {
832   PetscErrorCode ierr;
833 
834   PetscFunctionBegin;
835   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
836   PetscValidLogicalCollectiveInt(pc,ovl,2);
837   ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 #undef __FUNCT__
842 #define __FUNCT__ "PCGASMSetType"
843 /*@
844     PCGASMSetType - Sets the type of restriction and interpolation used
845     for local problems in the additive Schwarz method.
846 
847     Logically Collective on PC
848 
849     Input Parameters:
850 +   pc  - the preconditioner context
851 -   type - variant of GASM, one of
852 .vb
853       PC_GASM_BASIC       - full interpolation and restriction
854       PC_GASM_RESTRICT    - full restriction, local processor interpolation
855       PC_GASM_INTERPOLATE - full interpolation, local processor restriction
856       PC_GASM_NONE        - local processor restriction and interpolation
857 .ve
858 
859     Options Database Key:
860 .   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
861 
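    Example (a sketch; selects full restriction and interpolation):
.vb
      ierr = PCGASMSetType(pc,PC_GASM_BASIC);CHKERRQ(ierr);
.ve
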
862     Level: intermediate
863 
864 .keywords: PC, GASM, set, type
865 
866 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
867           PCGASMCreateSubdomains2D()
868 @*/
869 PetscErrorCode PETSCKSP_DLLEXPORT PCGASMSetType(PC pc,PCGASMType type)
870 {
871   PetscErrorCode ierr;
872 
873   PetscFunctionBegin;
874   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
875   PetscValidLogicalCollectiveEnum(pc,type,2);
876   ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
877   PetscFunctionReturn(0);
878 }
879 
880 #undef __FUNCT__
881 #define __FUNCT__ "PCGASMSetSortIndices"
882 /*@
883     PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
884 
885     Logically Collective on PC
886 
887     Input Parameters:
888 +   pc  - the preconditioner context
889 -   doSort - sort the subdomain indices
890 
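    Example (a sketch; retains the user-given ordering of the subdomain indices):
.vb
      ierr = PCGASMSetSortIndices(pc,PETSC_FALSE);CHKERRQ(ierr);
.ve
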
891     Level: intermediate
892 
893 .keywords: PC, GASM, set, type
894 
895 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
896           PCGASMCreateSubdomains2D()
897 @*/
898 PetscErrorCode PETSCKSP_DLLEXPORT PCGASMSetSortIndices(PC pc,PetscBool  doSort)
899 {
900   PetscErrorCode ierr;
901 
902   PetscFunctionBegin;
903   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
904   PetscValidLogicalCollectiveBool(pc,doSort,2);
905   ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
906   PetscFunctionReturn(0);
907 }
908 
909 #undef __FUNCT__
910 #define __FUNCT__ "PCGASMGetSubKSP"
911 /*@C
912    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
913    this processor.
914 
915    Collective on PC iff first_local is requested
916 
917    Input Parameter:
918 .  pc - the preconditioner context
919 
920    Output Parameters:
921 +  n_local - the number of blocks on this processor or PETSC_NULL
922 .  first_local - the global number of the first block on this processor or PETSC_NULL,
923                  all processors must request or all must pass PETSC_NULL
924 -  ksp - the array of KSP contexts
925 
926    Note:
927    After PCGASMGetSubKSP() the array of KSP contexts is not to be freed
928 
929    Currently for some matrix implementations only 1 block per processor
930    is supported.
931 
932    You must call KSPSetUp() before calling PCGASMGetSubKSP().
933 
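   Example (a sketch; sets ILU as the preconditioner on every local block solver):
.vb
      KSP      *subksp;
      PC       subpc;
      PetscInt i,nlocal;
      ierr = KSPSetUp(ksp);CHKERRQ(ierr);
      ierr = PCGASMGetSubKSP(pc,&nlocal,PETSC_NULL,&subksp);CHKERRQ(ierr);
      for (i=0; i<nlocal; i++) {
        ierr = KSPGetPC(subksp[i],&subpc);CHKERRQ(ierr);
        ierr = PCSetType(subpc,PCILU);CHKERRQ(ierr);
      }
.ve
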
934    Level: advanced
935 
936 .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context
937 
938 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMSetOverlap(),
939           PCGASMCreateSubdomains2D(),
940 @*/
941 PetscErrorCode PETSCKSP_DLLEXPORT PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
942 {
943   PetscErrorCode ierr;
944 
945   PetscFunctionBegin;
946   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
947   ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
948   PetscFunctionReturn(0);
949 }
950 
951 /* -------------------------------------------------------------------------------------*/
952 /*MC
953    PCGASM - Use the (restricted) additive Schwarz method, in which each block is (approximately) solved with
954            its own KSP object.
955 
956    Options Database Keys:
957 +  -pc_gasm_truelocal - Activates PCGASMSetUseTrueLocal()
958 .  -pc_gasm_blocks <blks> - Sets total blocks
959 .  -pc_gasm_overlap <ovl> - Sets overlap
960 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
961 
962      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
963       will get different convergence rates due to the default option of -pc_gasm_type restrict. Use
964       -pc_gasm_type basic to get the standard GASM.
965 
966    Notes: Each processor can have one or more subdomains; in this generalized version a subdomain
967      may also be shared by multiple processors. Defaults to one block per processor.
968 
969      To set options on the solvers for each block, append -sub_ to all the KSP and PC
970         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly (see the sketch below)
971 
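     A sketch of the programmatic equivalent of the command-line options above
     (PetscOptionsSetValue() inserts entries into the options database before KSPSetFromOptions() is called):
.vb
      ierr = PetscOptionsSetValue("-sub_pc_type","ilu");CHKERRQ(ierr);
      ierr = PetscOptionsSetValue("-sub_pc_factor_levels","1");CHKERRQ(ierr);
      ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
.ve
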
972      To set the options on the solvers separately for each block, call PCGASMGetSubKSP()
973          and set the options directly on the resulting KSP object (you can access its PC
974          with KSPGetPC())
975 
976 
977    Level: beginner
978 
979    Concepts: additive Schwarz method
980 
981     References:
982     An additive variant of the Schwarz alternating method for the case of many subregions
983     M Dryja, OB Widlund - Courant Institute, New York University Technical report
984 
985     Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
986     Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
987 
988 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
989            PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(),
990            PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()
991 
992 M*/
993 
994 EXTERN_C_BEGIN
995 #undef __FUNCT__
996 #define __FUNCT__ "PCCreate_GASM"
997 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_GASM(PC pc)
998 {
999   PetscErrorCode ierr;
1000   PC_GASM         *osm;
1001 
1002   PetscFunctionBegin;
1003   ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr);
1004   osm->N                 = PETSC_DECIDE;
1005   osm->n                 = 0;
1006   osm->nmax              = 0;
1007   osm->overlap           = 1;
1008   osm->ksp               = 0;
1009   osm->grestriction      = 0;
1010   osm->gprolongation     = 0;
1011   osm->gx                = 0;
1012   osm->gy                = 0;
1013   osm->x                 = 0;
1014   osm->y                 = 0;
1015   osm->is                = 0;
1016   osm->is_local          = 0;
1017   osm->pmat              = 0;
1018   osm->type              = PC_GASM_RESTRICT;
1019   osm->same_local_solves = PETSC_TRUE;
1020   osm->sort_indices      = PETSC_TRUE;
1021 
1022   pc->data                   = (void*)osm;
1023   pc->ops->apply             = PCApply_GASM;
1024   pc->ops->applytranspose    = PCApplyTranspose_GASM;
1025   pc->ops->setup             = PCSetUp_GASM;
1026   pc->ops->destroy           = PCDestroy_GASM;
1027   pc->ops->setfromoptions    = PCSetFromOptions_GASM;
1028   pc->ops->setuponblocks     = PCSetUpOnBlocks_GASM;
1029   pc->ops->view              = PCView_GASM;
1030   pc->ops->applyrichardson   = 0;
1031 
1032   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM",
1033                     PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr);
1034   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM",
1035                     PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr);
1036   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM",
1037                     PCGASMSetOverlap_GASM);CHKERRQ(ierr);
1038   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM",
1039                     PCGASMSetType_GASM);CHKERRQ(ierr);
1040   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM",
1041                     PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
1042   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM",
1043                     PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
1044   PetscFunctionReturn(0);
1045 }
1046 EXTERN_C_END
1047 
1048 
1049 #undef __FUNCT__
1050 #define __FUNCT__ "PCGASMCreateSubdomains"
1051 /*@C
1052    PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
1053    preconditioner for any problem on a general grid.
1054 
1055    Collective
1056 
1057    Input Parameters:
1058 +  A - The global matrix operator
1059 -  n - the number of local blocks
1060 
1061    Output Parameters:
1062 .  outis - the array of index sets defining the subdomains
1063 
1064    Level: advanced
1065 
1066    Note: this generates nonoverlapping subdomains; PCGASM will generate the overlap
1067     from these if you use PCGASMSetLocalSubdomains().
1068 
1069     In the Fortran version you must provide the array outis[] already allocated of length n.
1070 
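   Example (a sketch; four nonoverlapping local subdomains created from A and handed to the PC):
.vb
      IS *is;
      ierr = PCGASMCreateSubdomains(A,4,&is);CHKERRQ(ierr);
      ierr = PCGASMSetLocalSubdomains(pc,4,is,PETSC_NULL);CHKERRQ(ierr);
.ve
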
1071 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1072 
1073 .seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains()
1074 @*/
1075 PetscErrorCode PETSCKSP_DLLEXPORT PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
1076 {
1077   MatPartitioning           mpart;
1078   const char                *prefix;
1079   PetscErrorCode            (*f)(Mat,PetscBool *,MatReuse,Mat*);
1080   PetscMPIInt               size;
1081   PetscInt                  i,j,rstart,rend,bs;
1082   PetscBool                 iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1083   Mat                       Ad = PETSC_NULL, adj;
1084   IS                        ispart,isnumb,*is;
1085   PetscErrorCode            ierr;
1086 
1087   PetscFunctionBegin;
1088   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1089   PetscValidPointer(outis,3);
1090   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);
1091 
1092   /* Get prefix, row distribution, and block size */
1093   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1094   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1095   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1096   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1097 
1098   /* Get diagonal block from matrix if possible */
1099   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1100   ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
1101   if (f) {
1102     ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr);
1103   } else if (size == 1) {
1104     iscopy = PETSC_FALSE; Ad = A;
1105   } else {
1106     iscopy = PETSC_FALSE; Ad = PETSC_NULL;
1107   }
1108   if (Ad) {
1109     ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1110     if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1111   }
1112   if (Ad && n > 1) {
1113     PetscBool  match,done;
1114     /* Try to setup a good matrix partitioning if available */
1115     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1116     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1117     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1118     ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
1119     if (!match) {
1120       ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
1121     }
1122     if (!match) { /* assume a "good" partitioner is available */
1123       PetscInt na,*ia,*ja;
1124       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1125       if (done) {
1126 	/* Build adjacency matrix by hand. Unfortunately a call to
1127 	   MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1128 	   remove the block-aij structure and we cannot expect
1129 	   MatPartitioning to split vertices as we need */
1130 	PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
1131 	nnz = 0;
1132 	for (i=0; i<na; i++) { /* count number of nonzeros */
1133 	  len = ia[i+1] - ia[i];
1134 	  row = ja + ia[i];
1135 	  for (j=0; j<len; j++) {
1136 	    if (row[j] == i) { /* don't count diagonal */
1137 	      len--; break;
1138 	    }
1139 	  }
1140 	  nnz += len;
1141 	}
1142 	ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
1143 	ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
1144 	nnz    = 0;
1145 	iia[0] = 0;
1146 	for (i=0; i<na; i++) { /* fill adjacency */
1147 	  cnt = 0;
1148 	  len = ia[i+1] - ia[i];
1149 	  row = ja + ia[i];
1150 	  for (j=0; j<len; j++) {
1151 	    if (row[j] != i) { /* if not diagonal */
1152 	      jja[nnz+cnt++] = row[j];
1153 	    }
1154 	  }
1155 	  nnz += cnt;
1156 	  iia[i+1] = nnz;
1157 	}
1158 	/* Partitioning of the adjacency matrix */
1159 	ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
1160 	ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1161 	ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
1162 	ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1163 	ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1164 	ierr = MatDestroy(adj);CHKERRQ(ierr);
1165 	foundpart = PETSC_TRUE;
1166       }
1167       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1168     }
1169     ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr);
1170   }
1171   if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);}
1172 
1173   ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
1174   *outis = is;
1175 
1176   if (!foundpart) {
1177 
1178     /* Partitioning by contiguous chunks of rows */
1179 
1180     PetscInt mbs   = (rend-rstart)/bs;
1181     PetscInt start = rstart;
1182     for (i=0; i<n; i++) {
1183       PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1184       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1185       start += count;
1186     }
1187 
1188   } else {
1189 
1190     /* Partitioning by adjacency of diagonal block  */
1191 
1192     const PetscInt *numbering;
1193     PetscInt       *count,nidx,*indices,*newidx,start=0;
1194     /* Get node count in each partition */
1195     ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
1196     ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
1197     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1198       for (i=0; i<n; i++) count[i] *= bs;
1199     }
1200     /* Build indices from node numbering */
1201     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1202     ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
1203     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1204     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1205     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1206     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1207     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1208       ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
1209       for (i=0; i<nidx; i++)
1210 	for (j=0; j<bs; j++)
1211 	  newidx[i*bs+j] = indices[i]*bs + j;
1212       ierr = PetscFree(indices);CHKERRQ(ierr);
1213       nidx   *= bs;
1214       indices = newidx;
1215     }
1216     /* Shift to get global indices */
1217     for (i=0; i<nidx; i++) indices[i] += rstart;
1218 
1219     /* Build the index sets for each block */
1220     for (i=0; i<n; i++) {
1221       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
1222       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1223       start += count[i];
1224     }
1225 
1226     ierr = PetscFree(count);CHKERRQ(ierr);
1227     ierr = PetscFree(indices);CHKERRQ(ierr);
1228     ierr = ISDestroy(isnumb);CHKERRQ(ierr);
1229     ierr = ISDestroy(ispart);CHKERRQ(ierr);
1230 
1231   }
1232 
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "PCGASMDestroySubdomains"
1238 /*@C
1239    PCGASMDestroySubdomains - Destroys the index sets created with
1240    PCGASMCreateSubdomains(). Should be called after setting subdomains
1241    with PCGASMSetLocalSubdomains().
1242 
1243    Collective
1244 
1245    Input Parameters:
1246 +  n - the number of index sets
1247 .  is - the array of index sets
1248 -  is_local - the array of local index sets, can be PETSC_NULL
1249 
1250    Level: advanced
1251 
1252 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1253 
1254 .seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains()
1255 @*/
1256 PetscErrorCode PETSCKSP_DLLEXPORT PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1257 {
1258   PetscInt       i;
1259   PetscErrorCode ierr;
1260   PetscFunctionBegin;
1261   if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
1262   PetscValidPointer(is,2);
1263   for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); }
1264   ierr = PetscFree(is);CHKERRQ(ierr);
1265   if (is_local) {
1266     PetscValidPointer(is_local,3);
1267     for (i=0; i<n; i++) { ierr = ISDestroy(is_local[i]);CHKERRQ(ierr); }
1268     ierr = PetscFree(is_local);CHKERRQ(ierr);
1269   }
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 
1274 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1275 {                                                                                                       \
1276  PetscInt first_row = first/M, last_row = last/M+1;                                                     \
1277   /*                                                                                                    \
1278    Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
1279    of the bounding box of the intersection of the subdomain with the local ownership range (local       \
1280    subdomain).                                                                                          \
1281    Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
1282    of the intersection.                                                                                 \
1283   */                                                                                                    \
1284   /* ylow_loc is the grid row containing the first element of the local subdomain */                    \
1285   ylow_loc = PetscMax(first_row,ylow);                                                                    \
1286   /* xleft_loc is the offset of the first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1287   xleft_loc = ylow_loc==first_row?PetscMax(first%M,xleft):xleft;                                                                            \
1288   /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
1289   yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
1290   /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */      \
1291   xright_loc = yhigh_loc==last_row?PetscMin(xright,last%M):xright;                                                                          \
1292   /* Now compute the size of the local subdomain n. */ \
1293   n = 0;                                               \
1294   if(ylow_loc < yhigh_loc) {                           \
1295     PetscInt width = xright-xleft;                     \
1296     n += width*(yhigh_loc-ylow_loc-1);                 \
1297     n += PetscMin(PetscMax(xright_loc-xleft,0),width); \
1298     n -= PetscMin(PetscMax(xleft_loc-xleft,0), width); \
1299   }\
1300 }
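/*
   A worked example of the macro above (an illustrative sketch): on an M=8 by N=8 grid,
   a subdomain with (xleft,xright)=(2,6), (ylow,yhigh)=(2,6) and a local ownership range
   (first,last)=(20,44) gives first_row=2, last_row=6, ylow_loc=2, xleft_loc=4,
   yhigh_loc=6, xright_loc=4, and hence n = 4*3 + 2 - 2 = 12: two points in row 2,
   four in each of rows 3 and 4, and two in row 5.
*/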
1301 
1302 #undef __FUNCT__
1303 #define __FUNCT__ "PCGASMCreateSubdomains2D"
1304 /*@
1305    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1306    preconditioner for a two-dimensional problem on a regular grid.
1307 
1308    Collective
1309 
1310    Input Parameters:
1311 +  M, N - the global number of mesh points in the x and y directions
1312 .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1313 .  dof - degrees of freedom per node
1314 -  overlap - overlap in mesh lines
1315 
1316    Output Parameters:
1317 +  nsub - the number of local subdomains created
1318 .  is - array of index sets defining overlapping (if overlap > 0) subdomains
1319 -  is_local - array of index sets defining non-overlapping subdomains
1320 
1321    Note:
1322    Presently PCGASMCreateSubdomains2D() is valid only for sequential
1323    preconditioners.  More general related routines are
1324    PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains().
1325 
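   Example (a sketch): a 2x2 decomposition of the grid with one degree of freedom
   per node and an overlap of one mesh line; PCGASMSetLocalSubdomains() is assumed
   to accept the arrays produced here:
.vb
      PetscInt nsub;
      IS       *is, *is_local;
      PCGASMCreateSubdomains2D(pc, M, N, 2, 2, 1, 1, &nsub, &is, &is_local);
      PCGASMSetLocalSubdomains(pc, nsub, is, is_local);
.ve
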
1326    Level: advanced
1327 
1328 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid
1329 
1330 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
1331           PCGASMSetOverlap()
1332 @*/
1333 PetscErrorCode PETSCKSP_DLLEXPORT PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
1334 {
1335   PetscErrorCode ierr;
1336   PetscMPIInt size, rank;
1337   PetscInt i, j;
1338   PetscInt maxheight, maxwidth;
1339   PetscInt xstart, xleft, xright, xleft_loc, xright_loc;
1340   PetscInt ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1341   PetscInt x[2][2], y[2][2];
1342   PetscInt first, last;
1343   PetscInt nidx, n0, *idx;
1344   PetscInt ii,jj,s,q,d;
1345   PetscInt k,kk;
1346   PetscMPIInt color;
1347   MPI_Comm comm, subcomm;
1348 
1349   IS **iis;
1350   PetscFunctionBegin;
1351   ierr = PetscObjectGetComm((PetscObject)pc, &comm);     CHKERRQ(ierr);
1352   ierr = MPI_Comm_size(comm, &size);                     CHKERRQ(ierr);
1353   ierr = MPI_Comm_rank(comm, &rank);                     CHKERRQ(ierr);
1354   ierr = MatGetOwnershipRange(pc->pmat, &first, &last);  CHKERRQ(ierr);
1355   if(first%dof || last%dof) {
1356     SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1357 	     "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1358   }
1359   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1360   s = 0;
1361   ystart = 0;
1362   for (j=0; j<Ndomains; ++j) {
1363     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1364     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1365     /* Vertical domain limits without an overlap. */
1366     ylow = ystart;
1367     yhigh = PetscMin(ystart + maxheight,N);
1368     xstart = 0;
1369     for (i=0; i<Mdomains; ++i) {
1370       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1371       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1372       /* Horizontal domain limits without an overlap. */
1373       xleft  = xstart;
1374       xright = PetscMin(xstart+maxwidth,M);
1375       /*
1376 	 Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1377       */
1378       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,nidx);
1379       if(nidx) {
1380         ++s;
1381       }
1382       xstart += maxwidth;
1383     }/* for(i = 0; i < Mdomains; ++i) */
1384     ystart += maxheight;
1385   }/* for(j = 0; j < Ndomains; ++j) */
1386   /* Now we can allocate the necessary number of ISs. */
1387   *nsub = s;
1388   ierr = PetscMalloc((*nsub)*sizeof(IS),is);          CHKERRQ(ierr);
1389   ierr = PetscMalloc((*nsub)*sizeof(IS),is_local);    CHKERRQ(ierr);
1390   s = 0;
1391   ystart = 0;
1392   for (j=0; j<Ndomains; ++j) {
1393     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1394     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1395     /* Vertical domain limits without an overlap. */
1396     y[0][0] = ystart;
1397     y[0][1] = PetscMin(ystart + maxheight,N);
1398     /* Vertical domain limits with an overlap. */
1399     y[1][1] = PetscMin(ystart + maxheight + overlap,N);
1400     y[1][0] = PetscMax(ystart - overlap,0);    xstart = 0;
1401     for (i=0; i<Mdomains; ++i) {
1402       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1403       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1404       /* Horizontal domain limits without an overlap. */
1405       x[0][0] = xstart;
1406       x[0][1] = PetscMin(xstart+maxwidth,M);
1407       /* Horizontal domain limits with an overlap. */
1408       x[1][0]  = PetscMax(xstart - overlap,0);
1409       x[1][1]  = PetscMin(xstart + maxwidth + overlap,M);
1410       /*
1411 	 Determine whether this domain intersects this processor's ownership range of pc->pmat.
1412 	 Do this twice: first for the domain without the overlap, then for the domain with it.
1413 	 During the first pass create the subcommunicators, and reuse them on the second pass.
1414       */
1415       for(q = 0; q < 2; ++q) {
1416 	/*
1417 	  The domain limits (xleft, xright) and (ylow, yhigh) are set
1418 	  according to whether the domain with or without the overlap is being considered.
1419 	*/
1420 	xleft = x[q][0]; xright = x[q][1];
1421 	ylow  = y[q][0]; yhigh  = y[q][1];
1422         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,nidx);
1423 	nidx *= dof;
1424         /*
1425          Based on the counted number of indices in the local domain (without an overlap),
1426          construct a subcommunicator of all the processors supporting this domain.
1427          */
1428 	if(q == 0) {
	  n0 = nidx; /* remember the size of the nonoverlapping local piece: it decides this processor's participation, matching the counting pass above */
1429 	  if(nidx) {
1430 	    color = 1;
1431 	  }
1432 	  else {
1433 	    color = MPI_UNDEFINED;
1434 	  }
1435 	  ierr = MPI_Comm_split(comm, color, rank, &subcomm); CHKERRQ(ierr);
1436 	}
1437         /*
1438          Proceed only if the nonoverlapping piece of this subdomain intersects the
              local ownership range, matching the counting pass that sized the arrays.
1439          */
1440         if(n0) {
1441           if(q == 0) {
1442             iis = is_local;
1443           }
1444           if(q == 1) {
1445             /*
1446              The IS for the overlapping subdomain shares a communicator with the IS of the corresponding nonoverlapping subdomain.
1447              Moreover, if the overlap is zero, the two ISs are identical.
1448              */
1449             if(overlap == 0) {
1450               (*is)[s] = (*is_local)[s];
1451               ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
1452               continue;
1453             }
1454             else {
1455               iis = is;
1456               subcomm = ((PetscObject)(*is_local)[s])->comm;
1457             }
1458           }/* if(q == 1) */
1459 	  ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
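	  /* Enumerate, row by row, the dof-expanded global indices of the intersection
	     of this subdomain with the local ownership range computed above. */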
1460 	  k    = 0;
1461 	  for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1462 	    PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft;
1463 	    PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright;
1464 	    kk = dof*(M*jj + x0);
1465 	    for (ii=x0; ii<x1; ++ii) {
1466 	      for(d = 0; d < dof; ++d) {
1467 		idx[k++] = kk++;
1468 	      }
1469 	    }
1470 	  }
1471 	  ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr);
1472 	}/* if(nidx) */
1473       }/* for(q = 0; q < 2; ++q) */
1474       if(n0) {
1475         ++s;
1476       }
1477       xstart += maxwidth;
1478     }/* for(i = 0; i < Mdomains; ++i) */
1479     ystart += maxheight;
1480   }/* for(j = 0; j < Ndomains; ++j) */
1481   PetscFunctionReturn(0);
1482 }
1483 
1484 #undef __FUNCT__
1485 #define __FUNCT__ "PCGASMGetLocalSubdomains"
1486 /*@C
1487     PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor
1488     only) for the additive Schwarz preconditioner.
1489 
1490     Not Collective
1491 
1492     Input Parameter:
1493 .   pc - the preconditioner context
1494 
1495     Output Parameters:
1496 +   n - the number of subdomains for this processor (default value = 1)
1497 .   is - the index sets that define the subdomains for this processor
1498 -   is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)
1499 
1500 
1501     Notes:
1502     The IS numbering is in the parallel, global numbering of the vector.
1503 
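    Example (a sketch; the outputs are only meaningful if pc is of type PCGASM):
.vb
      PetscInt n;
      IS       *is, *is_local;
      PCGASMGetLocalSubdomains(pc, &n, &is, &is_local);
.ve
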
1504     Level: advanced
1505 
1506 .keywords: PC, GASM, set, local, subdomains, additive Schwarz
1507 
1508 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1509           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices()
1510 @*/
1511 PetscErrorCode PETSCKSP_DLLEXPORT PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1512 {
1513   PC_GASM         *osm;
1514   PetscErrorCode ierr;
1515   PetscBool      match;
1516 
1517   PetscFunctionBegin;
1518   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1519   PetscValidIntPointer(n,2);
1520   if (is) PetscValidPointer(is,3);
1521   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1522   if (!match) {
1523     if (n)  *n  = 0;
1524     if (is) *is = PETSC_NULL;
1525   } else {
1526     osm = (PC_GASM*)pc->data;
1527     if (n)  *n  = osm->n;
1528     if (is) *is = osm->is;
1529     if (is_local) *is_local = osm->is_local;
1530   }
1531   PetscFunctionReturn(0);
1532 }
1533 
1534 #undef __FUNCT__
1535 #define __FUNCT__ "PCGASMGetLocalSubmatrices"
1536 /*@C
1537     PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1538     only) for the additive Schwarz preconditioner.
1539 
1540     Not Collective
1541 
1542     Input Parameter:
1543 .   pc - the preconditioner context
1544 
1545     Output Parameters:
1546 +   n - the number of matrices for this processor (default value = 1)
1547 -   mat - the matrices
1548 
1549 
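    Example (a sketch; the PC must have been set up first, see the error check below):
.vb
      PetscInt n;
      Mat      *sub;
      PCSetUp(pc);
      PCGASMGetLocalSubmatrices(pc, &n, &sub);
.ve
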
1550     Level: advanced
1551 
1552 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi
1553 
1554 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1555           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains()
1556 @*/
1557 PetscErrorCode PETSCKSP_DLLEXPORT PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1558 {
1559   PC_GASM         *osm;
1560   PetscErrorCode ierr;
1561   PetscBool      match;
1562 
1563   PetscFunctionBegin;
1564   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1565   PetscValidIntPointer(n,2);
1566   if (mat) PetscValidPointer(mat,3);
1567   if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must be called after KSPSetUp() or PCSetUp().");
1568   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1569   if (!match) {
1570     if (n)   *n   = 0;
1571     if (mat) *mat = PETSC_NULL;
1572   } else {
1573     osm = (PC_GASM*)pc->data;
1574     if (n)   *n   = osm->n;
1575     if (mat) *mat = osm->pmat;
1576   }
1577   PetscFunctionReturn(0);
1578 }
1579