/*
  This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
  In this version each processor may intersect multiple subdomains and any subdomain may
  intersect multiple processors.  Intersections of subdomains with processors are called *local
  subdomains*.

       N    - total number of distinct global subdomains          (set explicitly in PCGASMSetTotalSubdomains() or implicitly in PCGASMSetSubdomains() and then calculated in PCSetUp_GASM())
       n    - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains())
       nmax - maximum number of local subdomains per processor    (calculated in PCSetUp_GASM())
*/
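
/*
  A minimal sketch of typical PCGASM usage from application code (assumes a parallel
  Mat A and Vecs b, x have already been assembled; declarations and error checking
  abbreviated):

     KSP ksp;
     PC  pc;
     ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCGASM);CHKERRQ(ierr);
     ierr = PCGASMSetOverlap(pc,1);CHKERRQ(ierr);
     ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
*/
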
#include <petsc/private/pcimpl.h>     /*I "petscpc.h" I*/
#include <petscdm.h>

typedef struct {
  PetscInt    N,n,nmax;
  PetscInt    overlap;                  /* overlap requested by user */
  PCGASMType  type;                     /* use reduced interpolation, restriction or both */
  PetscBool   type_set;                 /* if user set this value (so won't change it for symmetric problems) */
  PetscBool   same_subdomain_solvers;   /* flag indicating whether all local solvers are same */
  PetscBool   sort_indices;             /* flag to sort subdomain indices */
  PetscBool   user_subdomains;          /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
  PetscBool   dm_subdomains;            /* whether DM is allowed to define subdomains */
  PetscBool   hierarchicalpartitioning;
  IS          *ois;                     /* index sets that define the outer (conceptually, overlapping) subdomains */
  IS          *iis;                     /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
  KSP         *ksp;                     /* linear solvers for each subdomain */
  Mat         *pmat;                    /* subdomain block matrices */
  Vec         gx,gy;                    /* Merged work vectors */
  Vec         *x,*y;                    /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
  VecScatter  gorestriction;            /* merged restriction to disjoint union of outer subdomains */
  VecScatter  girestriction;            /* merged restriction to disjoint union of inner subdomains */
  VecScatter  pctoouter;
  IS          permutationIS;
  Mat         permutationP;
  Mat         pcmat;
  Vec         pcx,pcy;
} PC_GASM;

static PetscErrorCode  PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscInt       i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
  ierr = PetscMalloc2(osm->n,numbering,osm->n,permutation);CHKERRQ(ierr);
  ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);CHKERRQ(ierr);
  for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
  ierr = PetscSortIntWithPermutation(osm->n,*numbering,*permutation);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode  PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscInt       j,nidx;
  const PetscInt *idx;
  PetscViewer    sviewer;
  char           *cidx;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i < 0 || i >= osm->n) SETERRQ2(PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must be nonnegative and less than %D", i, osm->n);
  /* Inner subdomains. */
  ierr = ISGetLocalSize(osm->iis[i], &nidx);CHKERRQ(ierr);
  /*
   No more than 15 characters per index plus a space.
   PetscViewerStringSPrintf() requires a string of size at least 2, so use (nidx+1) instead of nidx,
   in case nidx == 0. That will take care of the space for the trailing '\0' as well.
   For nidx == 0, the whole string consists of 16 '\0' characters.
   */
  ierr = PetscMalloc1(16*(nidx+1)+1, &cidx);CHKERRQ(ierr);
  ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16*(nidx+1)+1, &sviewer);CHKERRQ(ierr);
  ierr = ISGetIndices(osm->iis[i], &idx);CHKERRQ(ierr);
  for (j = 0; j < nidx; ++j) {
    ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
  }
  ierr = ISRestoreIndices(osm->iis[i],&idx);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");CHKERRQ(ierr);
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscFree(cidx);CHKERRQ(ierr);
  /* Outer subdomains. */
  ierr = ISGetLocalSize(osm->ois[i], &nidx);CHKERRQ(ierr);
  /*
   No more than 15 characters per index plus a space.
   PetscViewerStringSPrintf() requires a string of size at least 2, so use (nidx+1) instead of nidx,
   in case nidx == 0. That will take care of the space for the trailing '\0' as well.
   For nidx == 0, the whole string consists of 16 '\0' characters.
   */
  ierr = PetscMalloc1(16*(nidx+1)+1, &cidx);CHKERRQ(ierr);
  ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16*(nidx+1)+1, &sviewer);CHKERRQ(ierr);
  ierr = ISGetIndices(osm->ois[i], &idx);CHKERRQ(ierr);
  for (j = 0; j < nidx; ++j) {
    ierr = PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);CHKERRQ(ierr);
  }
  ierr = ISRestoreIndices(osm->ois[i],&idx);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");CHKERRQ(ierr);
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscFree(cidx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  char           fname[PETSC_MAX_PATH_LEN+1];
  PetscInt       l, d, count;
  PetscBool      doprint,found;
  PetscViewer    viewer, sviewer = NULL;
  PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  doprint  = PETSC_FALSE;
  ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_print_subdomains",&doprint,NULL);CHKERRQ(ierr);
  if (!doprint) PetscFunctionReturn(0);
  ierr = PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr);
  if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }
  ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);CHKERRQ(ierr);
  /*
   Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
   the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
  */
  ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
  l    = 0;
  ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
  for (count = 0; count < osm->N; ++count) {
    /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
    if (l<osm->n) {
      d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
      if (numbering[d] == count) {
        ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
        ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
        ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
        ++l;
      }
    }
    ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
  }
  ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  PetscErrorCode ierr;
  PetscMPIInt    rank, size;
  PetscInt       bsz;
  PetscBool      iascii,view_subdomains=PETSC_FALSE;
  PetscViewer    sviewer;
  PetscInt       count, l;
  char           overlap[256]     = "user-defined overlap";
  char           gsubdomains[256] = "unknown total number of subdomains";
  char           msubdomains[256] = "unknown max number of local subdomains";
  PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);CHKERRQ(ierr);

  if (osm->overlap >= 0) {
    ierr = PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);CHKERRQ(ierr);
  }
  if (osm->N != PETSC_DETERMINE) {
    ierr = PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);CHKERRQ(ierr);
  }
  if (osm->nmax != PETSC_DETERMINE) {
    ierr = PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);
  }

  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);CHKERRQ(ierr);

  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    /*
     Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
     the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
     collectively on the comm.
     */
    ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",overlap);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",gsubdomains);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",msubdomains);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
    ierr = PetscViewerASCIIPrintf(viewer,"  Subdomain solver info is as follows:\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
    /* Make sure that everybody waits for the banner to be printed. */
    ierr = MPI_Barrier(PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
    /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
    ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
    l = 0;
    for (count = 0; count < osm->N; ++count) {
      PetscMPIInt srank, ssize;
      if (l<osm->n) {
        PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
        if (numbering[d] == count) {
          ierr = MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);CHKERRQ(ierr);
          ierr = MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);CHKERRQ(ierr);
          ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
          ierr = ISGetLocalSize(osm->ois[d],&bsz);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"  [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          if (view_subdomains) {
            ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
          }
          if (!pc->setupcalled) {
            ierr = PetscViewerASCIIPrintf(sviewer, "  Solver not set up yet: PCSetUp() not yet called\n");CHKERRQ(ierr);
          } else {
            ierr = KSPView(osm->ksp[d],sviewer);CHKERRQ(ierr);
          }
          ierr = PetscViewerASCIIPrintf(sviewer,"  - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
          ++l;
        }
      }
      ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
    }
    ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);

PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
{
   PC_GASM              *osm = (PC_GASM*)pc->data;
   MatPartitioning       part;
   MPI_Comm              comm;
   PetscMPIInt           size;
   PetscInt              nlocalsubdomains,fromrows_localsize;
   IS                    partitioning,fromrows,isn;
   Vec                   outervec;
   PetscErrorCode        ierr;

   PetscFunctionBegin;
   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
   /* Hierarchical partitioning is not needed when the total number of subdomains
    * is already consistent with the number of MPI tasks, i.e., in any of the
    * following cases. */
   if (osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) PetscFunctionReturn(0);
   if (size%osm->N != 0) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"The total number of subdomains %D must be a factor of the number of processors %d",osm->N,size);
   nlocalsubdomains = size/osm->N;
   osm->n           = 1;
   ierr = MatPartitioningCreate(comm,&part);CHKERRQ(ierr);
   ierr = MatPartitioningSetAdjacency(part,pc->pmat);CHKERRQ(ierr);
   ierr = MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);CHKERRQ(ierr);
   ierr = MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);CHKERRQ(ierr);
   ierr = MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);CHKERRQ(ierr);
   ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr);
   /* get the new owner process of each vertex */
   ierr = MatPartitioningApply(part,&partitioning);CHKERRQ(ierr);
   ierr = ISBuildTwoSided(partitioning,NULL,&fromrows);CHKERRQ(ierr);
   ierr = ISPartitioningToNumbering(partitioning,&isn);CHKERRQ(ierr);
   ierr = ISDestroy(&isn);CHKERRQ(ierr);
   ierr = ISGetLocalSize(fromrows,&fromrows_localsize);CHKERRQ(ierr);
   ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);
   ierr = MatCreateVecs(pc->pmat,&outervec,NULL);CHKERRQ(ierr);
   ierr = VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));CHKERRQ(ierr);
   ierr = VecDuplicate(osm->pcx,&(osm->pcy));CHKERRQ(ierr);
   ierr = VecScatterCreateWithData(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));CHKERRQ(ierr);
   ierr = MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));CHKERRQ(ierr);
   ierr = PetscObjectReference((PetscObject)fromrows);CHKERRQ(ierr);
   osm->permutationIS = fromrows;
   osm->pcmat = pc->pmat;
   ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
   pc->pmat = osm->permutationP;
   ierr = VecDestroy(&outervec);CHKERRQ(ierr);
   ierr = ISDestroy(&fromrows);CHKERRQ(ierr);
   ierr = ISDestroy(&partitioning);CHKERRQ(ierr);
   osm->n           = PETSC_DETERMINE;
   PetscFunctionReturn(0);
}
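
/*
   A sketch of driving the hierarchical-partitioning path above from the command
   line (assumes an application ./app running on 4 MPI ranks, so that the 2
   requested subdomains each span 2 ranks; osm->N must divide the communicator
   size exactly, as checked above):

      mpiexec -n 4 ./app -pc_type gasm -pc_gasm_total_subdomains 2 \
              -pc_gasm_use_hierachical_partitioning

   Note that the option key spelling above matches the string registered in
   PCSetFromOptions_GASM() below.
*/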
static PetscErrorCode PCSetUp_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscBool      symset,flg;
  PetscInt       i,nInnerIndices,nTotalInnerIndices;
  PetscMPIInt    rank, size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  Vec            x,y;
  PetscInt       oni;       /* Number of indices in the i-th local outer subdomain.               */
  const PetscInt *oidxi;    /* Indices from the i-th local outer subdomain.                       */
  PetscInt       on;        /* Number of indices in the disjoint union of local outer subdomains. */
  PetscInt       *oidx;     /* Indices in the disjoint union of local outer subdomains. */
  IS             gois;      /* Disjoint union of the global indices of the outer subdomains.      */
  IS             goid;      /* Identity IS of the size of the disjoint union of outer subdomains. */
  PetscScalar    *gxarray, *gyarray;
  PetscInt       gostart;   /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
  PetscInt       num_subdomains    = 0;
  DM             *subdomain_dm     = NULL;
  char           **subdomain_names = NULL;
  PetscInt       *numbering;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
  if (!pc->setupcalled) {
    /* use a hierarchical partitioning */
    if (osm->hierarchicalpartitioning) {
      ierr = PCGASMSetHierarchicalPartitioning(pc);CHKERRQ(ierr);
    }
    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) osm->type = PC_GASM_BASIC;
    }

    if (osm->n == PETSC_DETERMINE) {
      if (osm->N != PETSC_DETERMINE) {
        /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
        ierr = PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);CHKERRQ(ierr);
      } else if (osm->dm_subdomains && pc->dm) {
        /* try pc->dm next, if allowed */
        PetscInt  d;
        IS       *inner_subdomain_is, *outer_subdomain_is;
        ierr = DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);CHKERRQ(ierr);
        if (num_subdomains) {
          ierr = PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);CHKERRQ(ierr);
        }
        for (d = 0; d < num_subdomains; ++d) {
          if (inner_subdomain_is) {ierr = ISDestroy(&inner_subdomain_is[d]);CHKERRQ(ierr);}
          if (outer_subdomain_is) {ierr = ISDestroy(&outer_subdomain_is[d]);CHKERRQ(ierr);}
        }
        ierr = PetscFree(inner_subdomain_is);CHKERRQ(ierr);
        ierr = PetscFree(outer_subdomain_is);CHKERRQ(ierr);
      } else {
        /* still no subdomains; use one per processor */
        osm->nmax = osm->n = 1;
        ierr      = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
        osm->N    = size;
        ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
      }
    }
    if (!osm->iis) {
      /*
       osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
       We create the requisite number of local inner subdomains and then expand them into
       outer subdomains, if necessary.
       */
      ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
    }
    if (!osm->ois) {
      /*
       Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
       has been requested, copy the inner subdomains over so they can be modified.
      */
      ierr = PetscMalloc1(osm->n,&osm->ois);CHKERRQ(ierr);
      for (i=0; i<osm->n; ++i) {
        if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */
          ierr = ISDuplicate(osm->iis[i],(osm->ois)+i);CHKERRQ(ierr);
          ierr = ISCopy(osm->iis[i],osm->ois[i]);CHKERRQ(ierr);
        } else {
          ierr        = PetscObjectReference((PetscObject)((osm->iis)[i]));CHKERRQ(ierr);
          osm->ois[i] = osm->iis[i];
        }
      }
      if (osm->overlap>0 && osm->N>1) {
        /* Extend the "overlapping" regions by a number of steps */
        ierr = MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);CHKERRQ(ierr);
      }
    }

    /* Now the subdomains are defined.  Determine their global and max local numbers, if necessary. */
    if (osm->nmax == PETSC_DETERMINE) {
      PetscMPIInt inwork,outwork;
      /* determine global number of subdomains and the max number of local subdomains */
      inwork     = osm->n;
      ierr       = MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
      osm->nmax  = outwork;
    }
    if (osm->N == PETSC_DETERMINE) {
      /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
      ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);CHKERRQ(ierr);
    }

    if (osm->sort_indices) {
      for (i=0; i<osm->n; i++) {
        ierr = ISSort(osm->ois[i]);CHKERRQ(ierr);
        ierr = ISSort(osm->iis[i]);CHKERRQ(ierr);
      }
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr);

    /*
       Merge the ISs, create merged vectors and restrictions.
     */
    /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
    on = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
      on  += oni;
    }
    ierr = PetscMalloc1(on, &oidx);CHKERRQ(ierr);
    on   = 0;
    /* Merge local indices together */
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
      ierr = ISGetIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
      ierr = PetscMemcpy(oidx+on,oidxi,sizeof(PetscInt)*oni);CHKERRQ(ierr);
      ierr = ISRestoreIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
      on  += oni;
    }
    ierr = ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);CHKERRQ(ierr);
    nTotalInnerIndices = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->iis[i],&nInnerIndices);CHKERRQ(ierr);
      nTotalInnerIndices += nInnerIndices;
    }
    ierr = VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);CHKERRQ(ierr);
    ierr = VecDuplicate(x,&y);CHKERRQ(ierr);

    ierr = VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);CHKERRQ(ierr);
    ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gostart, NULL);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);CHKERRQ(ierr);
    /* gois may contain indices that are not locally owned */
    ierr = VecScatterCreateWithData(x,gois,osm->gx,goid, &(osm->gorestriction));CHKERRQ(ierr);
    ierr = PetscMalloc1(osm->n,&numbering);CHKERRQ(ierr);
    ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);CHKERRQ(ierr);
    ierr = VecDestroy(&x);CHKERRQ(ierr);
    ierr = ISDestroy(&gois);CHKERRQ(ierr);

    /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
    {
      PetscInt        ini;           /* Number of indices in the i-th local inner subdomain. */
      PetscInt        in;            /* Number of indices in the disjoint union of local inner subdomains. */
      PetscInt       *iidx;          /* Global indices in the merged local inner subdomain. */
      PetscInt       *ioidx;         /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
      IS              giis;          /* IS for the disjoint union of inner subdomains. */
      IS              giois;         /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
      PetscScalar    *array;
      const PetscInt *indices;
      PetscInt        k;
      on = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
        on  += oni;
      }
      ierr = PetscMalloc1(on, &iidx);CHKERRQ(ierr);
      ierr = PetscMalloc1(on, &ioidx);CHKERRQ(ierr);
      ierr = VecGetArray(y,&array);CHKERRQ(ierr);
      /* Mark each inner-subdomain entry with its subdomain's global number, so that overlap-only entries can be detected below. */
      in   = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->iis[i],&ini);CHKERRQ(ierr);
        for (k = 0; k < ini; ++k) {
          array[in+k] = numbering[i];
        }
        in += ini;
      }
      ierr = VecRestoreArray(y,&array);CHKERRQ(ierr);
      ierr = VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecGetOwnershipRange(osm->gy,&gostart, NULL);CHKERRQ(ierr);
      ierr = VecGetArray(osm->gy,&array);CHKERRQ(ierr);
      on  = 0;
      in  = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
        ierr = ISGetIndices(osm->ois[i],&indices);CHKERRQ(ierr);
        for (k=0; k<oni; k++) {
          /* skip overlapping indices to get the inner subdomain */
          if (PetscRealPart(array[on+k]) != numbering[i]) continue;
          iidx[in]    = indices[k];
          ioidx[in++] = gostart+on+k;
        }
        ierr = ISRestoreIndices(osm->ois[i], &indices);CHKERRQ(ierr);
        on += oni;
      }
      ierr = VecRestoreArray(osm->gy,&array);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);CHKERRQ(ierr);
      ierr = VecScatterCreateWithData(y,giis,osm->gy,giois,&osm->girestriction);CHKERRQ(ierr);
      ierr = VecDestroy(&y);CHKERRQ(ierr);
      ierr = ISDestroy(&giis);CHKERRQ(ierr);
      ierr = ISDestroy(&giois);CHKERRQ(ierr);
    }
    ierr = ISDestroy(&goid);CHKERRQ(ierr);
    ierr = PetscFree(numbering);CHKERRQ(ierr);

    /* Create the subdomain work vectors. */
    ierr = PetscMalloc1(osm->n,&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc1(osm->n,&osm->y);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr);
    for (i=0, on=0; i<osm->n; ++i, on += oni) {
      PetscInt oNi;
      ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
      /* on a sub communicator */
      ierr = ISGetSize(osm->ois[i],&oNi);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);CHKERRQ(ierr);
    }
    ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr);
    /* Create the subdomain solvers */
    ierr = PetscMalloc1(osm->n,&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      char subprefix[PETSC_MAX_PATH_LEN+1];
      ierr        = KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);CHKERRQ(ierr);
      ierr        = PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);CHKERRQ(ierr);
      ierr        = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr        = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr        = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); /* Why do we need this here? */
      if (subdomain_dm) {
        ierr = KSPSetDM(ksp,subdomain_dm[i]);CHKERRQ(ierr);
        ierr = DMDestroy(subdomain_dm+i);CHKERRQ(ierr);
      }
      ierr        = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr        = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      if (subdomain_names && subdomain_names[i]) {
        ierr = PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);CHKERRQ(ierr);
        ierr = KSPAppendOptionsPrefix(ksp,subprefix);CHKERRQ(ierr);
        ierr = PetscFree(subdomain_names[i]);CHKERRQ(ierr);
      }
      ierr        = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    ierr = PetscFree(subdomain_dm);CHKERRQ(ierr);
    ierr = PetscFree(subdomain_names);CHKERRQ(ierr);
    scall = MAT_INITIAL_MATRIX;
  } else { /* if (pc->setupcalled) */
    /*
       Destroy the submatrices from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr  = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
    if (osm->permutationIS) {
      ierr = MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);CHKERRQ(ierr);
      ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
      osm->pcmat = pc->pmat;
      pc->pmat   = osm->permutationP;
    }
  }

  /*
     Extract out the submatrices.
  */
  if (size > 1) {
    ierr = MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
  }
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over the submatrices, putting them into the local ksps
  */
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }
  if (osm->pcmat) {
    ierr = MatDestroy(&pc->pmat);CHKERRQ(ierr);
    pc->pmat   = osm->pcmat;
    osm->pcmat = NULL;
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  Vec            x,y;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  if (osm->pctoouter) {
    ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    x = osm->pcx;
    y = osm->pcy;
  } else {
    x = xin;
    y = yout;
  }
  /*
     Support for limiting the restriction or interpolation only to the inner
     subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_GASM_RESTRICT)) {
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_RESTRICT)) {
    ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* do the subdomain solves */
  for (i=0; i<osm->n; ++i) {
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
  }
  /* Do we need to zero y? */
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  if (osm->pctoouter) {
    ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  Vec            x,y;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  if (osm->pctoouter) {
    ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    x = osm->pcx;
    y = osm->pcy;
  } else {
    x = xin;
    y = yout;
  }
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).

     Note: these are reversed from the PCApply_GASM() because we are applying the
     transpose of the three terms
  */
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* do the local solves */
  for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
  }
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_RESTRICT)) {
    ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  if (osm->pctoouter) {
    ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode PCReset_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (osm->ksp) {
    for (i=0; i<osm->n; i++) {
      ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
    }
  }
  if (osm->pmat) {
    if (osm->n > 0) {
      PetscMPIInt size;
      ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
      if (size > 1) {
        /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
        ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      } else {
        ierr = MatDestroySubMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      }
    }
  }
  if (osm->x) {
    for (i=0; i<osm->n; i++) {
      ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
    }
  }
  ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
  ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);

  ierr = VecScatterDestroy(&osm->gorestriction);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&osm->girestriction);CHKERRQ(ierr);
  if (!osm->user_subdomains) {
    ierr      = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
    osm->N    = PETSC_DETERMINE;
    osm->nmax = PETSC_DETERMINE;
  }
  if (osm->pctoouter) {
    ierr = VecScatterDestroy(&(osm->pctoouter));CHKERRQ(ierr);
  }
  if (osm->permutationIS) {
    ierr = ISDestroy(&(osm->permutationIS));CHKERRQ(ierr);
  }
  if (osm->pcx) {
    ierr = VecDestroy(&(osm->pcx));CHKERRQ(ierr);
  }
  if (osm->pcy) {
    ierr = VecDestroy(&(osm->pcy));CHKERRQ(ierr);
  }
  if (osm->permutationP) {
    ierr = MatDestroy(&(osm->permutationP));CHKERRQ(ierr);
  }
  if (osm->pcmat) {
    ierr = MatDestroy(&osm->pcmat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode PCDestroy_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PCReset_GASM(pc);CHKERRQ(ierr);
  /* PCReset will not destroy subdomains, if user_subdomains is true. */
  ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
  if (osm->ksp) {
    for (i=0; i<osm->n; i++) {
      ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
  }
  ierr = PetscFree(osm->x);CHKERRQ(ierr);
  ierr = PetscFree(osm->y);CHKERRQ(ierr);
  ierr = PetscFree(pc->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       blocks,ovl;
  PetscBool      symset,flg;
  PCGASMType     gasmtype;

  PetscFunctionBegin;
  /* set the type to symmetric if matrix is symmetric */
  if (!osm->type_set && pc->pmat) {
    ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
    if (symset && flg) osm->type = PC_GASM_BASIC;
  }
  ierr = PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr);
    osm->dm_subdomains = PETSC_FALSE;
  }
  flg  = PETSC_FALSE;
  ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr);}
  ierr = PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);CHKERRQ(ierr);
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*------------------------------------------------------------------------------------*/

/*@
    PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
                               communicator.
    Logically collective on pc

    Input Parameters:
+   pc  - the preconditioner
-   N   - total number of subdomains
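
    Sample usage (a minimal sketch; assumes pc has already been set to type PCGASM;
    the subdomains themselves are then constructed in PCSetUp_GASM()):
.vb
    PCGASMSetTotalSubdomains(pc,N);
.ve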

    Level: beginner

.keywords: PC, ASM, DM, set, subdomains, additive Schwarz

.seealso: PCGASMSetSubdomains(), PCGASMSetOverlap()
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscMPIInt    size,rank;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N);
  if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp().");

  ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
  osm->ois = osm->iis = NULL;

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
  osm->N    = N;
  osm->n    = PETSC_DETERMINE;
  osm->nmax = PETSC_DETERMINE;
  osm->dm_subdomains = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode  PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
{
  PC_GASM         *osm = (PC_GASM*)pc->data;
  PetscErrorCode  ierr;
  PetscInt        i;

  PetscFunctionBegin;
  if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n);
  if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");

  ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
  osm->iis  = osm->ois = NULL;
  osm->n    = n;
  osm->N    = PETSC_DETERMINE;
  osm->nmax = PETSC_DETERMINE;
  if (ois) {
    ierr = PetscMalloc1(n,&osm->ois);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      ierr = PetscObjectReference((PetscObject)ois[i]);CHKERRQ(ierr);
      osm->ois[i] = ois[i];
    }
    /*
       Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
       it will be ignored.  To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
    */
    osm->overlap = -1;
    /* inner subdomains must be provided */
    if (!iis) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Inner subdomain index sets must be provided when outer index sets are given");
  }
  if (iis) {
    ierr = PetscMalloc1(n,&osm->iis);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      ierr = PetscObjectReference((PetscObject)iis[i]);CHKERRQ(ierr);
      osm->iis[i] = iis[i];
    }
    if (!ois) {
      osm->ois = NULL;
      /* if the user does not provide outer indices, the corresponding outer subdomains will be created in PCSetUp_GASM() by expanding the inner subdomains by osm->overlap */
    }
  }
#if defined(PETSC_USE_DEBUG)
  {
    PetscInt        j,rstart,rend,*covered,lsize;
    const PetscInt  *indices;
    /* check that the inner indices cover, and only cover, the local portion of the preconditioning matrix */
    ierr = MatGetOwnershipRange(pc->pmat,&rstart,&rend);CHKERRQ(ierr);
    ierr = PetscCalloc1(rend-rstart,&covered);CHKERRQ(ierr);
    /* check if the current processor owns indices from others */
    for (i=0; i<n; i++) {
      ierr = ISGetIndices(osm->iis[i],&indices);CHKERRQ(ierr);
      ierr = ISGetLocalSize(osm->iis[i],&lsize);CHKERRQ(ierr);
      for (j=0; j<lsize; j++) {
        if (indices[j]<rstart || indices[j]>=rend) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inner subdomains cannot contain the index %D owned by another process",indices[j]);
        else if (covered[indices[j]-rstart]==1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inner subdomains cannot overlap at index %D",indices[j]);
        else covered[indices[j]-rstart] = 1;
      }
      ierr = ISRestoreIndices(osm->iis[i],&indices);CHKERRQ(ierr);
    }
    /* check if we miss any indices */
    for (i=rstart; i<rend; i++) {
      if (!covered[i-rstart]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Local entity %D was not covered by inner subdomains",i);
    }
    ierr = PetscFree(covered);CHKERRQ(ierr);
  }
#endif
  if (iis) osm->user_subdomains = PETSC_TRUE;
  PetscFunctionReturn(0);
}

static PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
  if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
  if (!pc->setupcalled) osm->overlap = ovl;
  PetscFunctionReturn(0);
}

static PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->type     = type;
  osm->type_set = PETSC_TRUE;
  PetscFunctionReturn(0);
}

static PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->sort_indices = doSort;
  PetscFunctionReturn(0);
}

/*
   FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
          In particular, it would upset the global subdomain number calculation.
*/
static PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (osm->n < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");

  if (n) *n = osm->n;
  if (first) {
    ierr    = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
    *first -= osm->n;
  }
  if (ksp) {
    /* Assume that local solves are now different; not necessarily
       true, though!  This flag is used only for PCView_GASM() */
    *ksp                        = osm->ksp;
    osm->same_subdomain_solvers = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
} /* PCGASMGetSubKSP_GASM() */

/*@C
    PCGASMSetSubdomains - Sets the subdomains for this processor
    for the additive Schwarz preconditioner.

    Collective on pc

    Input Parameters:
+   pc  - the preconditioner object
.   n   - the number of subdomains for this processor
.   iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
-   ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)

    Notes:
    The IS indices use the parallel, global numbering of the vector entries.
    Inner subdomains are those where the correction is applied.
    Outer subdomains are those where the residual necessary to obtain the
    corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
    Both inner and outer subdomains can extend over several processors.
    This processor's portion of a subdomain is known as a local subdomain.

    Inner subdomains cannot overlap with each other, do not have any entities from remote processors,
    and have to cover the entire local subdomain owned by the current processor. The index sets on each
    process should be ordered such that the ith local subdomain is connected to the ith remote subdomain
    on another MPI process.

    By default the GASM preconditioner uses 1 (local) subdomain per processor.

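    Sample usage (a sketch; assumes A is the assembled preconditioning matrix and pc has
    type PCGASM; each process contributes its locally owned rows as one inner subdomain
    and lets PETSc construct the outer subdomains from the overlap set with PCGASMSetOverlap()):
.vb
    IS       iis;
    PetscInt rstart,rend;
    MatGetOwnershipRange(A,&rstart,&rend);
    ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&iis);
    PCGASMSetSubdomains(pc,1,&iis,NULL);
    ISDestroy(&iis);
.ve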

    Level: advanced

.keywords: PC, GASM, set, subdomains, additive Schwarz

.seealso: PCGASMSetNumSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
@*/
PetscErrorCode  PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
{
  PC_GASM *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));CHKERRQ(ierr);
  osm->dm_subdomains = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@
    PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
    additive Schwarz preconditioner.  Either all or no processors in the
    pc communicator must call this routine.

    Logically Collective on pc

    Input Parameters:
+   pc  - the preconditioner context
-   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)

    Options Database Key:
.   -pc_gasm_overlap <overlap> - Sets overlap

    Notes:
    By default the GASM preconditioner uses 1 subdomain per processor.  To use
    multiple subdomains per processor or "straddling" subdomains that intersect
    multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>).

    The overlap defaults to 0, so if one desires that no additional
    overlap be computed beyond what may have been set with a call to
    PCGASMSetSubdomains(), then ovl must be set to be 0.  In particular, if one does
    not explicitly set the subdomains in application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in a GASM
    variant that is equivalent to the block Jacobi preconditioner.

    Note that one can define initial index sets with any overlap via
    PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
    PETSc to extend that overlap further, if desired.
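
    Sample usage (a sketch; must precede PCSetUp()), extending each outer subdomain
    by two layers:
.vb
    PCGASMSetOverlap(pc,2);
.ve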

    Level: intermediate

.keywords: PC, GASM, set, overlap

.seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
@*/
PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr;
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveInt(pc,ovl,2);
  ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
  osm->dm_subdomains = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@
    PCGASMSetType - Sets the type of restriction and interpolation used
    for local problems in the additive Schwarz method.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   type - variant of GASM, one of
.vb
      PC_GASM_BASIC       - full interpolation and restriction
      PC_GASM_RESTRICT    - full restriction, local processor interpolation
      PC_GASM_INTERPOLATE - full interpolation, local processor restriction
      PC_GASM_NONE        - local processor restriction and interpolation
.ve

    Options Database Key:
.   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

    Level: intermediate

.keywords: PC, GASM, set, type

.seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveEnum(pc,type,2);
  ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1170 
1171 /*@
1172     PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
1173 
1174     Logically Collective on PC
1175 
1176     Input Parameters:
1177 +   pc  - the preconditioner context
1178 -   doSort - sort the subdomain indices
1179 
1180     Level: intermediate
1181 
1182 .keywords: PC, GASM, set, sort, indices
1183 
1184 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1185           PCGASMCreateSubdomains2D()
1186 @*/
1187 PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool doSort)
1188 {
1189   PetscErrorCode ierr;
1190 
1191   PetscFunctionBegin;
1192   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1193   PetscValidLogicalCollectiveBool(pc,doSort,2);
1194   ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 /*@C
1199    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
1200    this processor.
1201 
1202    Collective on PC iff first_local is requested
1203 
1204    Input Parameter:
1205 .  pc - the preconditioner context
1206 
1207    Output Parameters:
1208 +  n_local - the number of blocks on this processor or NULL
1209 .  first_local - the global number of the first block on this processor or NULL,
1210                  all processors must request or all must pass NULL
1211 -  ksp - the array of KSP contexts
1212 
1213    Note:
1214    After PCGASMGetSubKSP() the array of KSP contexts is not to be freed by the user.
1215 
1216    Currently for some matrix implementations only 1 block per processor
1217    is supported.
1218 
1219    You must call KSPSetUp() before calling PCGASMGetSubKSP().
1220 
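   Sample usage (a sketch with error checking omitted; ksp and pc come from the surrounding application):
.vb
      PetscInt i,n;
      KSP      *subksp;
      PC       subpc;
      KSPSetUp(ksp);
      PCGASMGetSubKSP(pc,&n,NULL,&subksp);
      for (i = 0; i < n; i++) {
        KSPGetPC(subksp[i],&subpc);
        PCSetType(subpc,PCILU);   /* customize the solver on each subdomain */
      }
.ve
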
1221    Level: advanced
1222 
1223 .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context
1224 
1225 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
1226           PCGASMCreateSubdomains2D(),
1227 @*/
1228 PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1229 {
1230   PetscErrorCode ierr;
1231 
1232   PetscFunctionBegin;
1233   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1234   ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 /* -------------------------------------------------------------------------------------*/
1239 /*MC
1240    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1241            its own KSP object.
1242 
1243    Options Database Keys:
1244 +  -pc_gasm_total_subdomains <n>  - Sets the total number of subdomains to be distributed among the processors
1245 .  -pc_gasm_view_subdomains       - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
1246 .  -pc_gasm_print_subdomains      - activates the printing of subdomain indices in PCSetUp()
1247 .  -pc_gasm_overlap <ovl>         - Sets overlap by which to (automatically) extend local subdomains
1248 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1249 
1250      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1251       will get different convergence rates due to the default option of -pc_gasm_type restrict. Use
1252       -pc_gasm_type basic to apply the standard GASM.
1253 
1254    Notes:
1255     Blocks can be shared by multiple processes.
1256 
1257      To set options on the solvers for each block, append -sub_ to all the KSP and PC
1258         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1259 
1260      To set the options on the solvers separately for each block, call PCGASMGetSubKSP()
1261          and set the options directly on the resulting KSP object (you can access its PC
1262          with KSPGetPC())
1263 
1264 
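     For example, one might run with (an illustrative command line; ./ex stands for any application
     that uses this preconditioner):
.vb
       mpiexec -n 4 ./ex -pc_type gasm -pc_gasm_total_subdomains 8 -pc_gasm_overlap 1 -sub_ksp_type preonly -sub_pc_type lu
.ve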
1265    Level: beginner
1266 
1267    Concepts: additive Schwarz method
1268 
1269     References:
1270 +   1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
1271      Courant Institute, New York University Technical report
1272 -   2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1273     Cambridge University Press.
1274 
1275 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1276            PCBJACOBI,  PCGASMGetSubKSP(), PCGASMSetSubdomains(),
1277            PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()
1278 
1279 M*/
1280 
1281 PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
1282 {
1283   PetscErrorCode ierr;
1284   PC_GASM        *osm;
1285 
1286   PetscFunctionBegin;
1287   ierr = PetscNewLog(pc,&osm);CHKERRQ(ierr);
1288 
1289   osm->N                        = PETSC_DETERMINE;
1290   osm->n                        = PETSC_DECIDE;
1291   osm->nmax                     = PETSC_DETERMINE;
1292   osm->overlap                  = 0;
1293   osm->ksp                      = 0;
1294   osm->gorestriction            = 0;
1295   osm->girestriction            = 0;
1296   osm->pctoouter                = 0;
1297   osm->gx                       = 0;
1298   osm->gy                       = 0;
1299   osm->x                        = 0;
1300   osm->y                        = 0;
1301   osm->pcx                      = 0;
1302   osm->pcy                      = 0;
1303   osm->permutationIS            = 0;
1304   osm->permutationP             = 0;
1305   osm->pcmat                    = 0;
1306   osm->ois                      = 0;
1307   osm->iis                      = 0;
1308   osm->pmat                     = 0;
1309   osm->type                     = PC_GASM_RESTRICT;
1310   osm->same_subdomain_solvers   = PETSC_TRUE;
1311   osm->sort_indices             = PETSC_TRUE;
1312   osm->dm_subdomains            = PETSC_FALSE;
1313   osm->hierarchicalpartitioning = PETSC_FALSE;
1314 
1315   pc->data                 = (void*)osm;
1316   pc->ops->apply           = PCApply_GASM;
1317   pc->ops->applytranspose  = PCApplyTranspose_GASM;
1318   pc->ops->setup           = PCSetUp_GASM;
1319   pc->ops->reset           = PCReset_GASM;
1320   pc->ops->destroy         = PCDestroy_GASM;
1321   pc->ops->setfromoptions  = PCSetFromOptions_GASM;
1322   pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
1323   pc->ops->view            = PCView_GASM;
1324   pc->ops->applyrichardson = 0;
1325 
1326   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);CHKERRQ(ierr);
1327   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);CHKERRQ(ierr);
1328   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);CHKERRQ(ierr);
1329   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
1330   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 
1335 PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
1336 {
1337   MatPartitioning mpart;
1338   const char      *prefix;
1339   PetscInt        i,j,rstart,rend,bs;
1340   PetscBool       hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1341   Mat             Ad     = NULL, adj;
1342   IS              ispart,isnumb,*is;
1343   PetscErrorCode  ierr;
1344 
1345   PetscFunctionBegin;
1346   if (nloc < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local subdomains must be > 0, got nloc = %D",nloc);
1347 
1348   /* Get prefix, row distribution, and block size */
1349   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1350   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1351   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1352   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1353 
1354   /* Get diagonal block from matrix if possible */
1355   ierr = MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1356   if (hasop) {
1357     ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
1358   }
1359   if (Ad) {
1360     ierr = PetscObjectTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1361     if (!isbaij) {ierr = PetscObjectTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1362   }
1363   if (Ad && nloc > 1) {
1364     PetscBool  match,done;
1365     /* Try to setup a good matrix partitioning if available */
1366     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1367     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1368     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1369     ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
1370     if (!match) {
1371       ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
1372     }
1373     if (!match) { /* assume a "good" partitioner is available */
1374       PetscInt       na;
1375       const PetscInt *ia,*ja;
1376       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1377       if (done) {
1378         /* Build adjacency matrix by hand. Unfortunately a call to
1379            MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1380            remove the block-aij structure and we cannot expect
1381            MatPartitioning to split vertices as we need */
1382         PetscInt       i,j,len,nnz,cnt,*iia=0,*jja=0;
1383         const PetscInt *row;
1384         nnz = 0;
1385         for (i=0; i<na; i++) { /* count number of nonzeros */
1386           len = ia[i+1] - ia[i];
1387           row = ja + ia[i];
1388           for (j=0; j<len; j++) {
1389             if (row[j] == i) { /* don't count diagonal */
1390               len--; break;
1391             }
1392           }
1393           nnz += len;
1394         }
1395         ierr   = PetscMalloc1(na+1,&iia);CHKERRQ(ierr);
1396         ierr   = PetscMalloc1(nnz,&jja);CHKERRQ(ierr);
1397         nnz    = 0;
1398         iia[0] = 0;
1399         for (i=0; i<na; i++) { /* fill adjacency */
1400           cnt = 0;
1401           len = ia[i+1] - ia[i];
1402           row = ja + ia[i];
1403           for (j=0; j<len; j++) {
1404             if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
1405           }
1406           nnz += cnt;
1407           iia[i+1] = nnz;
1408         }
1409         /* Partitioning of the adjacency matrix */
1410         ierr      = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);CHKERRQ(ierr);
1411         ierr      = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1412         ierr      = MatPartitioningSetNParts(mpart,nloc);CHKERRQ(ierr);
1413         ierr      = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1414         ierr      = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1415         ierr      = MatDestroy(&adj);CHKERRQ(ierr);
1416         foundpart = PETSC_TRUE;
1417       }
1418       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1419     }
1420     ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
1421   }
1422   ierr = PetscMalloc1(nloc,&is);CHKERRQ(ierr);
1423   if (!foundpart) {
1424 
1425     /* Partitioning by contiguous chunks of rows */
1426 
1427     PetscInt mbs   = (rend-rstart)/bs;
1428     PetscInt start = rstart;
1429     for (i=0; i<nloc; i++) {
1430       PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
1431       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1432       start += count;
1433     }
1434 
1435   } else {
1436 
1437     /* Partitioning by adjacency of diagonal block  */
1438 
1439     const PetscInt *numbering;
1440     PetscInt       *count,nidx,*indices,*newidx,start=0;
1441     /* Get node count in each partition */
1442     ierr = PetscMalloc1(nloc,&count);CHKERRQ(ierr);
1443     ierr = ISPartitioningCount(ispart,nloc,count);CHKERRQ(ierr);
1444     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1445       for (i=0; i<nloc; i++) count[i] *= bs;
1446     }
1447     /* Build indices from node numbering */
1448     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1449     ierr = PetscMalloc1(nidx,&indices);CHKERRQ(ierr);
1450     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1451     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1452     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1453     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1454     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1455       ierr = PetscMalloc1(nidx*bs,&newidx);CHKERRQ(ierr);
1456       for (i=0; i<nidx; i++) {
1457         for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
1458       }
1459       ierr    = PetscFree(indices);CHKERRQ(ierr);
1460       nidx   *= bs;
1461       indices = newidx;
1462     }
1463     /* Shift to get global indices */
1464     for (i=0; i<nidx; i++) indices[i] += rstart;
1465 
1466     /* Build the index sets for each block */
1467     for (i=0; i<nloc; i++) {
1468       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
1469       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1470       start += count[i];
1471     }
1472 
1473     ierr = PetscFree(count);CHKERRQ(ierr);
1474     ierr = PetscFree(indices);CHKERRQ(ierr);
1475     ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
1476     ierr = ISDestroy(&ispart);CHKERRQ(ierr);
1477   }
1478   *iis = is;
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 PETSC_INTERN PetscErrorCode  PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1483 {
1484   PetscErrorCode  ierr;
1485 
1486   PetscFunctionBegin;
1487   ierr = MatSubdomainsCreateCoalesce(A,N,n,iis);CHKERRQ(ierr);
1488   PetscFunctionReturn(0);
1489 }
1490 
1491 
1492 
1493 /*@C
1494    PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive
1495    Schwarz preconditioner for any problem based on its matrix.
1496 
1497    Collective
1498 
1499    Input Parameters:
1500 +  A       - The global matrix operator
1501 -  N       - the number of global subdomains requested
1502 
1503    Output Parameters:
1504 +  n   - the number of subdomains created on this processor
1505 -  iis - the array of index sets defining the local inner subdomains (on which the correction is applied)
1506 
1507    Level: advanced
1508 
1509    Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor.
1510          When N < size, the subdomains are 'straddling' (i.e., they straddle processor boundaries) and are no longer local.
1511          The resulting subdomains can be used in PCGASMSetSubdomains(pc,n,iis,NULL).  The overlapping
1512          outer subdomains will be automatically generated from these according to the requested amount of
1513          overlap; this is currently supported only with local subdomains.
1514 
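   Sample usage (a sketch; A and pc come from the surrounding application and error checking is omitted):
.vb
      PetscInt n;
      IS       *iis;
      PCGASMCreateSubdomains(A,16,&n,&iis);   /* request 16 subdomains in total */
      PCGASMSetSubdomains(pc,n,iis,NULL);     /* outer subdomains are generated per PCGASMSetOverlap() */
      PCGASMDestroySubdomains(n,&iis,NULL);
.ve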
1515 
1516 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1517 
1518 .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
1519 @*/
1520 PetscErrorCode  PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1521 {
1522   PetscMPIInt     size,rank;
1523   PetscErrorCode  ierr;
1524 
1525   PetscFunctionBegin;
1526   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1527   PetscValidPointer(iis,4);
1528 
1529   if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N);
1530   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1530   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
1531   if (N >= size) {
1532     *n = N/size + ((N%size) > rank); /* spread the remainder over the first N%size ranks so the local counts sum to N */
1533     ierr = PCGASMCreateLocalSubdomains(A,*n,iis);CHKERRQ(ierr);
1534   } else {
1535     ierr = PCGASMCreateStraddlingSubdomains(A,N,n,iis);CHKERRQ(ierr);
1536   }
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 /*@C
1541    PCGASMDestroySubdomains - Destroys the index sets created with
1542    PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D(). Should be
1543    called after setting subdomains with PCGASMSetSubdomains().
1544 
1545    Collective
1546 
1547    Input Parameters:
1548 +  n   - the number of index sets
1549 .  iis - the array of inner subdomains,
1550 -  ois - the array of outer subdomains, can be NULL
1551 
1552    Level: intermediate
1553 
1554    Notes:
1555     This is merely a convenience subroutine that walks each list,
1556    destroys each IS on the list, and then frees the list. At the end the
1557    list pointers are set to NULL.
1558 
1559 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1560 
1561 .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
1562 @*/
1563 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
1564 {
1565   PetscInt       i;
1566   PetscErrorCode ierr;
1567 
1568   PetscFunctionBegin;
1569   if (n <= 0) PetscFunctionReturn(0);
1570   if (ois) {
1571     PetscValidPointer(ois,3);
1572     if (*ois) {
1573       PetscValidPointer(*ois,3);
1574       for (i=0; i<n; i++) {
1575         ierr = ISDestroy(&(*ois)[i]);CHKERRQ(ierr);
1576       }
1577       ierr = PetscFree((*ois));CHKERRQ(ierr);
1578     }
1579   }
1580   if (iis) {
1581     PetscValidPointer(iis,2);
1582     if (*iis) {
1583       PetscValidPointer(*iis,2);
1584       for (i=0; i<n; i++) {
1585         ierr = ISDestroy(&(*iis)[i]);CHKERRQ(ierr);
1586       }
1587       ierr = PetscFree((*iis));CHKERRQ(ierr);
1588     }
1589   }
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 
1594 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1595   {                                                                                                       \
1596     PetscInt first_row = first/M, last_row = last/M+1;                                                     \
1597     /*                                                                                                    \
1598      Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
1599      of the bounding box of the intersection of the subdomain with the local ownership range (local       \
1600      subdomain).                                                                                          \
1601      Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
1602      of the intersection.                                                                                 \
1603     */                                                                                                    \
1604     /* ylow_loc is the grid row containing the first element of the local subdomain */                    \
1605     *ylow_loc = PetscMax(first_row,ylow);                                                                    \
1606     /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1607     *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft;                                                                            \
1608     /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
1609     *yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
1610     /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */      \
1611     *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright;                                                                          \
1612     /* Now compute the size of the local subdomain n. */ \
1613     *n = 0;                                               \
1614     if (*ylow_loc < *yhigh_loc) {                           \
1615       PetscInt width = xright-xleft;                     \
1616       *n += width*(*yhigh_loc-*ylow_loc-1);                 \
1617       *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
1618       *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
1619     } \
1620   }
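
/*
   A worked instance of the bookkeeping above (illustrative numbers only): on a grid with M = 10
   and local ownership range [first,last) = [30,70), a subdomain with xleft = 2, xright = 8,
   ylow = 1, yhigh = 6 gives first_row = 3 and last_row = 8.  Hence ylow_loc = max(3,1) = 3,
   xleft_loc = max(30%10,2) = 2, yhigh_loc = min(8,6) = 6, and xright_loc = xright = 8 (since
   yhigh_loc != last_row).  The size is then n = 6*(6-3-1) + min(8-2,6) - 0 = 18, i.e. the 6
   columns [2,8) on each of the 3 locally owned rows 3, 4 and 5.
*/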
1621 
1622 
1623 
1624 /*@
1625    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1626    preconditioner for a two-dimensional problem on a regular grid.
1627 
1628    Collective
1629 
1630    Input Parameters:
1630 +  pc                 - the preconditioner context
1631 .  M, N               - the global number of grid points in the x and y directions
1632 .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1633 .  dof                - degrees of freedom per node
1634 -  overlap            - overlap in mesh lines
1635 
1636    Output Parameters:
1637 +  nsub - the number of local subdomains created
1638 .  iis  - array of index sets defining inner (nonoverlapping) subdomains
1639 -  ois  - array of index sets defining outer (overlapping, if overlap > 0) subdomains
1640 
1641 
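   Sample usage (a sketch with illustrative sizes; pc comes from the surrounding application):
.vb
      PetscInt nsub;
      IS       *iis,*ois;
      /* 64x64 grid, 4x4 subdomains, 1 dof per node, overlap of 1 mesh line */
      PCGASMCreateSubdomains2D(pc,64,64,4,4,1,1,&nsub,&iis,&ois);
      PCGASMSetSubdomains(pc,nsub,iis,ois);
      PCGASMDestroySubdomains(nsub,&iis,&ois);
.ve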
1642    Level: advanced
1643 
1644 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid
1645 
1646 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap()
1647 @*/
1648 PetscErrorCode  PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois)
1649 {
1650   PetscErrorCode ierr;
1651   PetscMPIInt    size, rank;
1652   PetscInt       i, j;
1653   PetscInt       maxheight, maxwidth;
1654   PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
1655   PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1656   PetscInt       x[2][2], y[2][2], n[2];
1657   PetscInt       first, last;
1658   PetscInt       nidx, *idx;
1659   PetscInt       ii,jj,s,q,d;
1660   PetscInt       k,kk;
1661   PetscMPIInt    color;
1662   MPI_Comm       comm, subcomm;
1663   IS             **xis = 0, **is = ois, **is_local = iis;
1664 
1665   PetscFunctionBegin;
1666   ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
1667   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1668   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1669   ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
1670   if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1671                                       "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1672 
1673   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1674   s      = 0;
1675   ystart = 0;
1676   for (j=0; j<Ndomains; ++j) {
1677     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1678     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1679     /* Vertical domain limits with an overlap. */
1680     ylow   = PetscMax(ystart - overlap,0);
1681     yhigh  = PetscMin(ystart + maxheight + overlap,N);
1682     xstart = 0;
1683     for (i=0; i<Mdomains; ++i) {
1684       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1685       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1686       /* Horizontal domain limits with an overlap. */
1687       xleft  = PetscMax(xstart - overlap,0);
1688       xright = PetscMin(xstart + maxwidth + overlap,M);
1689       /*
1690          Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1691       */
1692       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1693       if (nidx) ++s;
1694       xstart += maxwidth;
1695     } /* for (i = 0; i < Mdomains; ++i) */
1696     ystart += maxheight;
1697   } /* for (j = 0; j < Ndomains; ++j) */
1698 
1699   /* Now we can allocate the necessary number of ISs. */
1700   *nsub  = s;
1701   ierr   = PetscMalloc1(*nsub,is);CHKERRQ(ierr);
1702   ierr   = PetscMalloc1(*nsub,is_local);CHKERRQ(ierr);
1703   s      = 0;
1704   ystart = 0;
1705   for (j=0; j<Ndomains; ++j) {
1706     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1707     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1708     /* Vertical domain limits with an overlap. */
1709     y[0][0] = PetscMax(ystart - overlap,0);
1710     y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1711     /* Vertical domain limits without an overlap. */
1712     y[1][0] = ystart;
1713     y[1][1] = PetscMin(ystart + maxheight,N);
1714     xstart  = 0;
1715     for (i=0; i<Mdomains; ++i) {
1716       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1717       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1718       /* Horizontal domain limits with an overlap. */
1719       x[0][0] = PetscMax(xstart - overlap,0);
1720       x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
1721       /* Horizontal domain limits without an overlap. */
1722       x[1][0] = xstart;
1723       x[1][1] = PetscMin(xstart+maxwidth,M);
1724       /*
1725          Determine whether this domain intersects this processor's ownership range of pc->pmat.
1726          Do this twice: first for the domains with overlaps, and once without.
1727          During the first pass create the subcommunicators, and use them on the second pass as well.
1728       */
1729       for (q = 0; q < 2; ++q) {
1730         PetscBool split = PETSC_FALSE;
1731         /*
1732           domain limits, (xleft, xright) and (ylow, yhigh) are adjusted
1733           according to whether the domain with an overlap or without is considered.
1734         */
1735         xleft = x[q][0]; xright = x[q][1];
1736         ylow  = y[q][0]; yhigh  = y[q][1];
1737         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1738         nidx *= dof;
1739         n[q]  = nidx;
1740         /*
1741          Based on the counted number of indices in the local domain *with an overlap*,
1742          construct a subcommunicator of all the processors supporting this domain.
1743          Observe that a domain with an overlap might have nontrivial local support,
1744          while the domain without an overlap might not.  Hence, the decision to participate
1745          in the subcommunicator must be based on the domain with an overlap.
1746          */
1747         if (q == 0) {
1748           if (nidx) color = 1;
1749           else color = MPI_UNDEFINED;
1750           ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
1751           split = PETSC_TRUE;
1752         }
1753         /*
1754          Proceed only if the number of local indices *with an overlap* is nonzero.
1755          */
1756         if (n[0]) {
1757           if (q == 0) xis = is;
1758           if (q == 1) {
1759             /*
1760              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1761              Moreover, if the overlap is zero, the two ISs are identical.
1762              */
1763             if (overlap == 0) {
1764               (*is_local)[s] = (*is)[s];
1765               ierr           = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
1766               continue;
1767             } else {
1768               xis     = is_local;
1769               subcomm = ((PetscObject)(*is)[s])->comm;
1770             }
1771           } /* if (q == 1) */
1772           idx  = NULL;
1773           ierr = PetscMalloc1(nidx,&idx);CHKERRQ(ierr);
1774           if (nidx) {
1775             k = 0;
1776             for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1777               PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft;
1778               PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
1779               kk = dof*(M*jj + x0);
1780               for (ii=x0; ii<x1; ++ii) {
1781                 for (d = 0; d < dof; ++d) {
1782                   idx[k++] = kk++;
1783                 }
1784               }
1785             }
1786           }
1787           ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);CHKERRQ(ierr);
1788           if (split) {
1789             ierr = MPI_Comm_free(&subcomm);CHKERRQ(ierr);
1790           }
1791         }/* if (n[0]) */
1792       }/* for (q = 0; q < 2; ++q) */
1793       if (n[0]) ++s;
1794       xstart += maxwidth;
1795     } /* for (i = 0; i < Mdomains; ++i) */
1796     ystart += maxheight;
1797   } /* for (j = 0; j < Ndomains; ++j) */
1798   PetscFunctionReturn(0);
1799 }
1800 
1801 /*@C
1802     PCGASMGetSubdomains - Gets the subdomains supported on this processor
1803     for the additive Schwarz preconditioner.
1804 
1805     Not Collective
1806 
1807     Input Parameter:
1808 .   pc - the preconditioner context
1809 
1810     Output Parameters:
1811 +   n   - the number of subdomains for this processor (default value = 1)
1812 .   iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
1813 -   ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)
1814 
1815 
1816     Notes:
1817     The user is responsible for destroying the ISs and freeing the returned arrays.
1818     The IS numbering is in the parallel, global numbering of the vector.
1819 
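    Sample usage (a sketch; pc comes from the surrounding application):
.vb
      PetscInt n;
      IS       *iis,*ois;
      PCGASMGetSubdomains(pc,&n,&iis,&ois);
      /* ... examine the subdomain index sets ... */
      PetscFree(iis);   /* only the arrays are freed here; see the note above regarding the ISs */
      PetscFree(ois);
.ve
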
1820     Level: advanced
1821 
1822 .keywords: PC, GASM, get, subdomains, additive Schwarz
1823 
1824 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
1825           PCGASMSetSubdomains(), PCGASMGetSubmatrices()
1826 @*/
1827 PetscErrorCode  PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
1828 {
1829   PC_GASM        *osm;
1830   PetscErrorCode ierr;
1831   PetscBool      match;
1832   PetscInt       i;
1833 
1834   PetscFunctionBegin;
1835   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1836   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1837   if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1838   osm = (PC_GASM*)pc->data;
1839   if (n) *n = osm->n;
1840   if (iis) {
1841     ierr = PetscMalloc1(osm->n, iis);CHKERRQ(ierr);
1842   }
1843   if (ois) {
1844     ierr = PetscMalloc1(osm->n, ois);CHKERRQ(ierr);
1845   }
1846   if (iis || ois) {
1847     for (i = 0; i < osm->n; ++i) {
1848       if (iis) (*iis)[i] = osm->iis[i];
1849       if (ois) (*ois)[i] = osm->ois[i];
1850     }
1851   }
1852   PetscFunctionReturn(0);
1853 }
1854 
1855 /*@C
1856     PCGASMGetSubmatrices - Gets the local submatrices (for this processor
1857     only) for the additive Schwarz preconditioner.
1858 
1859     Not Collective
1860 
1861     Input Parameter:
1862 .   pc - the preconditioner context
1863 
1864     Output Parameters:
1865 +   n   - the number of matrices for this processor (default value = 1)
1866 -   mat - the matrices
1867 
1868     Notes:
1869     Matrices returned by this routine have the same communicators as the index sets (IS)
1870     used to define the subdomains in PCGASMSetSubdomains().
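
    Sample usage (a sketch; pc comes from the surrounding application and must already be set up):
.vb
      PetscInt n;
      Mat      *mats;
      PCGASMGetSubmatrices(pc,&n,&mats);   /* mats points at the PC's internal array; do not free it */
.ve
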
1871     Level: advanced
1872 
1873 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi
1874 
1875 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1876           PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
1877 @*/
1878 PetscErrorCode  PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1879 {
1880   PC_GASM        *osm;
1881   PetscErrorCode ierr;
1882   PetscBool      match;
1883 
1884   PetscFunctionBegin;
1885   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1886   PetscValidIntPointer(n,2);
1887   if (mat) PetscValidPointer(mat,3);
1888   if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1889   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1890   if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1891   osm = (PC_GASM*)pc->data;
1892   if (n) *n = osm->n;
1893   if (mat) *mat = osm->pmat;
1894   PetscFunctionReturn(0);
1895 }
1896 
1897 /*@
1898     PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1899     Logically Collective
1900 
1901     Input Parameters:
1902 +   pc  - the preconditioner
1903 -   flg - boolean indicating whether to use subdomains defined by the DM
1904 
1905     Options Database Key:
1906 .   -pc_gasm_dm_subdomains <bool> - use subdomains defined by the DM; overridden by -pc_gasm_overlap and -pc_gasm_total_subdomains
1907 
1908     Level: intermediate
1909 
1910     Notes:
1911     PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() or PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(),
1912     so setting PCGASMSetSubdomains() with nontrivial subdomain ISs or any of PCGASMSetTotalSubdomains() and PCGASMSetOverlap()
1913     automatically turns off the use of subdomains defined by the DM.
1914 
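    A sketch of typical use (assuming ksp has a DM attached, e.g. a DMDA, from which the
    decomposition can be obtained):
.vb
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCGASM);
      PCGASMSetUseDMSubdomains(pc,PETSC_TRUE);
.ve
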
1915 .keywords: PC, GASM, DM, set, subdomains, additive Schwarz
1916 
1917 .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap(),
1918           PCGASMCreateSubdomains2D()
1919 @*/
1920 PetscErrorCode  PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
1921 {
1922   PC_GASM        *osm = (PC_GASM*)pc->data;
1923   PetscErrorCode ierr;
1924   PetscBool      match;
1925 
1926   PetscFunctionBegin;
1927   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1928   PetscValidLogicalCollectiveBool(pc,flg,2);
1929   if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot be called after the PC has been set up.");
1930   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1931   if (match) {
1932     if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
1933       osm->dm_subdomains = flg;
1934     }
1935   }
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 /*@
1940     PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1941     Not Collective
1942 
1943     Input Parameter:
1944 .   pc  - the preconditioner
1945 
1946     Output Parameter:
1947 .   flg - boolean indicating whether to use subdomains defined by the DM
1948 
1949     Level: intermediate
1950 
1951 .keywords: PC, ASM, DM, set, subdomains, additive Schwarz
1952 .keywords: PC, GASM, DM, get, subdomains, additive Schwarz
1953 
1954 .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap(),
1955 @*/
1956 PetscErrorCode  PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
1957 {
1958   PC_GASM        *osm = (PC_GASM*)pc->data;
1959   PetscErrorCode ierr;
1960   PetscBool      match;
1961 
1962   PetscFunctionBegin;
1963   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1964   PetscValidPointer(flg,2);
1965   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1966   if (match) {
1967     if (flg) *flg = osm->dm_subdomains;
1968   }
1969   PetscFunctionReturn(0);
1970 }
1971