xref: /petsc/src/ksp/pc/impls/gasm/gasm.c (revision a64e098fc20714e9cef7e27e1bf2de9a50c27852)
1 /*
2   This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
3   In this version each processor may have any number of subdomains and a subdomain may live on multiple
4   processors.
5 
6        N    - total number of subdomains across all processors    (set in PCGASMSetTotalBlockCount() or calculated in PCSetUp_GASM())
7        n    - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalBlockCount())
8        nmax - maximum number of local subdomains per processor    (calculated in PCGASMSetTotalBlockCount() or in PCSetUp_GASM())
9 */
10 #include <petsc-private/pcimpl.h>     /*I "petscpc.h" I*/
11 
12 typedef struct {
13   PetscInt   N,n,nmax;
14   PetscInt   overlap;             /* overlap requested by user */
15   KSP        *ksp;                /* linear solvers for each block */
16   Vec        gx,gy;               /* Merged work vectors */
17   Vec        *x,*y;               /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
18   IS         gis, gis_local;      /* merged ISs */
19   VecScatter grestriction;        /* merged restriction */
20   VecScatter gprolongation;       /* merged prolongation */
21   IS         *is;                 /* index set that defines each overlapping subdomain */
22   IS         *is_local;           /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */
23   Mat        *pmat;               /* subdomain block matrices */
24   PCGASMType  type;               /* use reduced interpolation, restriction or both */
25   PetscBool  type_set;            /* if user set this value (so won't change it for symmetric problems) */
26   PetscBool  same_local_solves;   /* flag indicating whether all local solvers are same */
27   PetscBool  sort_indices;        /* flag to sort subdomain indices */
28 } PC_GASM;
29 
30 #undef __FUNCT__
31 #define __FUNCT__ "PCGASMSubdomainView_Private"
32 static PetscErrorCode  PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
33 {
34   PC_GASM        *osm  = (PC_GASM*)pc->data;
35   PetscInt       j,nidx;
36   const PetscInt *idx;
37   PetscViewer    sviewer;
38   char           *cidx;
39   PetscErrorCode ierr;
40 
41   PetscFunctionBegin;
42   if(i < 0 || i >= osm->n) SETERRQ2(((PetscObject)viewer)->comm, PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must be nonnegative and less than %D", i, osm->n);
43   ierr = ISGetLocalSize(osm->is[i], &nidx);CHKERRQ(ierr);
44   /*
45    No more than 15 characters per index plus a space.
46    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
47    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
48    For nidx == 0, the whole string is made up of 16 '\0' characters.
49    */
50   ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr);
51   ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16*(nidx+1)+1, &sviewer); CHKERRQ(ierr);
52   ierr = ISGetIndices(osm->is[i], &idx);CHKERRQ(ierr);
53   for(j = 0; j < nidx; ++j) {
54     ierr = PetscViewerStringSPrintf(sviewer,"%D ", idx[j]); CHKERRQ(ierr);
55   }
56   ierr = PetscViewerDestroy(&sviewer); CHKERRQ(ierr);
57   ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr);
58   ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap:\n");CHKERRQ(ierr);
59   ierr = PetscViewerFlush(viewer);                                   CHKERRQ(ierr);
60   ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
61   ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
62   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
63   ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
64   ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
65   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
66   ierr = PetscFree(cidx);CHKERRQ(ierr);
67   if (osm->is_local) {
68     ierr = ISGetLocalSize(osm->is_local[i], &nidx);CHKERRQ(ierr);
69     /*
70      No more than 15 characters per index plus a space.
71      PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
72      in case nidx == 0. That will take care of the space for the trailing '\0' as well.
73      For nidx == 0, the whole string is made up of 16 '\0' characters.
74      */
75     ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr);
76     ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16*(nidx+1)+1, &sviewer); CHKERRQ(ierr);
77     ierr = ISGetIndices(osm->is_local[i], &idx);CHKERRQ(ierr);
78     for(j = 0; j < nidx; ++j) {
79       ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
80     }
81     ierr = ISRestoreIndices(osm->is_local[i],&idx);CHKERRQ(ierr);
82     ierr = PetscViewerDestroy(&sviewer); CHKERRQ(ierr);
       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
83     ierr = PetscViewerASCIISynchronizedPrintf(viewer, "Subdomain without overlap:\n");CHKERRQ(ierr);
84     ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
85     ierr = PetscViewerASCIISynchronizedPrintf(viewer, "\n");CHKERRQ(ierr);
       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
86     ierr = PetscFree(cidx);CHKERRQ(ierr);
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "PCGASMPrintSubdomains"
93 static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
94 {
95   PC_GASM        *osm  = (PC_GASM*)pc->data;
96   const char     *prefix;
97   char           fname[PETSC_MAX_PATH_LEN+1];
98   PetscViewer    viewer;
99   PetscInt       i;
100   PetscBool      found;
101   PetscErrorCode ierr;
102 
103   PetscFunctionBegin;
104   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
105   ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr);
106   if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
107   for (i=0;i<osm->n;++i) {
108     ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
109     ierr = PCGASMSubdomainView_Private(pc, i, viewer); CHKERRQ(ierr);
        ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
110   }
111   PetscFunctionReturn(0);
112 }
113 
114 
115 #undef __FUNCT__
116 #define __FUNCT__ "PCView_GASM"
117 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
118 {
119   PC_GASM        *osm = (PC_GASM*)pc->data;
120   const char     *prefix;
121   PetscErrorCode ierr;
122   PetscMPIInt    rank, size;
123   PetscInt       i,bsz;
124   PetscBool      iascii,view_subdomains=PETSC_FALSE;
125   PetscViewer    sviewer;
126   PetscInt       count, l, gcount, *numbering, *permutation;
127   char overlap[256]     = "no additional overlap requested";
128   char gsubdomains[256] = "unknown number of global subdomains";
129   char lsubdomains[256] = "unknown number of local  subdomains";
130   char msubdomains[256] = "unknown max number of local subdomains";
131   PetscFunctionBegin;
132   ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr);
133   ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr);
134 
135 
136   ierr = PetscMalloc2(osm->n, PetscInt, &permutation, osm->n, PetscInt, &numbering); CHKERRQ(ierr);
137   for(i = 0; i < osm->n; ++i) permutation[i] = i;
138   ierr = PetscObjectListGetGlobalNumbering(((PetscObject)pc)->comm, osm->n, (PetscObject*)osm->is, &gcount, numbering); CHKERRQ(ierr);
139   ierr = PetscSortIntWithPermutation(osm->n, numbering, permutation); CHKERRQ(ierr);
140 
141   if(osm->overlap >= 0) {
142     ierr = PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);        CHKERRQ(ierr);
143   }
144   ierr = PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "number of global subdomains = %D",gcount);      CHKERRQ(ierr);
145   if(osm->N > 0) {
146     ierr = PetscSNPrintf(lsubdomains, sizeof(lsubdomains), "number of local subdomains = %D",osm->N);     CHKERRQ(ierr);
147   }
148   if(osm->nmax > 0){
149     ierr = PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);
150   }
151 
152   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
153   ierr = PetscOptionsGetBool(prefix,"-pc_gasm_view_subdomains",&view_subdomains,PETSC_NULL);CHKERRQ(ierr);
154 
155   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii); CHKERRQ(ierr);
156   if (iascii) {
157     /*
158      Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
159      the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
160      collectively on the comm.
161      */
162     ierr = PetscObjectName((PetscObject)viewer);                  CHKERRQ(ierr);
163     ierr = PetscViewerASCIIPrintf(viewer,"Generalized additive Schwarz:\n"); CHKERRQ(ierr);
164     ierr = PetscViewerASCIIPrintf(viewer,"Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
165     ierr = PetscViewerASCIIPrintf(viewer,"%s\n",overlap);    CHKERRQ(ierr);
166     ierr = PetscViewerASCIIPrintf(viewer,"%s\n",gsubdomains);CHKERRQ(ierr);
167     ierr = PetscViewerASCIIPrintf(viewer,"%s\n",lsubdomains);CHKERRQ(ierr);
168     ierr = PetscViewerASCIIPrintf(viewer,"%s\n",msubdomains);CHKERRQ(ierr);
169     ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
170     ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr);
171     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
172     ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
173     /* Cannot take advantage of osm->same_local_solves without a global numbering of subdomains. */
174     ierr = PetscViewerASCIIPrintf(viewer,"Subdomain solver info is as follows:\n");CHKERRQ(ierr);
175     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
176     ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
177     /* Make sure that everybody waits for the banner to be printed. */
178     ierr = MPI_Barrier(((PetscObject)viewer)->comm); CHKERRQ(ierr);
179     l = 0;
180     for(count = 0; count < gcount; ++count) {
181       /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
182       PetscMPIInt srank, ssize;
183       if(l<osm->n){
184         PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
185         if(numbering[d] == count) {
186           ierr = MPI_Comm_size(((PetscObject)osm->is[d])->comm, &ssize); CHKERRQ(ierr);
187           ierr = MPI_Comm_rank(((PetscObject)osm->is[d])->comm, &srank); CHKERRQ(ierr);
188           ierr = PetscViewerGetSubcomm(viewer,((PetscObject)osm->is[d])->comm, &sviewer);CHKERRQ(ierr);
189           ierr = ISGetLocalSize(osm->is[d],&bsz);CHKERRQ(ierr);
190           ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_TRUE);CHKERRQ(ierr);
191           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] (subcomm [%d:%d]) local subdomain number %D, local size = %D\n",(int)rank,(int)size,(int)srank,(int)ssize,d,bsz);CHKERRQ(ierr);
192           ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
193           if(view_subdomains) {
194             ierr = PCGASMSubdomainView_Private(pc,d,sviewer); CHKERRQ(ierr);
195           }
196           ierr = KSPView(osm->ksp[d],sviewer);CHKERRQ(ierr);
197           ierr = PetscViewerASCIIPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
198           ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
199           ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_FALSE);CHKERRQ(ierr);
200           ierr = PetscViewerRestoreSubcomm(viewer,((PetscObject)osm->is[d])->comm, &sviewer);CHKERRQ(ierr);
201           ++l;
202         }
203       }
204       ierr = MPI_Barrier(((PetscObject)pc)->comm); CHKERRQ(ierr);
205     }
206     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
207   } else {
208     SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name);
209   }
210   ierr = PetscFree2(permutation,numbering); CHKERRQ(ierr);
211   PetscFunctionReturn(0);
212 }
213 
214 
215 
216 
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "PCSetUp_GASM"
220 static PetscErrorCode PCSetUp_GASM(PC pc)
221 {
222   PC_GASM         *osm  = (PC_GASM*)pc->data;
223   PetscErrorCode ierr;
224   PetscBool      symset,flg;
225   PetscInt       i;
226   PetscMPIInt    rank, size;
227   MatReuse       scall = MAT_REUSE_MATRIX;
228   KSP            ksp;
229   PC             subpc;
230   const char     *prefix,*pprefix;
231   PetscInt       dn;       /* Number of indices in a single subdomain assigned to this processor. */
232   const PetscInt *didx;    /* Indices from a single subdomain assigned to this processor. */
233   PetscInt       ddn;      /* Number of indices in all subdomains assigned to this processor. */
234   PetscInt       *ddidx;   /* Indices of all subdomains assigned to this processor. */
235   IS             gid;      /* Identity IS of the size of all subdomains assigned to this processor. */
236   Vec            x,y;
237   PetscScalar    *gxarray, *gyarray;
238   PetscInt       gfirst, glast;
239   DM             *domain_dm = PETSC_NULL;
240 
241   PetscFunctionBegin;
242   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
243   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
244   if (!pc->setupcalled) {
245 
246     if (!osm->type_set) {
247       ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
248       if (symset && flg) { osm->type = PC_GASM_BASIC; }
249     }
250 
251     /*
252      If subdomains have been set, then the local number of subdomains, osm->n, is NOT PETSC_DECIDE and is at least 1.
253      The total number of subdomains, osm->N is not necessarily set, might be PETSC_DECIDE, and then will have to be calculated from osm->n.
254      */
255     if (osm->n == PETSC_DECIDE) {
256       /* no subdomains given */
257       /* try pc->dm first */
258       if(pc->dm) {
259         char      ddm_name[1024];
260         DM        ddm;
261         PetscBool flg;
262         PetscInt     num_domains, d;
263         char         **domain_names;
264         IS           *domain_is;
265         /* Allow the user to request a decomposition DM by name */
266         ierr = PetscStrncpy(ddm_name, "", 1024); CHKERRQ(ierr);
267         ierr = PetscOptionsString("-pc_gasm_decomposition","Name of the DM defining the decomposition", "PCSetDM", ddm_name, ddm_name,1024,&flg); CHKERRQ(ierr);
268         if(flg) {
269           ierr = DMCreateDecompositionDM(pc->dm, ddm_name, &ddm); CHKERRQ(ierr);
270           if(!ddm) {
271             SETERRQ1(((PetscObject)pc)->comm, PETSC_ERR_ARG_WRONGSTATE, "Unknown DM decomposition name %s", ddm_name);
272           }
273           ierr = PetscInfo(pc,"Using decomposition DM defined using options database\n");CHKERRQ(ierr);
274           ierr = PCSetDM(pc,ddm); CHKERRQ(ierr);
275         }
276         ierr = DMCreateDecomposition(pc->dm, &num_domains, &domain_names, &domain_is, &domain_dm);    CHKERRQ(ierr);
277         if(num_domains) {
278           ierr = PCGASMSetSubdomains(pc, num_domains, domain_is, PETSC_NULL);CHKERRQ(ierr);
279         }
280         for(d = 0; d < num_domains; ++d) {
281           ierr = PetscFree(domain_names[d]); CHKERRQ(ierr);
282           ierr = ISDestroy(&domain_is[d]);   CHKERRQ(ierr);
283         }
284         ierr = PetscFree(domain_names);CHKERRQ(ierr);
285         ierr = PetscFree(domain_is);CHKERRQ(ierr);
286       }
287       if (osm->n == PETSC_DECIDE) { /* still no subdomains; use one per processor */
288         osm->nmax = osm->n = 1;
289         ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
290         osm->N = size;
291       }
292     }
293     if (osm->N == PETSC_DECIDE) {
294       PetscInt inwork[2], outwork[2];
295       /* determine global number of subdomains and the max number of local subdomains */
296       inwork[0] = inwork[1] = osm->n;
297       ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
298       osm->nmax = outwork[0];
299       osm->N    = outwork[1];
300     }
301     if (!osm->is){
302       /* The local number of subdomains was set in PCGASMSetTotalBlockCount() or PCGASMSetSubdomains(),
303        but the actual subdomains have not been supplied (in PCGASMSetSubdomains()).
304        We create the requisite number of subdomains on PETSC_COMM_SELF. */
305       ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->is,&osm->is_local);CHKERRQ(ierr);
306     }
307     if (!osm->is_local) {
308       /*
309 	 This indicates that osm->is should define a nonoverlapping decomposition
310 	 (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetSubdomains,
311 	  but the assumption is that either the user does the right thing, or subdomains in osm->is have been created
312 	  via PCGASMCreateLocalSubdomains(), which guarantees a nonoverlapping decomposition).
313 	 Therefore, osm->is will be used to define osm->is_local.
314 	 If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap,
315 	 so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping.
316 	 Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is.
317       */
318       ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
319       for (i=0; i<osm->n; i++) {
320         if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
321           ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr);
322           ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr);
323         } else {
324           ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr);
325           osm->is_local[i] = osm->is[i];
326         }
327       }
328     }
329     /* Beyond this point osm->is_local is not null. */
330     if (osm->overlap > 0) {
331       /* Extend the "overlapping" regions by a number of steps */
332       ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr);
333     }
334     ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
335     flg  = PETSC_FALSE;
336     ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
337     if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); }
338 
339     if (osm->sort_indices) {
340       for (i=0; i<osm->n; i++) {
341         ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
342 	ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
343       }
344     }
345     /* Merge the ISs, create merged vectors and scatter contexts. */
346     /* Restriction ISs. */
347     ddn = 0;
348     for (i=0; i<osm->n; i++) {
349       ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
350       ddn += dn;
351     }
352     ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx);CHKERRQ(ierr);
353     ddn = 0;
354     for (i=0; i<osm->n; i++) {
355       ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
356       ierr = ISGetIndices(osm->is[i],&didx);CHKERRQ(ierr);
357       ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn);CHKERRQ(ierr);
358       ierr = ISRestoreIndices(osm->is[i], &didx);CHKERRQ(ierr);
359       ddn += dn;
360     }
361     ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis);CHKERRQ(ierr);
362     ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr);
363     ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx);CHKERRQ(ierr);
364     ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
365     ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
366     ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid);CHKERRQ(ierr);
367     ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction));CHKERRQ(ierr);
368     ierr = ISDestroy(&gid);CHKERRQ(ierr);
369     /* Prolongation ISs */
370     { PetscInt       dn_local;       /* Number of indices in the local part of a single domain assigned to this processor. */
371       const PetscInt *didx_local;    /* Global indices from the local part of a single domain assigned to this processor. */
372       PetscInt       ddn_local;      /* Number of indices in the local part of the disjoint union all domains assigned to this processor. */
373       PetscInt       *ddidx_local;   /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */
374       /**/
375       ISLocalToGlobalMapping ltog;          /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */
376       PetscInt              *ddidx_llocal;  /* Mapped local indices of the disjoint union of local parts of subdomains. */
377       PetscInt               ddn_llocal;    /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */
378       IS                     gis_llocal;    /* IS with ddidx_llocal indices. */
379       PetscInt               j;
380       ddn_local = 0;
381       for (i=0; i<osm->n; i++) {
382 	ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr);
383 	ddn_local += dn_local;
384       }
385       ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local);CHKERRQ(ierr);
386       ddn_local = 0;
387       for (i=0; i<osm->n; i++) {
388 	ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr);
389 	ierr = ISGetIndices(osm->is_local[i],&didx_local);CHKERRQ(ierr);
390 	ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local);CHKERRQ(ierr);
391 	ierr = ISRestoreIndices(osm->is_local[i], &didx_local);CHKERRQ(ierr);
392 	ddn_local += dn_local;
393       }
394       ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal);CHKERRQ(ierr);
395       ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local));CHKERRQ(ierr);
396       ierr = ISLocalToGlobalMappingCreateIS(osm->gis,&ltog);CHKERRQ(ierr);
397       ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr);
398       ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
399       if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_local - ddn_llocal);
400       /* Now convert these localized indices into the global indices into the merged output vector. */
401       ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast);CHKERRQ(ierr);
402       for(j=0; j < ddn_llocal; ++j) {
403 	ddidx_llocal[j] += gfirst;
404       }
405       ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal);CHKERRQ(ierr);
406       ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);CHKERRQ(ierr);
407       ierr = ISDestroy(&gis_llocal);CHKERRQ(ierr);
408     }
409     /* Create the subdomain work vectors. */
410     ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr);
411     ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr);
412     ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
413     ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr);
414     ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr);
415     for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) {
416       PetscInt dN;
417       ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
418       ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr);
419       ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,1,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr);
420       ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,1,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr);
421     }
422     ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr);
423     ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr);
424     ierr = VecDestroy(&x);CHKERRQ(ierr);
425     ierr = VecDestroy(&y);CHKERRQ(ierr);
426     /* Create the local solvers */
427     ierr = PetscMalloc(osm->n*sizeof(KSP),&osm->ksp);CHKERRQ(ierr);
428     for (i=0; i<osm->n; i++) { /* KSPs are local */
429       ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr);
430       ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
431       ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
432       ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
433       ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
434       ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
435       ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
436       ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
437       osm->ksp[i] = ksp;
438     }
439     scall = MAT_INITIAL_MATRIX;
440 
441   } else {
442     /*
443        Destroy the blocks from the previous iteration
444     */
445     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
446       ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
447       scall = MAT_INITIAL_MATRIX;
448     }
449   }
450 
451   /*
452      Extract out the submatrices.
453   */
454   if(size > 1) {
455     ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
456   }
457   else {
458     ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
459   }
460   if (scall == MAT_INITIAL_MATRIX) {
461     ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
462     for (i=0; i<osm->n; i++) {
463       ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
464       ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
465     }
466   }
467 
468   /* Return control to the user so that the submatrices can be modified (e.g., to apply
469      different boundary conditions for the submatrices than for the global problem) */
470   ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);
471 
472   /*
473      Loop over submatrices putting them into local ksp
474   */
475   for (i=0; i<osm->n; i++) {
476     ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
477     if (!pc->setupcalled) {
478       ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
479     }
480   }
481 
482   PetscFunctionReturn(0);
483 }
484 
485 #undef __FUNCT__
486 #define __FUNCT__ "PCSetUpOnBlocks_GASM"
487 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
488 {
489   PC_GASM         *osm = (PC_GASM*)pc->data;
490   PetscErrorCode ierr;
491   PetscInt       i;
492 
493   PetscFunctionBegin;
494   for (i=0; i<osm->n; i++) {
495     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
496   }
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "PCApply_GASM"
502 static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y)
503 {
504   PC_GASM         *osm = (PC_GASM*)pc->data;
505   PetscErrorCode ierr;
506   PetscInt       i;
507   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
508 
509   PetscFunctionBegin;
510   /*
511      Support for limiting the restriction or interpolation to only local
512      subdomain values (leaving the other values 0).
513   */
514   if (!(osm->type & PC_GASM_RESTRICT)) {
515     forward = SCATTER_FORWARD_LOCAL;
516     /* have to zero the work RHS since scatter may leave some slots empty */
517     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
518   }
519   if (!(osm->type & PC_GASM_INTERPOLATE)) {
520     reverse = SCATTER_REVERSE_LOCAL;
521   }
522 
523   ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
524   ierr = VecZeroEntries(y);CHKERRQ(ierr);
525   ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
526   /* do the subdomain solves */
527   for (i=0; i<osm->n; ++i) {
528     ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
529   }
530   ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
531   ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
532   PetscFunctionReturn(0);
533 }
534 
535 #undef __FUNCT__
536 #define __FUNCT__ "PCApplyTranspose_GASM"
537 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y)
538 {
539   PC_GASM         *osm = (PC_GASM*)pc->data;
540   PetscErrorCode ierr;
541   PetscInt       i;
542   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
543 
544   PetscFunctionBegin;
545   /*
546      Support for limiting the restriction or interpolation to only local
547      subdomain values (leaving the other values 0).
548 
549      Note: these are reversed from the PCApply_GASM() because we are applying the
550      transpose of the three terms
551   */
552   if (!(osm->type & PC_GASM_INTERPOLATE)) {
553     forward = SCATTER_FORWARD_LOCAL;
554     /* have to zero the work RHS since scatter may leave some slots empty */
555     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
556   }
557   if (!(osm->type & PC_GASM_RESTRICT)) {
558     reverse = SCATTER_REVERSE_LOCAL;
559   }
560 
561   ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
562   ierr = VecZeroEntries(y);CHKERRQ(ierr);
563   ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
564   /* do the local solves */
565   for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
566     ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
567   }
568   ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
569   ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
570   PetscFunctionReturn(0);
571 }
572 
573 #undef __FUNCT__
574 #define __FUNCT__ "PCReset_GASM"
575 static PetscErrorCode PCReset_GASM(PC pc)
576 {
577   PC_GASM        *osm = (PC_GASM*)pc->data;
578   PetscErrorCode ierr;
579   PetscInt       i;
580 
581   PetscFunctionBegin;
582   if (osm->ksp) {
583     for (i=0; i<osm->n; i++) {
584       ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
585     }
586   }
587   if (osm->pmat) {
588     if (osm->n > 0) {
589       ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
590     }
591   }
592   if (osm->x) {
593     for (i=0; i<osm->n; i++) {
594       ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
595       ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
596     }
597   }
598   ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
599   ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);
600 
601   ierr = VecScatterDestroy(&osm->grestriction);CHKERRQ(ierr);
602   ierr = VecScatterDestroy(&osm->gprolongation);CHKERRQ(ierr);
603   if (osm->is) {
604     ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
605     osm->is = 0;
606     osm->is_local = 0;
607   }
608   ierr = ISDestroy(&osm->gis);CHKERRQ(ierr);
609   ierr = ISDestroy(&osm->gis_local);CHKERRQ(ierr);
610   PetscFunctionReturn(0);
611 }
612 
613 #undef __FUNCT__
614 #define __FUNCT__ "PCDestroy_GASM"
615 static PetscErrorCode PCDestroy_GASM(PC pc)
616 {
617   PC_GASM         *osm = (PC_GASM*)pc->data;
618   PetscErrorCode ierr;
619   PetscInt       i;
620 
621   PetscFunctionBegin;
622   ierr = PCReset_GASM(pc);CHKERRQ(ierr);
623   if (osm->ksp) {
624     for (i=0; i<osm->n; i++) {
625       ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr);
626     }
627     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
628   }
629   ierr = PetscFree(osm->x);CHKERRQ(ierr);
630   ierr = PetscFree(osm->y);CHKERRQ(ierr);
631   ierr = PetscFree(pc->data);CHKERRQ(ierr);
632   PetscFunctionReturn(0);
633 }
634 
635 #undef __FUNCT__
636 #define __FUNCT__ "PCSetFromOptions_GASM"
637 static PetscErrorCode PCSetFromOptions_GASM(PC pc) {
638   PC_GASM         *osm = (PC_GASM*)pc->data;
639   PetscErrorCode ierr;
640   PetscInt       blocks,ovl;
641   PetscBool      symset,flg;
642   PCGASMType      gasmtype;
643 
644   PetscFunctionBegin;
645   /* set the type to symmetric if matrix is symmetric */
646   if (!osm->type_set && pc->pmat) {
647     ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
648     if (symset && flg) { osm->type = PC_GASM_BASIC; }
649   }
650   ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr);
651     ierr = PetscOptionsInt("-pc_gasm_total_block_count","Total number of subdomains across the communicator","PCGASMSetTotalBlockCount",osm->n,&blocks,&flg);CHKERRQ(ierr);
652     if (flg) {ierr = PCGASMSetTotalBlockCount(pc,blocks);CHKERRQ(ierr); }
653     ierr = PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
654     if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
655     flg  = PETSC_FALSE;
656     ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
657     if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); }
658   ierr = PetscOptionsTail();CHKERRQ(ierr);
659   PetscFunctionReturn(0);
660 }
661 
662 /*------------------------------------------------------------------------------------*/
663 
664 EXTERN_C_BEGIN
665 #undef __FUNCT__
666 #define __FUNCT__ "PCGASMSetSubdomains_GASM"
667 PetscErrorCode  PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[])
668 {
669   PC_GASM         *osm = (PC_GASM*)pc->data;
670   PetscErrorCode ierr;
671   PetscInt       i;
672 
673   PetscFunctionBegin;
674   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, n = %D",n);
675   if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");
676 
677   if (!pc->setupcalled) {
678     osm->n            = n;
679     osm->is           = 0;
680     osm->is_local     = 0;
681     if (is) {
682       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
683     }
684     if (is_local) {
685       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);}
686     }
687     if (osm->is) {
688       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
689     }
690     if (is) {
691       ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr);
692       for (i=0; i<n; i++) { osm->is[i] = is[i]; }
693       /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */
694       osm->overlap = -1;
695     }
696     if (is_local) {
697       ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
698       for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
699     }
700   }
701   PetscFunctionReturn(0);
702 }
703 EXTERN_C_END
704 
705 EXTERN_C_BEGIN
706 #undef __FUNCT__
707 #define __FUNCT__ "PCGASMSetTotalBlockCount_GASM"
708 PetscErrorCode  PCGASMSetTotalBlockCount_GASM(PC pc,PetscInt N) {
709   PC_GASM         *osm = (PC_GASM*)pc->data;
710   PetscErrorCode ierr;
711   PetscMPIInt    rank,size;
712   PetscInt       n;
713 
714   PetscFunctionBegin;
715   if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be > 0, N = %D",N);
716 
717   /*
718      Split the subdomains equally among all processors
719   */
720   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
721   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
722   n = N/size + ((N % size) > rank);
723   if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
724   if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalBlockCount() should be called before PCSetUp().");
725   if (!pc->setupcalled) {
726     if (osm->is) {
727       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
728     }
729     osm->N            = N;
730     osm->n            = n;
731     osm->nmax         = N/size + ((N%size)?1:0);
732     osm->is           = 0;
733     osm->is_local     = 0;
734   }
735   PetscFunctionReturn(0);
736 }
737 EXTERN_C_END
738 
739 EXTERN_C_BEGIN
740 #undef __FUNCT__
741 #define __FUNCT__ "PCGASMSetOverlap_GASM"
742 PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
743 {
744   PC_GASM *osm = (PC_GASM*)pc->data;
745 
746   PetscFunctionBegin;
747   if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
748   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
749   if (!pc->setupcalled) {
750     osm->overlap = ovl;
751   }
752   PetscFunctionReturn(0);
753 }
754 EXTERN_C_END
755 
756 EXTERN_C_BEGIN
757 #undef __FUNCT__
758 #define __FUNCT__ "PCGASMSetType_GASM"
759 PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
760 {
761   PC_GASM *osm = (PC_GASM*)pc->data;
762 
763   PetscFunctionBegin;
764   osm->type     = type;
765   osm->type_set = PETSC_TRUE;
766   PetscFunctionReturn(0);
767 }
768 EXTERN_C_END
769 
770 EXTERN_C_BEGIN
771 #undef __FUNCT__
772 #define __FUNCT__ "PCGASMSetSortIndices_GASM"
773 PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool  doSort)
774 {
775   PC_GASM *osm = (PC_GASM*)pc->data;
776 
777   PetscFunctionBegin;
778   osm->sort_indices = doSort;
779   PetscFunctionReturn(0);
780 }
781 EXTERN_C_END
782 
783 EXTERN_C_BEGIN
784 #undef __FUNCT__
785 #define __FUNCT__ "PCGASMGetSubKSP_GASM"
786 /*
787    FIX: This routine might need to be modified once multiple ranks per subdomain are allowed.
788         In particular, it would upset the global subdomain number calculation.
789 */
790 PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
791 {
792   PC_GASM         *osm = (PC_GASM*)pc->data;
793   PetscErrorCode ierr;
794 
795   PetscFunctionBegin;
796   if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
797 
798   if (n) {
799     *n = osm->n;
800   }
801   if (first) {
802     ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
803     *first -= osm->n;
804   }
805   if (ksp) {
806     /* Assume that local solves are now different; not necessarily
807        true, though!  This flag is used only for PCView_GASM() */
808     *ksp                   = osm->ksp;
809     osm->same_local_solves = PETSC_FALSE;
810   }
811   PetscFunctionReturn(0);
812 }/* PCGASMGetSubKSP_GASM() */
813 EXTERN_C_END
814 
815 
816 #undef __FUNCT__
817 #define __FUNCT__ "PCGASMSetSubdomains"
818 /*@C
819     PCGASMSetSubdomains - Sets the subdomains for this processor
820     for the additive Schwarz preconditioner.
821 
822     Collective on PC
823 
824     Input Parameters:
825 +   pc - the preconditioner context
826 .   n  - the number of subdomains for this processor
827 .   is - the index sets that define this processor's part of the subdomains (on which the correction is obtained)
828          (or PETSC_NULL for PETSc to determine subdomains)
829 -   is_local - the index sets that define the local part of this processor's part of the subdomains (on which the correction is applied)
830          (or PETSC_NULL to use the same as is)
831 
832     Notes:
833     The IS indices use the parallel, global numbering of the vector entries.
834     By default the GASM preconditioner uses 1 subdomain per processor.
835     Use PCGASMSetTotalBlockCount() to set the total number of subdomains across all processors,
836     letting PETSc assign them to processors and compute the subdomains from that.
837 
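    Example:
    A minimal sketch of supplying one user-defined subdomain per process; the index sets outer
    (the overlapping subdomain on which the correction is computed) and inner (its nonoverlapping
    local part) are assumed to have been filled by the caller with global indices:
.vb
    IS outer, inner;
    ierr = PCSetType(pc,PCGASM);CHKERRQ(ierr);
    ierr = PCGASMSetSubdomains(pc,1,&outer,&inner);CHKERRQ(ierr);
.ve
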
838     Level: advanced
839 
840 .keywords: PC, GASM, set, subdomains, additive Schwarz
841 
842 .seealso: PCGASMSetTotalBlockCount(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
843           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
844 @*/
845 PetscErrorCode  PCGASMSetSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
846 {
847   PetscErrorCode ierr;
848 
849   PetscFunctionBegin;
850   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
851   ierr = PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr);
852   PetscFunctionReturn(0);
853 }
854 
855 #undef __FUNCT__
856 #define __FUNCT__ "PCGASMSetTotalBlockCount"
857 /*@C
858     PCGASMSetTotalBlockCount - Sets the total number of subdomains
859     across all processors in the communicator of a generalized
860     additive Schwarz preconditioner.  Either all or no processors in the
861     PC communicator must call this routine, with the same number.
862 
863     Collective on PC
864 
865     Input Parameters:
866 +   pc - the preconditioner context
867 -   N  - the total number of subdomains across all processors
868 
869     Options Database Key:
870     To set the total number of subdomain blocks rather than specify the
871     index sets, use the option
872 .    -pc_gasm_total_block_count <n> - Sets the total number of subdomains (known as blocks) to be distributed among the processors
873 
874     By default the GASM preconditioner uses 1 subdomain per processor.
875 
876     Use PCGASMSetSubdomains() to set local subdomains.
877 
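    Example:
    A minimal sketch that asks PETSc to create 16 subdomains in total and distribute them across the
    processors of the preconditioner's communicator; pc is assumed to already be of type PCGASM, and
    the extra call simply requests one layer of overlap on each block:
.vb
    ierr = PCGASMSetTotalBlockCount(pc,16);CHKERRQ(ierr);
    ierr = PCGASMSetOverlap(pc,1);CHKERRQ(ierr);
.ve
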
878     Level: advanced
879 
880 .keywords: PC, GASM, set, total, global, subdomains, additive Schwarz
881 
882 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
883           PCGASMCreateSubdomains2D()
884 @*/
885 PetscErrorCode  PCGASMSetTotalBlockCount(PC pc,PetscInt N)
886 {
887   PetscErrorCode ierr;
888 
889   PetscFunctionBegin;
890   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
891   ierr = PetscTryMethod(pc,"PCGASMSetTotalBlockCount_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr);
892   PetscFunctionReturn(0);
893 }
894 
895 #undef __FUNCT__
896 #define __FUNCT__ "PCGASMSetOverlap"
897 /*@
898     PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
899     additive Schwarz preconditioner.  Either all or no processors in the
900     PC communicator must call this routine.
901 
902     Logically Collective on PC
903 
904     Input Parameters:
905 +   pc  - the preconditioner context
906 -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)
907 
908     Options Database Key:
909 .   -pc_gasm_overlap <overlap> - Sets overlap
910 
911     Notes:
912     By default the GASM preconditioner uses 1 subdomain per processor.  To use
913     multiple subdomains per processor, see PCGASMSetTotalBlockCount() or
914     PCGASMSetSubdomains() (and the option -pc_gasm_total_block_count <n>).
915 
916     The overlap defaults to 1, so if one desires that no additional
917     overlap be computed beyond what may have been set with a call to
918     PCGASMSetTotalBlockCount() or PCGASMSetSubdomains(), then ovl
919     must be set to be 0.  In particular, if one does not explicitly set
920     the subdomains in application code, then all overlap would be computed
921     internally by PETSc, and using an overlap of 0 would result in a GASM
922     variant that is equivalent to the block Jacobi preconditioner.
923 
924     Note that one can define initial index sets with any overlap via
925     PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
926     PETSc to extend that overlap further, if desired.
927 
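    Example:
    A minimal sketch, assuming the subdomains themselves are left to PETSc (one per process by
    default); an overlap of 0 gives a block Jacobi-like method, while the value 2 used here grows
    each subdomain by two layers:
.vb
    ierr = PCSetType(pc,PCGASM);CHKERRQ(ierr);
    ierr = PCGASMSetOverlap(pc,2);CHKERRQ(ierr);
.ve
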
928     Level: intermediate
929 
930 .keywords: PC, GASM, set, overlap
931 
932 .seealso: PCGASMSetTotalBlockCount(), PCGASMSetSubdomains(), PCGASMGetSubKSP(),
933           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
934 @*/
935 PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
936 {
937   PetscErrorCode ierr;
938 
939   PetscFunctionBegin;
940   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
941   PetscValidLogicalCollectiveInt(pc,ovl,2);
942   ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
943   PetscFunctionReturn(0);
944 }
945 
946 #undef __FUNCT__
947 #define __FUNCT__ "PCGASMSetType"
948 /*@
949     PCGASMSetType - Sets the type of restriction and interpolation used
950     for local problems in the additive Schwarz method.
951 
952     Logically Collective on PC
953 
954     Input Parameters:
955 +   pc  - the preconditioner context
956 -   type - variant of GASM, one of
957 .vb
958       PC_GASM_BASIC       - full interpolation and restriction
959       PC_GASM_RESTRICT    - full restriction, local processor interpolation
960       PC_GASM_INTERPOLATE - full interpolation, local processor restriction
961       PC_GASM_NONE        - local processor restriction and interpolation
962 .ve
963 
964     Options Database Key:
965 .   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
966 
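    Example:
    A minimal sketch selecting the standard (non-restricted) additive Schwarz variant on an existing
    PCGASM preconditioner pc:
.vb
    ierr = PCGASMSetType(pc,PC_GASM_BASIC);CHKERRQ(ierr);
.ve
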
967     Level: intermediate
968 
969 .keywords: PC, GASM, set, type
970 
971 .seealso: PCGASMSetTotalBlockCount(), PCGASMSetSubdomains(), PCGASMGetSubKSP(),
972           PCGASMCreateSubdomains2D()
973 @*/
974 PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
975 {
976   PetscErrorCode ierr;
977 
978   PetscFunctionBegin;
979   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
980   PetscValidLogicalCollectiveEnum(pc,type,2);
981   ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
982   PetscFunctionReturn(0);
983 }
984 
985 #undef __FUNCT__
986 #define __FUNCT__ "PCGASMSetSortIndices"
987 /*@
988     PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
989 
990     Logically Collective on PC
991 
992     Input Parameters:
993 +   pc  - the preconditioner context
994 -   doSort - sort the subdomain indices
995 
996     Level: intermediate
997 
998 .keywords: PC, GASM, set, type
999 
1000 .seealso: PCGASMSetSubdomains(), PCGASMSetTotalBlockCount(), PCGASMGetSubKSP(),
1001           PCGASMCreateSubdomains2D()
1002 @*/
1003 PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool  doSort)
1004 {
1005   PetscErrorCode ierr;
1006 
1007   PetscFunctionBegin;
1008   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1009   PetscValidLogicalCollectiveBool(pc,doSort,2);
1010   ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "PCGASMGetSubKSP"
1016 /*@C
1017    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
1018    this processor.
1019 
1020    Collective on PC iff first_local is requested
1021 
1022    Input Parameter:
1023 .  pc - the preconditioner context
1024 
1025    Output Parameters:
1026 +  n_local - the number of blocks on this processor or PETSC_NULL
1027 .  first_local - the global number of the first block on this processor or PETSC_NULL,
1028                  all processors must request or all must pass PETSC_NULL
1029 -  ksp - the array of KSP contexts
1030 
1031    Note:
1032    After PCGASMGetSubKSP() the array of KSP contexts must not be freed by the caller.
1033 
1034    Currently for some matrix implementations only 1 block per processor
1035    is supported.
1036 
1037    You must call KSPSetUp() before calling PCGASMGetSubKSP().
1038 
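   Example:
   A minimal sketch that retrieves the block solvers after setup and switches each block
   preconditioner to ILU; ksp is assumed to be the outer solver whose PC has been set to PCGASM:
.vb
   KSP      *subksp;
   PC       pc,subpc;
   PetscInt n_local,first,i;

   ierr = KSPSetUp(ksp);CHKERRQ(ierr);
   ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
   ierr = PCGASMGetSubKSP(pc,&n_local,&first,&subksp);CHKERRQ(ierr);
   for (i=0; i<n_local; i++) {
     ierr = KSPGetPC(subksp[i],&subpc);CHKERRQ(ierr);
     ierr = PCSetType(subpc,PCILU);CHKERRQ(ierr);
   }
.ve
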
1039    Level: advanced
1040 
1041 .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context
1042 
1043 .seealso: PCGASMSetTotalBlockCount(), PCGASMSetSubdomains(), PCGASMSetOverlap(),
1044           PCGASMCreateSubdomains2D(),
1045 @*/
1046 PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1047 {
1048   PetscErrorCode ierr;
1049 
1050   PetscFunctionBegin;
1051   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1052   ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
1053   PetscFunctionReturn(0);
1054 }
1055 
1056 /* -------------------------------------------------------------------------------------*/
1057 /*MC
1058    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1059            its own KSP object.
1060 
1061    Options Database Keys:
1062 +  -pc_gasm_total_block_count <n> - Sets total number of local subdomains (known as blocks) to be distributed among processors
1063 .  -pc_gasm_view_subdomains       - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
1064 .  -pc_gasm_print_subdomains      - activates the printing of subdomain indices in PCSetUp()
1065 .  -pc_gasm_overlap <ovl>         - Sets overlap by which to (automatically) extend local subdomains
1066 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1067 
1068      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1069       will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1070       -pc_gasm_type basic to use the standard GASM.
1071 
1072    Notes: Each processor can have one or more blocks, but a block cannot be shared by more
1073      than one processor. Defaults to one block per processor.
1074 
1075      To set options on the solvers for each block append -sub_ to all the KSP, and PC
1076         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1077 
1078      To set the options on the solvers separate for each block call PCGASMGetSubKSP()
1079          and set the options directly on the resulting KSP object (you can access its PC
1080          with KSPGetPC())
1081 
1082 
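   Example:
   A minimal sketch of selecting GASM on an existing KSP; ksp, pc, the matrix A, and the vectors b
   and x are assumed to have been created by the caller, and any -sub_ options given on the command
   line are picked up by the block solvers through KSPSetFromOptions():
.vb
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCGASM);CHKERRQ(ierr);
     ierr = PCGASMSetOverlap(pc,2);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
.ve
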
1083    Level: beginner
1084 
1085    Concepts: additive Schwarz method
1086 
1087     References:
1088     An additive variant of the Schwarz alternating method for the case of many subregions
1089     M Dryja, OB Widlund - Courant Institute, New York University Technical report
1090 
1091     Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1092     Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
1093 
1094 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1095            PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetSubdomains(),
1096            PCGASMSetTotalBlockCount(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()
1097 
1098 M*/
1099 
1100 EXTERN_C_BEGIN
1101 #undef __FUNCT__
1102 #define __FUNCT__ "PCCreate_GASM"
1103 PetscErrorCode  PCCreate_GASM(PC pc)
1104 {
1105   PetscErrorCode ierr;
1106   PC_GASM         *osm;
1107 
1108   PetscFunctionBegin;
1109   ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr);
1110   osm->N                 = PETSC_DECIDE;
1111   osm->n                 = PETSC_DECIDE;
1112   osm->nmax              = 0;
1113   osm->overlap           = 1;
1114   osm->ksp               = 0;
1115   osm->grestriction      = 0;
1116   osm->gprolongation     = 0;
1117   osm->gx                = 0;
1118   osm->gy                = 0;
1119   osm->x                 = 0;
1120   osm->y                 = 0;
1121   osm->is                = 0;
1122   osm->is_local          = 0;
1123   osm->pmat              = 0;
1124   osm->type              = PC_GASM_RESTRICT;
1125   osm->same_local_solves = PETSC_TRUE;
1126   osm->sort_indices      = PETSC_TRUE;
1127 
1128   pc->data                   = (void*)osm;
1129   pc->ops->apply             = PCApply_GASM;
1130   pc->ops->applytranspose    = PCApplyTranspose_GASM;
1131   pc->ops->setup             = PCSetUp_GASM;
1132   pc->ops->reset             = PCReset_GASM;
1133   pc->ops->destroy           = PCDestroy_GASM;
1134   pc->ops->setfromoptions    = PCSetFromOptions_GASM;
1135   pc->ops->setuponblocks     = PCSetUpOnBlocks_GASM;
1136   pc->ops->view              = PCView_GASM;
1137   pc->ops->applyrichardson   = 0;
1138 
1139   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSubdomains_C","PCGASMSetSubdomains_GASM",
1140                     PCGASMSetSubdomains_GASM);CHKERRQ(ierr);
1141   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalBlockCount_C","PCGASMSetTotalBlockCount_GASM",
1142                     PCGASMSetTotalBlockCount_GASM);CHKERRQ(ierr);
1143   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM",
1144                     PCGASMSetOverlap_GASM);CHKERRQ(ierr);
1145   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM",
1146                     PCGASMSetType_GASM);CHKERRQ(ierr);
1147   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM",
1148                     PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
1149   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM",
1150                     PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
1151   PetscFunctionReturn(0);
1152 }
1153 EXTERN_C_END
1154 
1155 
1156 #undef __FUNCT__
1157 #define __FUNCT__ "PCGASMCreateLocalSubdomains"
1158 /*@C
1159    PCGASMCreateLocalSubdomains - Creates n local index sets for the overlapping
1160    Schwarz preconditioner for any problem based on its matrix.
1161 
1162    Collective
1163 
1164    Input Parameters:
1165 +  A - The global matrix operator
1166 -  n - the number of local subdomains
1167 
1168    Output Parameters:
1169 +  outis       - the array of index sets defining the local subdomains (on which the correction is computed)
1170 -  outis_local - the array of index sets defining the local portions of the subdomains (on which the correction is applied)
1171 
1172    Level: advanced
1173 
1174    Note: this generates n nonoverlapping subdomains on PETSC_COMM_SELF;
1175          PCGASM will generate the overlap from these if you use them
1176          in PCGASMSetSubdomains() and set a nonzero overlap with
1177          PCGASMSetOverlap()
1178 
1179     In the Fortran version you must provide the array outis[] already allocated of length n.
1180 
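   Example:
   A minimal sketch that creates 4 nonoverlapping local subdomains from the matrix, hands them to an
   existing PCGASM preconditioner pc, and lets PETSc grow the overlap; cleanup of the index sets is
   omitted here:
.vb
   IS *is,*is_local;

   ierr = PCGASMCreateLocalSubdomains(A,4,&is,&is_local);CHKERRQ(ierr);
   ierr = PCGASMSetSubdomains(pc,4,is,is_local);CHKERRQ(ierr);
   ierr = PCGASMSetOverlap(pc,1);CHKERRQ(ierr);
.ve
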
1181 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1182 
1183 .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
1184 @*/
1185 PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt n, IS* outis[], IS*outis_local[])
1186 {
1187   MatPartitioning           mpart;
1188   const char                *prefix;
1189   PetscErrorCode            (*f)(Mat,MatReuse,Mat*);
1190   PetscMPIInt               size;
1191   PetscInt                  i,j,rstart,rend,bs;
1192   PetscBool                 isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1193   Mat                       Ad = PETSC_NULL, adj;
1194   IS                        ispart,isnumb,*is;
1195   PetscErrorCode            ierr;
1196 
1197   PetscFunctionBegin;
1198   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1199   PetscValidPointer(outis,3);
1200   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);
1201 
1202   /* Get prefix, row distribution, and block size */
1203   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1204   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1205   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1206   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1207 
1208   /* Get diagonal block from matrix if possible */
1209   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1210   ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
1211   if (f) {
1212     ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
1213   } else if (size == 1) {
1214     Ad = A;
1215   }
1216   if (Ad) {
1217     ierr = PetscObjectTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1218     if (!isbaij) {ierr = PetscObjectTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1219   }
1220   if (Ad && n > 1) {
1221     PetscBool  match,done;
1222     /* Try to setup a good matrix partitioning if available */
1223     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1224     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1225     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1226     ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
1227     if (!match) {
1228       ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
1229     }
1230     if (!match) { /* assume a "good" partitioner is available */
1231       PetscInt na,*ia,*ja;
1232       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1233       if (done) {
1234         /* Build adjacency matrix by hand. Unfortunately a call to
1235            MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1236            remove the block-aij structure and we cannot expect
1237            MatPartitioning to split vertices as we need */
1238         PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
1239         nnz = 0;
1240         for (i=0; i<na; i++) { /* count number of nonzeros */
1241           len = ia[i+1] - ia[i];
1242           row = ja + ia[i];
1243           for (j=0; j<len; j++) {
1244             if (row[j] == i) { /* don't count diagonal */
1245               len--; break;
1246             }
1247           }
1248           nnz += len;
1249         }
1250         ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
1251         ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
1252         nnz    = 0;
1253         iia[0] = 0;
1254         for (i=0; i<na; i++) { /* fill adjacency */
1255           cnt = 0;
1256           len = ia[i+1] - ia[i];
1257           row = ja + ia[i];
1258           for (j=0; j<len; j++) {
1259             if (row[j] != i) { /* if not diagonal */
1260               jja[nnz+cnt++] = row[j];
1261             }
1262           }
1263           nnz += cnt;
1264           iia[i+1] = nnz;
1265         }
1266         /* Partitioning of the adjacency matrix */
1267         ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
1268         ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1269         ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
1270         ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1271         ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1272         ierr = MatDestroy(&adj);CHKERRQ(ierr);
1273         foundpart = PETSC_TRUE;
1274       }
1275       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1276     }
1277     ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
1278   }
1279 
1280   ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
1281   *outis = is;
1282 
1283   if (!foundpart) {
1284 
1285     /* Partitioning by contiguous chunks of rows */
1286 
1287     PetscInt mbs   = (rend-rstart)/bs;
1288     PetscInt start = rstart;
1289     for (i=0; i<n; i++) {
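      /* Distribute the mbs local block rows as evenly as possible: the first (mbs % n)
         subdomains receive one extra block row, and each block row contributes bs
         consecutive matrix rows to the stride index set. */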
1290       PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1291       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1292       start += count;
1293     }
1294 
1295   } else {
1296 
1297     /* Partitioning by adjacency of diagonal block  */
1298 
1299     const PetscInt *numbering;
1300     PetscInt       *count,nidx,*indices,*newidx,start=0;
1301     /* Get node count in each partition */
1302     ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
1303     ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
1304     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1305       for (i=0; i<n; i++) count[i] *= bs;
1306     }
1307     /* Build indices from node numbering */
1308     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1309     ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
1310     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1311     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1312     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1313     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1314     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1315       ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
1316       for (i=0; i<nidx; i++)
1317         for (j=0; j<bs; j++)
1318           newidx[i*bs+j] = indices[i]*bs + j;
1319       ierr = PetscFree(indices);CHKERRQ(ierr);
1320       nidx   *= bs;
1321       indices = newidx;
1322     }
1323     /* Shift to get global indices */
1324     for (i=0; i<nidx; i++) indices[i] += rstart;
1325 
1326     /* Build the index sets for each block */
1327     for (i=0; i<n; i++) {
1328       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
1329       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1330       start += count[i];
1331     }
1332 
1333     ierr = PetscFree(count);CHKERRQ(ierr);
1334     ierr = PetscFree(indices);CHKERRQ(ierr);
1335     ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
1336     ierr = ISDestroy(&ispart);CHKERRQ(ierr);
1337   }
1338 
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 #undef __FUNCT__
1343 #define __FUNCT__ "PCGASMDestroySubdomains"
1344 /*@C
1345    PCGASMDestroySubdomains - Destroys the index sets created with
1346    PCGASMCreateLocalSubdomains() or PCGASMCreateSubdomains2D(). Should be
1347    called after setting subdomains with PCGASMSetSubdomains().
1348 
1349    Collective
1350 
1351    Input Parameters:
1352 +  n - the number of index sets
1353 .  is - the array of index sets
1354 -  is_local - the array of local index sets, can be PETSC_NULL
1355 
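
   Notes: both the individual index sets and the arrays holding them are freed, so the
   arrays must have been allocated with PetscMalloc() (as is done by
   PCGASMCreateLocalSubdomains() and PCGASMCreateSubdomains2D()). A minimal sketch,
   continuing the PCGASMCreateLocalSubdomains() example:
.vb
      PCGASMCreateLocalSubdomains(A, n, &is, PETSC_NULL);
      /* ... PCGASMSetSubdomains(), solve ... */
      PCGASMDestroySubdomains(n, is, PETSC_NULL);
.ve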
1356    Level: advanced
1357 
1358 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1359 
1360 .seealso: PCGASMCreateLocalSubdomains(), PCGASMSetSubdomains()
1361 @*/
1362 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1363 {
1364   PetscInt       i;
1365   PetscErrorCode ierr;
1366   PetscFunctionBegin;
1367   if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
1368   PetscValidPointer(is,2);
1369   for (i=0; i<n; i++) { ierr = ISDestroy(&is[i]);CHKERRQ(ierr); }
1370   ierr = PetscFree(is);CHKERRQ(ierr);
1371   if (is_local) {
1372     PetscValidPointer(is_local,3);
1373     for (i=0; i<n; i++) { ierr = ISDestroy(&is_local[i]);CHKERRQ(ierr); }
1374     ierr = PetscFree(is_local);CHKERRQ(ierr);
1375   }
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 
1380 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1381 {                                                                                                       \
1382  PetscInt first_row = first/M, last_row = last/M+1;                                                     \
1383   /*                                                                                                    \
1384    Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
1385    of the bounding box of the intersection of the subdomain with the local ownership range (local       \
1386    subdomain).                                                                                          \
1387    Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
1388    of the intersection.                                                                                 \
1389   */                                                                                                    \
1390   /* ylow_loc is the grid row containing the first element of the local subdomain */                    \
1391   *ylow_loc = PetscMax(first_row,ylow);                                                                    \
1392   /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1393   *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft;                                                                            \
1394   /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
1395   *yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
1396   /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */      \
1397   *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright;                                                                          \
1398   /* Now compute the size of the local subdomain n. */ \
1399   *n = 0;                                               \
1400   if(*ylow_loc < *yhigh_loc) {                           \
1401     PetscInt width = xright-xleft;                     \
1402     *n += width*(*yhigh_loc-*ylow_loc-1);                 \
1403     *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
1404     *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
1405   }\
1406 }
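
/*
   Worked example for the macro above (numbers purely illustrative): take M = N = 10,
   a subdomain with xleft = 2, xright = 6, ylow = 3, yhigh = 7, and a local ownership
   range [first,last) = [35,65).  Then first_row = 3 and last_row = 7, so
     ylow_loc = 3, xleft_loc = max(35%10,2) = 5, yhigh_loc = 7, xright_loc = min(6,65%10) = 5,
   and with width = xright - xleft = 4 the local size is
     n = 4*(7-3-1) + min(max(5-2,0),4) - min(max(5-2,0),4) = 12 + 3 - 3 = 12,
   which matches a direct count: 1 point of the subdomain lies in grid row 3, 4 in each
   of rows 4 and 5, and 3 in row 6 of the local ownership range.
*/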
1407 
1408 
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "PCGASMCreateSubdomains2D"
1412 /*@
1413    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1414    preconditioner for a two-dimensional problem on a regular grid.
1415 
1416    Collective
1417 
1418    Input Parameters:
1419 +  pc                 - the preconditioner context
     .  M, N               - the global number of grid points in the x and y directions
1420 .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1421 .  dof                - degrees of freedom per node
1422 -  overlap            - overlap in mesh lines
1423 
1424    Output Parameters:
1425 +  nsub - the number of local subdomains created
1426 .  is - array of index sets defining overlapping (if overlap > 0) subdomains
1427 -  is_local - array of index sets defining non-overlapping subdomains
1428 
1429 
1430    Level: advanced
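
   Notes: the operators must already be set on the PC before calling this routine, since
   the subdomains are intersected with the ownership range of pc->pmat. A sketch of
   typical use (error checking omitted; the grid sizes M, N and the domain counts used
   here are hypothetical):
.vb
      PetscInt nsub;
      IS       *is, *is_local;
      PCSetType(pc, PCGASM);
      PCGASMCreateSubdomains2D(pc, M, N, 2, 2, 1, 1, &nsub, &is, &is_local);
      PCGASMSetSubdomains(pc, nsub, is, is_local);
.ve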
1431 
1432 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid
1433 
1434 .seealso: PCGASMSetTotalBlockCount(), PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1435           PCGASMSetOverlap()
1436 @*/
1437 PetscErrorCode  PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
1438 {
1439   PetscErrorCode ierr;
1440   PetscMPIInt    size, rank;
1441   PetscInt       i, j;
1442   PetscInt       maxheight, maxwidth;
1443   PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
1444   PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1445   PetscInt       x[2][2], y[2][2], n[2];
1446   PetscInt       first, last;
1447   PetscInt       nidx, *idx;
1448   PetscInt       ii,jj,s,q,d;
1449   PetscInt       k,kk;
1450   PetscMPIInt    color;
1451   MPI_Comm       comm, subcomm;
1452   IS             **iis = 0;
1453 
1454   PetscFunctionBegin;
1455   ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
1456   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1457   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1458   ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
1459   if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1460 	     "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1461 
1462   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1463   s = 0;
1464   ystart = 0;
1465   for (j=0; j<Ndomains; ++j) {
1466     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1467     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1468     /* Vertical domain limits with an overlap. */
1469     ylow = PetscMax(ystart - overlap,0);
1470     yhigh = PetscMin(ystart + maxheight + overlap,N);
1471     xstart = 0;
1472     for (i=0; i<Mdomains; ++i) {
1473       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1474       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1475       /* Horizontal domain limits with an overlap. */
1476       xleft   = PetscMax(xstart - overlap,0);
1477       xright  = PetscMin(xstart + maxwidth + overlap,M);
1478       /*
1479 	 Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1480       */
1481       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1482       if(nidx) {
1483         ++s;
1484       }
1485       xstart += maxwidth;
1486     }/* for(i = 0; i < Mdomains; ++i) */
1487     ystart += maxheight;
1488   }/* for(j = 0; j < Ndomains; ++j) */
1489   /* Now we can allocate the necessary number of ISs. */
1490   *nsub = s;
1491   ierr = PetscMalloc((*nsub)*sizeof(IS),is);CHKERRQ(ierr);
1492   ierr = PetscMalloc((*nsub)*sizeof(IS),is_local);CHKERRQ(ierr);
1493   s = 0;
1494   ystart = 0;
1495   for (j=0; j<Ndomains; ++j) {
1496     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1497     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1498     /* Vertical domain limits with an overlap. */
1499     y[0][0] = PetscMax(ystart - overlap,0);
1500     y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1501     /* Vertical domain limits without an overlap. */
1502     y[1][0] = ystart;
1503     y[1][1] = PetscMin(ystart + maxheight,N);
1504     xstart = 0;
1505     for (i=0; i<Mdomains; ++i) {
1506       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1507       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1508       /* Horizontal domain limits with an overlap. */
1509       x[0][0]  = PetscMax(xstart - overlap,0);
1510       x[0][1]  = PetscMin(xstart + maxwidth + overlap,M);
1511       /* Horizontal domain limits without an overlap. */
1512       x[1][0] = xstart;
1513       x[1][1] = PetscMin(xstart+maxwidth,M);
1514       /*
1515 	 Determine whether this domain intersects this processor's ownership range of pc->pmat.
1516 	 Do this twice: first for the domains with overlaps, and once without.
1517 	 During the first pass create the subcommunicators, and use them on the second pass as well.
1518       */
1519       for(q = 0; q < 2; ++q) {
1520 	/*
1521 	  domain limits, (xleft, xright) and (ylow, yheigh) are adjusted
1522 	  according to whether the domain with an overlap or without is considered.
1523 	*/
1524 	xleft = x[q][0]; xright = x[q][1];
1525 	ylow  = y[q][0]; yhigh  = y[q][1];
1526         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1527 	nidx *= dof;
1528         n[q] = nidx;
1529         /*
1530          Based on the counted number of indices in the local domain *with an overlap*,
1531          construct a subcommunicator of all the processors supporting this domain.
1532          Observe that a domain with an overlap might have nontrivial local support,
1533          while the domain without an overlap might not.  Hence, the decision to participate
1534          in the subcommunicator must be based on the domain with an overlap.
1535          */
1536 	if (q == 0) {
1537 	  if(nidx) {
1538 	    color = 1;
1539 	  } else {
1540 	    color = MPI_UNDEFINED;
1541 	  }
1542 	  ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
1543 	}
1544         /*
1545          Proceed only if the number of local indices *with an overlap* is nonzero.
1546          */
1547         if (n[0]) {
1548           if(q == 0) {
1549             iis = is;
1550           }
1551           if (q == 1) {
1552             /*
1553              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1554              Moreover, if the overlap is zero, the two ISs are identical.
1555              */
1556             if (overlap == 0) {
1557               (*is_local)[s] = (*is)[s];
1558               ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
1559               continue;
1560             } else {
1561               iis = is_local;
1562               subcomm = ((PetscObject)(*is)[s])->comm;
1563             }
1564           }/* if(q == 1) */
1565           idx = PETSC_NULL;
1566 	  ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
1567           if(nidx) {
1568             k    = 0;
1569             for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1570               PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft;
1571               PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright;
1572               kk = dof*(M*jj + x0);
1573               for (ii=x0; ii<x1; ++ii) {
1574                 for(d = 0; d < dof; ++d) {
1575                   idx[k++] = kk++;
1576                 }
1577               }
1578             }
1579           }
1580 	  ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr);
1581 	}/* if(n[0]) */
1582       }/* for(q = 0; q < 2; ++q) */
1583       if(n[0]) {
1584         ++s;
1585       }
1586       xstart += maxwidth;
1587     }/* for(i = 0; i < Mdomains; ++i) */
1588     ystart += maxheight;
1589   }/* for(j = 0; j < Ndomains; ++j) */
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 #undef __FUNCT__
1594 #define __FUNCT__ "PCGASMGetSubdomains"
1595 /*@C
1596     PCGASMGetSubdomains - Gets the subdomains supported on this processor
1597     for the additive Schwarz preconditioner.
1598 
1599     Not Collective
1600 
1601     Input Parameter:
1602 .   pc - the preconditioner context
1603 
1604     Output Parameters:
1605 +   n        - the number of subdomains for this processor (default value = 1)
1606 .   is       - the index sets that define the subdomains supported on this processor
1607 -   is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)
1608 
1609 
1610     Notes:
1611     The IS numbering is in the parallel, global numbering of the vector.
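
    The returned arrays are owned by the PC and must not be freed by the caller.
    A sketch of inspecting the subdomains (assumes pc is of type PCGASM and the
    subdomains have been set or PCSetUp() has been called):
.vb
      PetscInt i, n;
      IS       *is;
      PCGASMGetSubdomains(pc, &n, &is, PETSC_NULL);
      for (i = 0; i < n; ++i) ISView(is[i], PETSC_VIEWER_STDOUT_(((PetscObject)is[i])->comm));
.ve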
1612 
1613     Level: advanced
1614 
1615 .keywords: PC, GASM, get, subdomains, additive Schwarz
1616 
1617 .seealso: PCGASMSetTotalBlockCount(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1618           PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubmatrices()
1619 @*/
1620 PetscErrorCode  PCGASMGetSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1621 {
1622   PC_GASM         *osm;
1623   PetscErrorCode ierr;
1624   PetscBool      match;
1625 
1626   PetscFunctionBegin;
1627   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1628   PetscValidIntPointer(n,2);
1629   if (is) PetscValidPointer(is,3);
1630   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1631   if (!match) {
1632     if (n)  *n  = 0;
1633     if (is) *is = PETSC_NULL;
    if (is_local) *is_local = PETSC_NULL;
1634   } else {
1635     osm = (PC_GASM*)pc->data;
1636     if (n)  *n  = osm->n;
1637     if (is) *is = osm->is;
1638     if (is_local) *is_local = osm->is_local;
1639   }
1640   PetscFunctionReturn(0);
1641 }
1642 
1643 #undef __FUNCT__
1644 #define __FUNCT__ "PCGASMGetSubmatrices"
1645 /*@C
1646     PCGASMGetSubmatrices - Gets the local submatrices (for this processor
1647     only) for the additive Schwarz preconditioner.
1648 
1649     Not Collective
1650 
1651     Input Parameter:
1652 .   pc - the preconditioner context
1653 
1654     Output Parameters:
1655 +   n   - the number of matrices for this processor (default value = 1)
1656 -   mat - the matrices
1657 
1658     Notes: matrices returned by this routine have the same communicators as the index sets (IS)
1659            used to define subdomains in PCGASMSetSubdomains(), or PETSC_COMM_SELF, if the
1660            subdomains were defined using PCGASMSetTotalBlockCount().
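
           The matrices are owned by the PC and must not be destroyed by the caller. A sketch
           of viewing them after PCSetUp() or KSPSetUp() (assumes pc is of type PCGASM):
.vb
      PetscInt i, n;
      Mat      *mat;
      PCGASMGetSubmatrices(pc, &n, &mat);
      for (i = 0; i < n; ++i) MatView(mat[i], PETSC_VIEWER_STDOUT_(((PetscObject)mat[i])->comm));
.ve
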
1661     Level: advanced
1662 
1663 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi
1664 
1665 .seealso: PCGASMSetTotalBlockCount(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1666           PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
1667 @*/
1668 PetscErrorCode  PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1669 {
1670   PC_GASM         *osm;
1671   PetscErrorCode ierr;
1672   PetscBool      match;
1673 
1674   PetscFunctionBegin;
1675   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1676   PetscValidIntPointer(n,2);
1677   if (mat) PetscValidPointer(mat,3);
1678   if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1679   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1680   if (!match) SETERRQ2(((PetscObject)pc)->comm, PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1681   osm = (PC_GASM*)pc->data;
1682   if (n)   *n   = osm->n;
1683   if (mat) *mat = osm->pmat;
1684 
1685   PetscFunctionReturn(0);
1686 }
1687