xref: /petsc/src/ksp/pc/impls/gasm/gasm.c (revision e91c6855ec8eedcfe894fb1dc8d9ee5acd2335f4)
1 #define PETSCKSP_DLL
2 
3 /*
4   This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation.
5   In this version each processor may have any number of subdomains and a subdomain may live on multiple
6   processors.
7 
8        N    - total number of subdomains on all processors
9        n    - actual number of subdomains on this processor
10        nmax - maximum number of subdomains per processor
11 */
12 #include "private/pcimpl.h"     /*I "petscpc.h" I*/
13 
/*
   Private context for the generalized additive Schwarz (GASM) preconditioner.
   Unlike PCASM, each subdomain may be shared by several processes and each
   process may own several subdomains (see the file-header comment).
*/
typedef struct {
  PetscInt   N,n,nmax;            /* N: total subdomains across all processes; n: subdomains on this process; nmax: max subdomains on any process */
  PetscInt   overlap;             /* overlap requested by user */
  KSP        *ksp;                /* linear solvers for each block */
  Vec        gx,gy;               /* Merged work vectors */
  Vec        *x,*y;               /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
  IS         gis, gis_local;      /* merged ISs */
  VecScatter grestriction;        /* merged restriction */
  VecScatter gprolongation;       /* merged prolongation */
  IS         *is;                 /* index set that defines each overlapping subdomain */
  IS         *is_local;           /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */
  Mat        *pmat;               /* subdomain block matrices */
  PCGASMType  type;               /* use reduced interpolation, restriction or both */
  PetscBool  type_set;            /* if user set this value (so won't change it for symmetric problems) */
  PetscBool  same_local_solves;   /* flag indicating whether all local solvers are same */
  PetscBool  sort_indices;        /* flag to sort subdomain indices */
} PC_GASM;
31 
32 #undef __FUNCT__
33 #define __FUNCT__ "PCGASMPrintSubdomains"
34 static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
35 {
36   PC_GASM         *osm  = (PC_GASM*)pc->data;
37   const char     *prefix;
38   char           fname[PETSC_MAX_PATH_LEN+1];
39   PetscViewer    viewer, sviewer;
40   PetscInt       i,j,nidx;
41   const PetscInt *idx;
42   char           *cidx;
43   PetscBool      found;
44   PetscMPIInt    size, rank;
45   PetscErrorCode ierr;
46 
47   PetscFunctionBegin;
48   ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size); CHKERRQ(ierr);
49   ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank); CHKERRQ(ierr);
50   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
51   ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr);
52   if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
53   for (i=0;i<osm->n;++i) {
54     ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
55     ierr = ISGetLocalSize(osm->is[i], &nidx); CHKERRQ(ierr);
56     /*
57      No more than 15 characters per index plus a space.
58      PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
59      in case nidx == 0. That will take care of the space for the trailing '\0' as well.
60      For nidx == 0, the whole string 16 '\0'.
61      */
62     ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx); CHKERRQ(ierr);
63     ierr = ISGetIndices(osm->is[i], &idx);    CHKERRQ(ierr);
64     ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*(nidx+1), &sviewer); CHKERRQ(ierr);
65     for(j = 0; j < nidx; ++j) {
66       ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]); CHKERRQ(ierr);
67     }
68     ierr = PetscViewerDestroy(sviewer); CHKERRQ(ierr);
69     ierr = ISRestoreIndices(osm->is[i],&idx); CHKERRQ(ierr);
70 
71     ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n"); CHKERRQ(ierr);
72     ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx); CHKERRQ(ierr);
73     ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
74     ierr = PetscViewerASCIIPrintf(viewer, "\n"); CHKERRQ(ierr);
75     ierr = PetscFree(cidx); CHKERRQ(ierr);
76     ierr = PetscViewerDestroy(viewer); CHKERRQ(ierr);
77     if (osm->is_local) {
78       ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
79       ierr = ISGetLocalSize(osm->is_local[i], &nidx); CHKERRQ(ierr);
80       /*
81        No more than 15 characters per index plus a space.
82        PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
83        in case nidx == 0. That will take care of the space for the trailing '\0' as well.
84        For nidx == 0, the whole string 16 '\0'.
85        */
86       ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx); CHKERRQ(ierr);
87       ierr = ISGetIndices(osm->is_local[i], &idx);    CHKERRQ(ierr);
88       ierr = PetscViewerStringOpen(((PetscObject)(osm->is_local[i]))->comm, cidx, 16*(nidx+1), &sviewer); CHKERRQ(ierr);
89       for(j = 0; j < nidx; ++j) {
90         ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]); CHKERRQ(ierr);
91       }
92       ierr = PetscViewerDestroy(sviewer); CHKERRQ(ierr);
93       ierr = ISRestoreIndices(osm->is_local[i],&idx); CHKERRQ(ierr);
94 
95       ierr = PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n"); CHKERRQ(ierr);
96       ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx); CHKERRQ(ierr);
97       ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
98       ierr = PetscViewerASCIIPrintf(viewer, "\n"); CHKERRQ(ierr);
99       ierr = PetscFree(cidx); CHKERRQ(ierr);
100       ierr = PetscViewerDestroy(viewer); CHKERRQ(ierr);
101     }
102   }
103   PetscFunctionReturn(0);
104 }
105 
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "PCView_GASM"
109 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
110 {
111   PC_GASM         *osm = (PC_GASM*)pc->data;
112   const char     *prefix;
113   PetscErrorCode ierr;
114   PetscMPIInt    rank, size;
115   PetscInt       i,bsz;
116   PetscBool      iascii,isstring, print_subdomains;
117   PetscViewer    sviewer;
118 
119 
120   PetscFunctionBegin;
121   ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size); CHKERRQ(ierr);
122   ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank); CHKERRQ(ierr);
123   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
124   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
125   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
126   ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr);
127   if (iascii) {
128     char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set";
129     if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);}
130     if (osm->nmax > 0)     {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);}
131     ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr);
132     ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
133     ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr);
134     ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
135     ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
136     if (osm->same_local_solves) {
137       if (osm->ksp) {
138         ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr);
139         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
140         if (!rank) {
141           ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
142           ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
143           ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
144         }
145         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
146       }
147     } else {
148       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
149       ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr);
150       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
151       ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
152       for (i=0; i<osm->nmax; i++) {
153         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
154         if (i < osm->n) {
155           ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
156           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr);
157           ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
158           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
159         }
160         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
161       }
162       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
163       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
164     }
165   } else if (isstring) {
166     ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr);
167     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
168     if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
169     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
170   } else {
171     SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name);
172   }
173   ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
174   if(print_subdomains) {
175     ierr = PCGASMPrintSubdomains(pc); CHKERRQ(ierr);
176   }
177   PetscFunctionReturn(0);
178 }
179 
180 
181 
182 
183 
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_GASM"
/*
   PCSetUp_GASM - Builds the GASM data structures on first call:
     - decides the number of subdomains (globally and locally) if not set;
     - creates osm->is / osm->is_local and extends osm->is by the overlap;
     - merges all local subdomain ISs into osm->gis and creates the merged
       work vectors gx/gy plus the restriction/prolongation scatters;
     - creates the per-subdomain work vectors (aliasing gx/gy storage) and
       one KSP per subdomain.
   On subsequent calls it only (re)extracts the subdomain matrices and resets
   the subdomain solver operators.
*/
static PetscErrorCode PCSetUp_GASM(PC pc)
{
  PC_GASM         *osm  = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscBool      symset,flg;
  PetscInt       i;
  PetscMPIInt    rank, size;
  MatReuse       scall = MAT_REUSE_MATRIX;   /* reuse submatrices unless this is the first setup or the nonzero pattern changed */
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  PetscInt       dn;       /* Number of indices in a single subdomain assigned to this processor. */
  const PetscInt *didx;    /* Indices from a single subdomain assigned to this processor. */
  PetscInt       ddn;      /* Number of indices in all subdomains assigned to this processor. */
  PetscInt       *ddidx;   /* Indices of all subdomains assigned to this processor. */
  IS             gid;      /* Identity IS of the size of all subdomains assigned to this processor. */
  Vec            x,y;
  PetscScalar    *gxarray, *gyarray;
  PetscInt       gfirst, glast;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  if (!pc->setupcalled) {

    /* If the user did not set a GASM type, default to BASIC when the matrix is known symmetric. */
    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) { osm->type = PC_GASM_BASIC; }
    }

    if (osm->N == PETSC_DECIDE && osm->n < 1) {
      /* no subdomains given, use one per processor */
      osm->nmax = osm->n = 1;
      ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
      osm->N = size;
    } else if (osm->N == PETSC_DECIDE) {
      PetscInt inwork[2], outwork[2];
      /* determine global number of subdomains and the max number of local subdomains */
      /* PetscMaxSum_Op reduces the pair to (max of entry 0, sum of entry 1) in one Allreduce. */
      inwork[0] = inwork[1] = osm->n;
      ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
      osm->nmax = outwork[0];
      osm->N    = outwork[1];
    }
    if (!osm->is){ /* create the index sets */
      ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr);
    }
    if (!osm->is_local) {
      /*
         This indicates that osm->is should define a nonoverlapping decomposition
         (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetLocalSubdomains,
          but the assumption is that either the user does the right thing, or subdomains in osm->is have been created
          via PCGASMCreateSubdomains, which guarantees a nonoverlapping decomposition).
         Therefore, osm->is will be used to define osm->is_local.
         If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap,
         so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping.
         Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is.
      */
      ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
      for (i=0; i<osm->n; i++) {
        if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
          ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr);
          ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr);
        } else {
          ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr);
          osm->is_local[i] = osm->is[i];
        }
      }
    }
    /* Beyond this point osm->is_local is not null. */
    if (osm->overlap > 0) {
      /* Extend the "overlapping" regions by a number of steps */
      ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr);
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); }

    if (osm->sort_indices) {
      for (i=0; i<osm->n; i++) {
        ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
        ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
      }
    }
    /* Merge the ISs, create merged vectors and scatter contexts. */
    /* Restriction ISs. */
    /* First pass: total length of the concatenation of all local subdomain ISs. */
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn);          CHKERRQ(ierr);
      ddn += dn;
    }
    ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx); CHKERRQ(ierr);
    /* Second pass: copy each subdomain's indices, back to back, into ddidx. */
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn); CHKERRQ(ierr);
      ierr = ISGetIndices(osm->is[i],&didx); CHKERRQ(ierr);
      ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn); CHKERRQ(ierr);
      ierr = ISRestoreIndices(osm->is[i], &didx); CHKERRQ(ierr);
      ddn += dn;
    }
    /* osm->gis takes ownership of ddidx (PETSC_OWN_POINTER): do not free it here. */
    ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis); CHKERRQ(ierr);
    ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr);
    ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx); CHKERRQ(ierr);
    ierr = VecDuplicate(osm->gx,&osm->gy);                                     CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);                     CHKERRQ(ierr);
    /* gid enumerates this process's slots of the merged vector: gfirst .. gfirst+ddn-1. */
    ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid);              CHKERRQ(ierr);
    /* Restriction: gather gis-indexed entries of a global vector into the merged vector gx. */
    ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction));     CHKERRQ(ierr);
    ierr = ISDestroy(gid);                                                     CHKERRQ(ierr);
    /* Prolongation ISs */
    { PetscInt       dn_local;       /* Number of indices in the local part of a single domain assigned to this processor. */
      const PetscInt *didx_local;    /* Global indices from the local part of a single domain assigned to this processor. */
      PetscInt       ddn_local;      /* Number of indices in the local part of the disjoint union all domains assigned to this processor. */
      PetscInt       *ddidx_local;   /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */
      /**/
      ISLocalToGlobalMapping ltog;          /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */
      PetscInt              *ddidx_llocal;  /* Mapped local indices of the disjoint union of local parts of subdomains. */
      PetscInt               ddn_llocal;    /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */
      IS                     gis_llocal;    /* IS with ddidx_llocal indices. */
      PetscInt               j;
      /* Same two-pass concatenation as above, but over the nonoverlapping is_local sets. */
      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->is_local[i],&dn_local); CHKERRQ(ierr);
        ddn_local += dn_local;
      }
      ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local); CHKERRQ(ierr);
      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->is_local[i],&dn_local); CHKERRQ(ierr);
        ierr = ISGetIndices(osm->is_local[i],&didx_local); CHKERRQ(ierr);
        ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local); CHKERRQ(ierr);
        ierr = ISRestoreIndices(osm->is_local[i], &didx_local); CHKERRQ(ierr);
        ddn_local += dn_local;
      }
      ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal); CHKERRQ(ierr);
      /* NOTE(review): ddidx_local is handed to gis_local with PETSC_OWN_POINTER but is
         still read two lines below; this is safe only while gis_local is alive and does
         not modify the array -- appears intentional, but worth confirming. */
      ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local)); CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingCreateIS(osm->gis,&ltog);CHKERRQ(ierr);
      /* Translate the global indices of gis_local into positions within gis (0..ddn-1). */
      ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingDestroy(ltog);CHKERRQ(ierr);
      if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local);
      /* Now convert these localized indices into the global indices into the merged output vector. */
      ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast); CHKERRQ(ierr);
      for(j=0; j < ddn_llocal; ++j) {
        ddidx_llocal[j] += gfirst;
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal); CHKERRQ(ierr);
      /* Prolongation: scatter only the nonoverlapping (local) portion of gy back to the global vector. */
      ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);              CHKERRQ(ierr);
      ierr = ISDestroy(gis_llocal);CHKERRQ(ierr);
    }
    /* Create the subdomain work vectors. */
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gx, &gxarray);                                     CHKERRQ(ierr);
    ierr = VecGetArray(osm->gy, &gyarray);                                     CHKERRQ(ierr);
    /* ddn accumulates the offset of subdomain i within the merged arrays;
       dn is set inside the body before the `ddn += dn` increment runs. */
    for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) {
      PetscInt dN;
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr);
      /* osm->x[i]/osm->y[i] alias slices of gx/gy storage; no extra copies during apply.
         NOTE(review): the arrays are used past VecRestoreArray below -- assumes the
         native MPI vector keeps its array in place; confirm against Vec implementation. */
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr);
    }
    ierr = VecRestoreArray(osm->gx, &gxarray); CHKERRQ(ierr);
    ierr = VecRestoreArray(osm->gy, &gyarray); CHKERRQ(ierr);
    ierr = VecDestroy(x); CHKERRQ(ierr);
    ierr = VecDestroy(y); CHKERRQ(ierr);
    /* Create the local solvers */
    ierr = PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) { /* KSPs are local */
      ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
      /* Subdomain solvers are configured via the "<prefix>sub_" options prefix. */
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    scall = MAT_INITIAL_MATRIX;

  } else {
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
  }

  /*
     Extract out the submatrices.
  */
  /* The parallel extraction routine is needed when subdomains may span processes. */
  if(size > 1) {
    ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  }
  else {
    ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  }
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over submatrices putting them into local ksp
  */
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }

  PetscFunctionReturn(0);
}
409 
410 #undef __FUNCT__
411 #define __FUNCT__ "PCSetUpOnBlocks_GASM"
412 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
413 {
414   PC_GASM         *osm = (PC_GASM*)pc->data;
415   PetscErrorCode ierr;
416   PetscInt       i;
417 
418   PetscFunctionBegin;
419   for (i=0; i<osm->n; i++) {
420     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
421   }
422   PetscFunctionReturn(0);
423 }
424 
425 #undef __FUNCT__
426 #define __FUNCT__ "PCApply_GASM"
427 static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y)
428 {
429   PC_GASM         *osm = (PC_GASM*)pc->data;
430   PetscErrorCode ierr;
431   PetscInt       i;
432   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
433 
434   PetscFunctionBegin;
435   /*
436      Support for limiting the restriction or interpolation to only local
437      subdomain values (leaving the other values 0).
438   */
439   if (!(osm->type & PC_GASM_RESTRICT)) {
440     forward = SCATTER_FORWARD_LOCAL;
441     /* have to zero the work RHS since scatter may leave some slots empty */
442     ierr = VecZeroEntries(osm->gx); CHKERRQ(ierr);
443   }
444   if (!(osm->type & PC_GASM_INTERPOLATE)) {
445     reverse = SCATTER_REVERSE_LOCAL;
446   }
447 
448   ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
449   ierr = VecZeroEntries(y);CHKERRQ(ierr);
450   ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
451   /* do the subdomain solves */
452   for (i=0; i<osm->n; ++i) {
453     ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
454   }
455   ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
456   ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "PCApplyTranspose_GASM"
462 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y)
463 {
464   PC_GASM         *osm = (PC_GASM*)pc->data;
465   PetscErrorCode ierr;
466   PetscInt       i;
467   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
468 
469   PetscFunctionBegin;
470   /*
471      Support for limiting the restriction or interpolation to only local
472      subdomain values (leaving the other values 0).
473 
474      Note: these are reversed from the PCApply_GASM() because we are applying the
475      transpose of the three terms
476   */
477   if (!(osm->type & PC_GASM_INTERPOLATE)) {
478     forward = SCATTER_FORWARD_LOCAL;
479     /* have to zero the work RHS since scatter may leave some slots empty */
480     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
481   }
482   if (!(osm->type & PC_GASM_RESTRICT)) {
483     reverse = SCATTER_REVERSE_LOCAL;
484   }
485 
486   ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
487   ierr = VecZeroEntries(y);CHKERRQ(ierr);
488   ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
489   /* do the local solves */
490   for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
491     ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
492   }
493   ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
494   ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
495   PetscFunctionReturn(0);
496 }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "PCDestroy_GASM"
500 static PetscErrorCode PCDestroy_GASM(PC pc)
501 {
502   PC_GASM         *osm = (PC_GASM*)pc->data;
503   PetscErrorCode ierr;
504   PetscInt       i;
505 
506   PetscFunctionBegin;
507   if (osm->ksp) {
508     for (i=0; i<osm->n; i++) {
509       ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr);
510     }
511     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
512   }
513   if (osm->pmat) {
514     if (osm->n > 0) {
515       ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
516     }
517   }
518   for (i=0; i<osm->n; i++) {
519     ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr);
520     ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr);
521   }
522   ierr = PetscFree(osm->x);CHKERRQ(ierr);
523   ierr = PetscFree(osm->y);CHKERRQ(ierr);
524   ierr = VecDestroy(osm->gx); CHKERRQ(ierr);
525   ierr = VecDestroy(osm->gy); CHKERRQ(ierr);
526 
527   ierr = VecScatterDestroy(osm->grestriction); CHKERRQ(ierr);
528   ierr = VecScatterDestroy(osm->gprolongation); CHKERRQ(ierr);
529 
530   if (osm->is) {
531     ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
532   }
533   ierr = PetscFree(pc->data);CHKERRQ(ierr);
534   PetscFunctionReturn(0);
535 }
536 
537 #undef __FUNCT__
538 #define __FUNCT__ "PCSetFromOptions_GASM"
539 static PetscErrorCode PCSetFromOptions_GASM(PC pc) {
540   PC_GASM         *osm = (PC_GASM*)pc->data;
541   PetscErrorCode ierr;
542   PetscInt       blocks,ovl;
543   PetscBool      symset,flg;
544   PCGASMType      gasmtype;
545 
546   PetscFunctionBegin;
547   /* set the type to symmetric if matrix is symmetric */
548   if (!osm->type_set && pc->pmat) {
549     ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
550     if (symset && flg) { osm->type = PC_GASM_BASIC; }
551   }
552   ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr);
553     ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
554     if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); }
555     ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
556     if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
557     flg  = PETSC_FALSE;
558     ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
559     if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); }
560   ierr = PetscOptionsTail();CHKERRQ(ierr);
561   PetscFunctionReturn(0);
562 }
563 
564 /*------------------------------------------------------------------------------------*/
565 
566 EXTERN_C_BEGIN
567 #undef __FUNCT__
568 #define __FUNCT__ "PCGASMSetLocalSubdomains_GASM"
569 PetscErrorCode  PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[])
570 {
571   PC_GASM         *osm = (PC_GASM*)pc->data;
572   PetscErrorCode ierr;
573   PetscInt       i;
574 
575   PetscFunctionBegin;
576   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
577   if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp().");
578 
579   if (!pc->setupcalled) {
580     osm->n            = n;
581     osm->is           = 0;
582     osm->is_local     = 0;
583     if (is) {
584       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
585     }
586     if (is_local) {
587       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);}
588     }
589     if (osm->is) {
590       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
591     }
592     if (is) {
593       ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr);
594       for (i=0; i<n; i++) { osm->is[i] = is[i]; }
595       /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */
596       osm->overlap = -1;
597     }
598     if (is_local) {
599       ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
600       for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
601     }
602   }
603   PetscFunctionReturn(0);
604 }
605 EXTERN_C_END
606 
607 EXTERN_C_BEGIN
608 #undef __FUNCT__
609 #define __FUNCT__ "PCGASMSetTotalSubdomains_GASM"
610 PetscErrorCode  PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N) {
611   PC_GASM         *osm = (PC_GASM*)pc->data;
612   PetscErrorCode ierr;
613   PetscMPIInt    rank,size;
614   PetscInt       n;
615 
616   PetscFunctionBegin;
617   if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
618 
619   /*
620      Split the subdomains equally among all processors
621   */
622   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
623   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
624   n = N/size + ((N % size) > rank);
625   if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
626   if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp().");
627   if (!pc->setupcalled) {
628     if (osm->is) {
629       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
630     }
631     osm->N            = N;
632     osm->n            = n;
633     osm->is           = 0;
634     osm->is_local     = 0;
635   }
636   PetscFunctionReturn(0);
637 }/* PCGASMSetTotalSubdomains_GASM() */
638 EXTERN_C_END
639 
640 EXTERN_C_BEGIN
641 #undef __FUNCT__
642 #define __FUNCT__ "PCGASMSetOverlap_GASM"
643 PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
644 {
645   PC_GASM *osm = (PC_GASM*)pc->data;
646 
647   PetscFunctionBegin;
648   if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
649   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
650   if (!pc->setupcalled) {
651     osm->overlap = ovl;
652   }
653   PetscFunctionReturn(0);
654 }
655 EXTERN_C_END
656 
657 EXTERN_C_BEGIN
658 #undef __FUNCT__
659 #define __FUNCT__ "PCGASMSetType_GASM"
660 PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
661 {
662   PC_GASM *osm = (PC_GASM*)pc->data;
663 
664   PetscFunctionBegin;
665   osm->type     = type;
666   osm->type_set = PETSC_TRUE;
667   PetscFunctionReturn(0);
668 }
669 EXTERN_C_END
670 
671 EXTERN_C_BEGIN
672 #undef __FUNCT__
673 #define __FUNCT__ "PCGASMSetSortIndices_GASM"
674 PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool  doSort)
675 {
676   PC_GASM *osm = (PC_GASM*)pc->data;
677 
678   PetscFunctionBegin;
679   osm->sort_indices = doSort;
680   PetscFunctionReturn(0);
681 }
682 EXTERN_C_END
683 
684 EXTERN_C_BEGIN
685 #undef __FUNCT__
686 #define __FUNCT__ "PCGASMGetSubKSP_GASM"
687 /*
688    FIX: This routine might need to be modified once multiple ranks per subdomain are allowed.
689         In particular, it would upset the global subdomain number calculation.
690 */
691 PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
692 {
693   PC_GASM         *osm = (PC_GASM*)pc->data;
694   PetscErrorCode ierr;
695 
696   PetscFunctionBegin;
697   if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here");
698 
699   if (n) {
700     *n = osm->n;
701   }
702   if (first) {
703     ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
704     *first -= osm->n;
705   }
706   if (ksp) {
707     /* Assume that local solves are now different; not necessarily
708        true though!  This flag is used only for PCView_GASM() */
709     *ksp                   = osm->ksp;
710     osm->same_local_solves = PETSC_FALSE;
711   }
712   PetscFunctionReturn(0);
713 }/* PCGASMGetSubKSP_GASM() */
714 EXTERN_C_END
715 
716 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetLocalSubdomains"
/*@C
    PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor
    only) for the additive Schwarz preconditioner.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
.   n - the number of subdomains for this processor (default value = 1)
.   is - the index sets that define the subdomains for this processor
         (or PETSC_NULL for PETSc to determine subdomains)
-   is_local - the index sets that define the local part of the subdomains for this processor
         (or PETSC_NULL to use the default of 1 subdomain per process)

    Notes:
    The IS numbering is in the parallel, global numbering of the vector.

    By default the GASM preconditioner uses 1 block per processor.

    Use PCGASMSetTotalSubdomains() to set the subdomains for all processors.

    Level: advanced

.keywords: PC, GASM, set, local, subdomains, additive Schwarz

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode  PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* Dispatch to the method composed on the PC; silently a no-op if pc is not of type PCGASM. */
  ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
756 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetTotalSubdomains"
/*@C
    PCGASMSetTotalSubdomains - Sets the total number of subdomains, over all
    processors, for the additive Schwarz preconditioner.  Either all or no
    processors in the PC communicator must call this routine, with the same
    value of N.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
-   N - the total number of subdomains over all processors

    Options Database Key:
    To set the total number of subdomain blocks rather than specify the
    index sets, use the option
.    -pc_gasm_blocks <blks> - Sets total blocks

    Notes:
    The subdomains are distributed as evenly as possible over the processors;
    the actual index sets are generated internally during PCSetUp().  Use
    PCGASMSetLocalSubdomains() to set the subdomain index sets explicitly.

    By default the GASM preconditioner uses 1 block per processor.

    Level: advanced

.keywords: PC, GASM, set, total, global, subdomains, additive Schwarz

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* Dispatch to the method composed on the PC; silently a no-op if pc is not of type PCGASM. */
  ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
805 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetOverlap"
/*@
    PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)

    Options Database Key:
.   -pc_gasm_overlap <ovl> - Sets overlap

    Notes:
    By default the GASM preconditioner uses 1 block per processor.  To use
    multiple blocks per processor, see PCGASMSetTotalSubdomains() and
    PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>).

    The overlap defaults to 1, so if one desires that no additional
    overlap be computed beyond what may have been set with a call to
    PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl
    must be set to be 0.  In particular, if one does not explicitly set
    the subdomains in an application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in a GASM
    variant that is equivalent to the block Jacobi preconditioner.

    Note that one can define initial index sets with any overlap via
    PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine
    PCGASMSetOverlap() merely allows PETSc to extend that overlap further
    if desired.

    Level: intermediate

.keywords: PC, GASM, set, overlap

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveInt(pc,ovl,2);
  /* Dispatch to the method composed on the PC; silently a no-op if pc is not of type PCGASM. */
  ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
857 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetType"
/*@
    PCGASMSetType - Sets the type of restriction and interpolation used
    for local problems in the additive Schwarz method.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   type - variant of GASM, one of
.vb
      PC_GASM_BASIC       - full interpolation and restriction
      PC_GASM_RESTRICT    - full restriction, local processor interpolation
      PC_GASM_INTERPOLATE - full interpolation, local processor restriction
      PC_GASM_NONE        - local processor restriction and interpolation
.ve

    Options Database Key:
.   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

    Level: intermediate

.keywords: PC, GASM, set, type

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveEnum(pc,type,2);
  /* Dispatch to the method composed on the PC; silently a no-op if pc is not of type PCGASM. */
  ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
896 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetSortIndices"
/*@
    PCGASMSetSortIndices - Determines whether subdomain indices are sorted.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   doSort - sort the subdomain indices

    Notes:
    Sorting is enabled by default.

    Level: intermediate

.keywords: PC, GASM, set, type

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool  doSort)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,doSort,2);
  /* Dispatch to the method composed on the PC; silently a no-op if pc is not of type PCGASM. */
  ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
925 
#undef __FUNCT__
#define __FUNCT__ "PCGASMGetSubKSP"
/*@C
   PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
   this processor.

   Collective on PC iff first_local is requested

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n_local - the number of blocks on this processor or PETSC_NULL
.  first_local - the global number of the first block on this processor or PETSC_NULL,
                 all processors must request or all must pass PETSC_NULL
-  ksp - the array of KSP contexts

   Note:
   After PCGASMGetSubKSP() the array of KSP contexts is not to be freed

   Currently for some matrix implementations only 1 block per processor
   is supported.

   You must call KSPSetUp() before calling PCGASMGetSubKSP().

   Level: advanced

.keywords: PC, GASM, additive Schwarz, get, sub, KSP, context

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMSetOverlap(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* PetscUseMethod (unlike PetscTryMethod) errors if the PC is not of a type providing this method. */
  ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
967 
968 /* -------------------------------------------------------------------------------------*/
969 /*MC
970    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
971            its own KSP object.
972 
973    Options Database Keys:
+  -pc_gasm_truelocal - Activates PCGASMSetUseTrueLocal()
975 .  -pc_gasm_blocks <blks> - Sets total blocks
976 .  -pc_gasm_overlap <ovl> - Sets overlap
977 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
978 
979      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
980       will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
981       -pc_gasm_type basic to use the standard GASM.
982 
983    Notes: Each processor can have one or more blocks, but a block cannot be shared by more
984      than one processor. Defaults to one block per processor.
985 
986      To set options on the solvers for each block append -sub_ to all the KSP, and PC
987         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
988 
989      To set the options on the solvers separate for each block call PCGASMGetSubKSP()
990          and set the options directly on the resulting KSP object (you can access its PC
991          with KSPGetPC())
992 
993 
994    Level: beginner
995 
996    Concepts: additive Schwarz method
997 
998     References:
999     An additive variant of the Schwarz alternating method for the case of many subregions
1000     M Dryja, OB Widlund - Courant Institute, New York University Technical report
1001 
1002     Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1003     Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
1004 
1005 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1006            PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(),
1007            PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()
1008 
1009 M*/
1010 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_GASM"
/*
   PCCreate_GASM - Creates the data structure for the GASM preconditioner,
   fills in the default parameter values, installs the PC operation table,
   and composes the PCGASMXXX_GASM implementations so the public PCGASMXXX
   interface routines can dispatch to them.
*/
PetscErrorCode  PCCreate_GASM(PC pc)
{
  PetscErrorCode ierr;
  PC_GASM         *osm;

  PetscFunctionBegin;
  ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr);
  /* Default state: subdomain count to be decided, overlap of 1, no work objects yet. */
  osm->N                 = PETSC_DECIDE;
  osm->n                 = 0;
  osm->nmax              = 0;
  osm->overlap           = 1;
  osm->ksp               = 0;
  osm->grestriction      = 0;
  osm->gprolongation     = 0;
  osm->gx                = 0;
  osm->gy                = 0;
  osm->x                 = 0;
  osm->y                 = 0;
  osm->is                = 0;
  osm->is_local          = 0;
  osm->pmat              = 0;
  osm->type              = PC_GASM_RESTRICT;  /* default variant; may be changed via PCGASMSetType() */
  osm->same_local_solves = PETSC_TRUE;
  osm->sort_indices      = PETSC_TRUE;

  /* Install the PC operation table; richardson is not supported. */
  pc->data                   = (void*)osm;
  pc->ops->apply             = PCApply_GASM;
  pc->ops->applytranspose    = PCApplyTranspose_GASM;
  pc->ops->setup             = PCSetUp_GASM;
  pc->ops->destroy           = PCDestroy_GASM;
  pc->ops->setfromoptions    = PCSetFromOptions_GASM;
  pc->ops->setuponblocks     = PCSetUpOnBlocks_GASM;
  pc->ops->view              = PCView_GASM;
  pc->ops->applyrichardson   = 0;

  /* Compose the type-specific implementations under the names the public routines look up. */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM",
                    PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM",
                    PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM",
                    PCGASMSetOverlap_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM",
                    PCGASMSetType_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM",
                    PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM",
                    PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
1064 
1065 
1066 #undef __FUNCT__
1067 #define __FUNCT__ "PCGASMCreateSubdomains"
1068 /*@C
1069    PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
1070    preconditioner for a any problem on a general grid.
1071 
1072    Collective
1073 
1074    Input Parameters:
1075 +  A - The global matrix operator
1076 -  n - the number of local blocks
1077 
1078    Output Parameters:
1079 .  outis - the array of index sets defining the subdomains
1080 
1081    Level: advanced
1082 
1083    Note: this generates nonoverlapping subdomains; the PCGASM will generate the overlap
1084     from these if you use PCGASMSetLocalSubdomains()
1085 
1086     In the Fortran version you must provide the array outis[] already allocated of length n.
1087 
1088 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1089 
1090 .seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains()
1091 @*/
1092 PetscErrorCode  PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
1093 {
1094   MatPartitioning           mpart;
1095   const char                *prefix;
1096   PetscErrorCode            (*f)(Mat,PetscBool *,MatReuse,Mat*);
1097   PetscMPIInt               size;
1098   PetscInt                  i,j,rstart,rend,bs;
1099   PetscBool                 iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1100   Mat                       Ad = PETSC_NULL, adj;
1101   IS                        ispart,isnumb,*is;
1102   PetscErrorCode            ierr;
1103 
1104   PetscFunctionBegin;
1105   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1106   PetscValidPointer(outis,3);
1107   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);
1108 
1109   /* Get prefix, row distribution, and block size */
1110   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1111   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1112   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1113   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1114 
1115   /* Get diagonal block from matrix if possible */
1116   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1117   ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
1118   if (f) {
1119     ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr);
1120   } else if (size == 1) {
1121     iscopy = PETSC_FALSE; Ad = A;
1122   } else {
1123     iscopy = PETSC_FALSE; Ad = PETSC_NULL;
1124   }
1125   if (Ad) {
1126     ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1127     if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1128   }
1129   if (Ad && n > 1) {
1130     PetscBool  match,done;
1131     /* Try to setup a good matrix partitioning if available */
1132     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1133     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1134     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1135     ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
1136     if (!match) {
1137       ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
1138     }
1139     if (!match) { /* assume a "good" partitioner is available */
1140       PetscInt na,*ia,*ja;
1141       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1142       if (done) {
1143 	/* Build adjacency matrix by hand. Unfortunately a call to
1144 	   MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1145 	   remove the block-aij structure and we cannot expect
1146 	   MatPartitioning to split vertices as we need */
1147 	PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
1148 	nnz = 0;
1149 	for (i=0; i<na; i++) { /* count number of nonzeros */
1150 	  len = ia[i+1] - ia[i];
1151 	  row = ja + ia[i];
1152 	  for (j=0; j<len; j++) {
1153 	    if (row[j] == i) { /* don't count diagonal */
1154 	      len--; break;
1155 	    }
1156 	  }
1157 	  nnz += len;
1158 	}
1159 	ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
1160 	ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
1161 	nnz    = 0;
1162 	iia[0] = 0;
1163 	for (i=0; i<na; i++) { /* fill adjacency */
1164 	  cnt = 0;
1165 	  len = ia[i+1] - ia[i];
1166 	  row = ja + ia[i];
1167 	  for (j=0; j<len; j++) {
1168 	    if (row[j] != i) { /* if not diagonal */
1169 	      jja[nnz+cnt++] = row[j];
1170 	    }
1171 	  }
1172 	  nnz += cnt;
1173 	  iia[i+1] = nnz;
1174 	}
1175 	/* Partitioning of the adjacency matrix */
1176 	ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
1177 	ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1178 	ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
1179 	ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1180 	ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1181 	ierr = MatDestroy(adj);CHKERRQ(ierr);
1182 	foundpart = PETSC_TRUE;
1183       }
1184       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1185     }
1186     ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr);
1187   }
1188   if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);}
1189 
1190   ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
1191   *outis = is;
1192 
1193   if (!foundpart) {
1194 
1195     /* Partitioning by contiguous chunks of rows */
1196 
1197     PetscInt mbs   = (rend-rstart)/bs;
1198     PetscInt start = rstart;
1199     for (i=0; i<n; i++) {
1200       PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1201       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1202       start += count;
1203     }
1204 
1205   } else {
1206 
1207     /* Partitioning by adjacency of diagonal block  */
1208 
1209     const PetscInt *numbering;
1210     PetscInt       *count,nidx,*indices,*newidx,start=0;
1211     /* Get node count in each partition */
1212     ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
1213     ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
1214     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1215       for (i=0; i<n; i++) count[i] *= bs;
1216     }
1217     /* Build indices from node numbering */
1218     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1219     ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
1220     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1221     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1222     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1223     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1224     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1225       ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
1226       for (i=0; i<nidx; i++)
1227 	for (j=0; j<bs; j++)
1228 	  newidx[i*bs+j] = indices[i]*bs + j;
1229       ierr = PetscFree(indices);CHKERRQ(ierr);
1230       nidx   *= bs;
1231       indices = newidx;
1232     }
1233     /* Shift to get global indices */
1234     for (i=0; i<nidx; i++) indices[i] += rstart;
1235 
1236     /* Build the index sets for each block */
1237     for (i=0; i<n; i++) {
1238       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
1239       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1240       start += count[i];
1241     }
1242 
1243     ierr = PetscFree(count);
1244     ierr = PetscFree(indices);
1245     ierr = ISDestroy(isnumb);CHKERRQ(ierr);
1246     ierr = ISDestroy(ispart);CHKERRQ(ierr);
1247   }
1248 
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "PCGASMDestroySubdomains"
1254 /*@C
1255    PCGASMDestroySubdomains - Destroys the index sets created with
1256    PCGASMCreateSubdomains(). Should be called after setting subdomains
1257    with PCGASMSetLocalSubdomains().
1258 
1259    Collective
1260 
1261    Input Parameters:
1262 +  n - the number of index sets
1263 .  is - the array of index sets
1264 -  is_local - the array of local index sets, can be PETSC_NULL
1265 
1266    Level: advanced
1267 
1268 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1269 
1270 .seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains()
1271 @*/
1272 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1273 {
1274   PetscInt       i;
1275   PetscErrorCode ierr;
1276   PetscFunctionBegin;
1277   if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
1278   PetscValidPointer(is,2);
1279   for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); }
1280   ierr = PetscFree(is);CHKERRQ(ierr);
1281   if (is_local) {
1282     PetscValidPointer(is_local,3);
1283     for (i=0; i<n; i++) { ierr = ISDestroy(is_local[i]);CHKERRQ(ierr); }
1284     ierr = PetscFree(is_local);CHKERRQ(ierr);
1285   }
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 
/*
  PCGASMLocalSubdomainBounds2D - For a rectangular subdomain [xleft,xright) x [ylow,yhigh) of an
  M x N grid numbered row by row, and this process's contiguous ownership range [first,last) in
  that numbering, computes (through the pointer arguments) the bounding box
  (xleft_loc,ylow_loc)-(xright_loc,yhigh_loc) of the local part of the subdomain and the number
  n of locally owned subdomain points.

  Fix: wrapped in do { ... } while(0) so that an invocation followed by a semicolon forms a
  single statement and the macro stays correct inside unbraced if/else bodies.

  NOTE(review): macro arguments are used without added parentheses (e.g. first/M), so callers
  must pass simple expressions, as the existing call sites do -- confirm before passing
  compound expressions.
*/
#define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
do {                                                                                                    \
  PetscInt first_row = first/M, last_row = last/M+1;                                                    \
  /*                                                                                                    \
   Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
   of the bounding box of the intersection of the subdomain with the local ownership range (local       \
   subdomain).                                                                                          \
   Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
   of the intersection.                                                                                 \
  */                                                                                                    \
  /* ylow_loc is the grid row containing the first element of the local subdomain */                    \
  *ylow_loc = PetscMax(first_row,ylow);                                                                 \
  /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft;                                      \
  /* yhigh_loc is the grid row above the last local subdomain element */                                \
  *yhigh_loc = PetscMin(last_row,yhigh);                                                                \
  /* xright is the offset of the end of the  local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright;                                    \
  /* Now compute the size of the local subdomain n. */                                                  \
  *n = 0;                                                                                               \
  if (*ylow_loc < *yhigh_loc) {                                                                         \
    PetscInt width = xright-xleft;                                                                      \
    *n += width*(*yhigh_loc-*ylow_loc-1);                                                               \
    *n += PetscMin(PetscMax(*xright_loc-xleft,0),width);                                                \
    *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width);                                                \
  }                                                                                                     \
} while(0)
1317 
1318 
1319 
1320 #undef __FUNCT__
1321 #define __FUNCT__ "PCGASMCreateSubdomains2D"
1322 /*@
1323    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1324    preconditioner for a two-dimensional problem on a regular grid.
1325 
1326    Collective
1327 
1328    Input Parameters:
1329 +  M, N - the global number of mesh points in the x and y directions
1330 .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1331 .  dof - degrees of freedom per node
1332 -  overlap - overlap in mesh lines
1333 
1334    Output Parameters:
1335 +  Nsub - the number of local subdomains created
1336 .  is - array of index sets defining overlapping (if overlap > 0) subdomains
1337 -  is_local - array of index sets defining non-overlapping subdomains
1338 
1339    Note:
1340    Presently PCAMSCreateSubdomains2d() is valid only for sequential
1341    preconditioners.  More general related routines are
1342    PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains().
1343 
1344    Level: advanced
1345 
1346 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid
1347 
1348 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
1349           PCGASMSetOverlap()
1350 @*/
1351 PetscErrorCode  PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
1352 {
1353   PetscErrorCode ierr;
1354   PetscMPIInt size, rank;
1355   PetscInt i, j;
1356   PetscInt maxheight, maxwidth;
1357   PetscInt xstart, xleft, xright, xleft_loc, xright_loc;
1358   PetscInt ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1359   PetscInt x[2][2], y[2][2], n[2];
1360   PetscInt first, last;
1361   PetscInt nidx, *idx;
1362   PetscInt ii,jj,s,q,d;
1363   PetscInt k,kk;
1364   PetscMPIInt color;
1365   MPI_Comm comm, subcomm;
1366 
1367   IS **iis;
1368   PetscFunctionBegin;
1369   ierr = PetscObjectGetComm((PetscObject)pc, &comm);     CHKERRQ(ierr);
1370   ierr = MPI_Comm_size(comm, &size);                     CHKERRQ(ierr);
1371   ierr = MPI_Comm_rank(comm, &rank);                     CHKERRQ(ierr);
1372   ierr = MatGetOwnershipRange(pc->pmat, &first, &last);  CHKERRQ(ierr);
1373   if(first%dof || last%dof) {
1374     SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1375 	     "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1376   }
1377   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1378   s = 0;
1379   ystart = 0;
1380   for (j=0; j<Ndomains; ++j) {
1381     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1382     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1383     /* Vertical domain limits with an overlap. */
1384     ylow = PetscMax(ystart - overlap,0);
1385     yhigh = PetscMin(ystart + maxheight + overlap,N);
1386     xstart = 0;
1387     for (i=0; i<Mdomains; ++i) {
1388       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1389       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1390       /* Horizontal domain limits with an overlap. */
1391       xleft   = PetscMax(xstart - overlap,0);
1392       xright  = PetscMin(xstart + maxwidth + overlap,M);
1393       /*
1394 	 Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1395       */
1396       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1397       if(nidx) {
1398         ++s;
1399       }
1400       xstart += maxwidth;
1401     }/* for(i = 0; i < Mdomains; ++i) */
1402     ystart += maxheight;
1403   }/* for(j = 0; j < Ndomains; ++j) */
1404   /* Now we can allocate the necessary number of ISs. */
1405   *nsub = s;
1406   ierr = PetscMalloc((*nsub)*sizeof(IS*),is);         CHKERRQ(ierr);
1407   ierr = PetscMalloc((*nsub)*sizeof(IS*),is_local);   CHKERRQ(ierr);
1408   s = 0;
1409   ystart = 0;
1410   for (j=0; j<Ndomains; ++j) {
1411     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1412     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1413     /* Vertical domain limits with an overlap. */
1414     y[0][0] = PetscMax(ystart - overlap,0);
1415     y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1416     /* Vertical domain limits without an overlap. */
1417     y[1][0] = ystart;
1418     y[1][1] = PetscMin(ystart + maxheight,N);
1419     xstart = 0;
1420     for (i=0; i<Mdomains; ++i) {
1421       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1422       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1423       /* Horizontal domain limits with an overlap. */
1424       x[0][0]  = PetscMax(xstart - overlap,0);
1425       x[0][1]  = PetscMin(xstart + maxwidth + overlap,M);
1426       /* Horizontal domain limits without an overlap. */
1427       x[1][0] = xstart;
1428       x[1][1] = PetscMin(xstart+maxwidth,M);
1429       /*
1430 	 Determine whether this domain intersects this processor's ownership range of pc->pmat.
1431 	 Do this twice: first for the domains with overlaps, and once without.
1432 	 During the first pass create the subcommunicators, and use them on the second pass as well.
1433       */
1434       for(q = 0; q < 2; ++q) {
1435 	/*
1436 	  domain limits, (xleft, xright) and (ylow, yheigh) are adjusted
1437 	  according to whether the domain with an overlap or without is considered.
1438 	*/
1439 	xleft = x[q][0]; xright = x[q][1];
1440 	ylow  = y[q][0]; yhigh  = y[q][1];
1441         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1442 	nidx *= dof;
1443         n[q] = nidx;
1444         /*
1445          Based on the counted number of indices in the local domain *with an overlap*,
1446          construct a subcommunicator of all the processors supporting this domain.
1447          Observe that a domain with an overlap might have nontrivial local support,
1448          while the domain without an overlap might not.  Hence, the decision to participate
1449          in the subcommunicator must be based on the domain with an overlap.
1450          */
1451 	if(q == 0) {
1452 	  if(nidx) {
1453 	    color = 1;
1454 	  }
1455 	  else {
1456 	    color = MPI_UNDEFINED;
1457 	  }
1458 	  ierr = MPI_Comm_split(comm, color, rank, &subcomm); CHKERRQ(ierr);
1459 	}
1460         /*
1461          Proceed only if the number of local indices *with an overlap* is nonzero.
1462          */
1463         if(n[0]) {
1464           if(q == 0) {
1465             iis = is;
1466           }
1467           if(q == 1) {
1468             /*
1469              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1470              Moreover, if the overlap is zero, the two ISs are identical.
1471              */
1472             if(overlap == 0) {
1473               (*is_local)[s] = (*is)[s];
1474               ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
1475               continue;
1476             }
1477             else {
1478               iis = is_local;
1479               subcomm = ((PetscObject)(*is)[s])->comm;
1480             }
1481           }/* if(q == 1) */
1482           idx = PETSC_NULL;
1483 	  ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
1484           if(nidx) {
1485             k    = 0;
1486             for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1487               PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft;
1488               PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright;
1489               kk = dof*(M*jj + x0);
1490               for (ii=x0; ii<x1; ++ii) {
1491                 for(d = 0; d < dof; ++d) {
1492                   idx[k++] = kk++;
1493                 }
1494               }
1495             }
1496           }
1497 	  ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr);
1498 	}/* if(n[0]) */
1499       }/* for(q = 0; q < 2; ++q) */
1500       if(n[0]) {
1501         ++s;
1502       }
1503       xstart += maxwidth;
1504     }/* for(i = 0; i < Mdomains; ++i) */
1505     ystart += maxheight;
1506   }/* for(j = 0; j < Ndomains; ++j) */
1507   PetscFunctionReturn(0);
1508 }
1509 
1510 #undef __FUNCT__
1511 #define __FUNCT__ "PCGASMGetLocalSubdomains"
1512 /*@C
1513     PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor
1514     only) for the additive Schwarz preconditioner.
1515 
1516     Not Collective
1517 
1518     Input Parameter:
1519 .   pc - the preconditioner context
1520 
1521     Output Parameters:
1522 +   n - the number of subdomains for this processor (default value = 1)
1523 .   is - the index sets that define the subdomains for this processor
1524 -   is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)
1525 
1526 
1527     Notes:
1528     The IS numbering is in the parallel, global numbering of the vector.
1529 
1530     Level: advanced
1531 
1532 .keywords: PC, GASM, set, local, subdomains, additive Schwarz
1533 
1534 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1535           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices()
1536 @*/
1537 PetscErrorCode  PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1538 {
1539   PC_GASM         *osm;
1540   PetscErrorCode ierr;
1541   PetscBool      match;
1542 
1543   PetscFunctionBegin;
1544   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1545   PetscValidIntPointer(n,2);
1546   if (is) PetscValidPointer(is,3);
1547   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1548   if (!match) {
1549     if (n)  *n  = 0;
1550     if (is) *is = PETSC_NULL;
1551   } else {
1552     osm = (PC_GASM*)pc->data;
1553     if (n)  *n  = osm->n;
1554     if (is) *is = osm->is;
1555     if (is_local) *is_local = osm->is_local;
1556   }
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 #undef __FUNCT__
1561 #define __FUNCT__ "PCGASMGetLocalSubmatrices"
1562 /*@C
1563     PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1564     only) for the additive Schwarz preconditioner.
1565 
1566     Not Collective
1567 
1568     Input Parameter:
1569 .   pc - the preconditioner context
1570 
1571     Output Parameters:
1572 +   n - the number of matrices for this processor (default value = 1)
1573 -   mat - the matrices
1574 
1575 
1576     Level: advanced
1577 
1578 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi
1579 
1580 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1581           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains()
1582 @*/
1583 PetscErrorCode  PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1584 {
1585   PC_GASM         *osm;
1586   PetscErrorCode ierr;
1587   PetscBool      match;
1588 
1589   PetscFunctionBegin;
1590   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1591   PetscValidIntPointer(n,2);
1592   if (mat) PetscValidPointer(mat,3);
1593   if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp().");
1594   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1595   if (!match) {
1596     if (n)   *n   = 0;
1597     if (mat) *mat = PETSC_NULL;
1598   } else {
1599     osm = (PC_GASM*)pc->data;
1600     if (n)   *n   = osm->n;
1601     if (mat) *mat = osm->pmat;
1602   }
1603   PetscFunctionReturn(0);
1604 }
1605