xref: /petsc/src/ksp/pc/impls/gasm/gasm.c (revision d3bc2379953f61570eaba0ea7bf158e61e36f6e5)

/*
  This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
  In this version each processor may have any number of subdomains and a subdomain may live on multiple
  processors.

       N    - total number of subdomains on all processors
       n    - actual number of subdomains on this processor
       nmax - maximum number of subdomains per processor
*/
#include <petsc-private/pcimpl.h>     /*I "petscpc.h" I*/

typedef struct {
  PetscInt   N,n,nmax;
  PetscInt   overlap;             /* overlap requested by user */
  KSP        *ksp;                /* linear solvers for each block */
  Vec        gx,gy;               /* Merged work vectors */
  Vec        *x,*y;               /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
  IS         gis, gis_local;      /* merged ISs */
  VecScatter grestriction;        /* merged restriction */
  VecScatter gprolongation;       /* merged prolongation */
  IS         *is;                 /* index set that defines each overlapping subdomain */
  IS         *is_local;           /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */
  Mat        *pmat;               /* subdomain block matrices */
  PCGASMType type;                /* use reduced interpolation, restriction or both */
  PetscBool  type_set;            /* if user set this value (so won't change it for symmetric problems) */
  PetscBool  same_local_solves;   /* flag indicating whether all local solvers are same */
  PetscBool  sort_indices;        /* flag to sort subdomain indices */
} PC_GASM;

#undef __FUNCT__
#define __FUNCT__ "PCGASMPrintSubdomains"
static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  char           fname[PETSC_MAX_PATH_LEN+1];
  PetscViewer    viewer, sviewer;
  PetscInt       i,j,nidx;
  const PetscInt *idx;
  char           *cidx;
  PetscBool      found;
  PetscMPIInt    size, rank;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr);
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr);
  if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }
  for (i=0;i<osm->n;++i) {
    ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
    ierr = ISGetLocalSize(osm->is[i], &nidx);CHKERRQ(ierr);
    /*
     No more than 15 characters per index plus a space.
     PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
     in case nidx == 0. That will take care of the space for the trailing '\0' as well.
     For nidx == 0 the whole string consists of 16 '\0' characters.
     */
    ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr);
    ierr = ISGetIndices(osm->is[i], &idx);CHKERRQ(ierr);
    ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr);
    for (j = 0; j < nidx; ++j) {
      ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
    }
    ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
    ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr);

    ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
    ierr = PetscFree(cidx);CHKERRQ(ierr);
    ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
    if (osm->is_local) {
      ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
      ierr = ISGetLocalSize(osm->is_local[i], &nidx);CHKERRQ(ierr);
      /*
       No more than 15 characters per index plus a space.
       PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
       in case nidx == 0. That will take care of the space for the trailing '\0' as well.
       For nidx == 0 the whole string consists of 16 '\0' characters.
       */
      ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr);
      ierr = ISGetIndices(osm->is_local[i], &idx);CHKERRQ(ierr);
      ierr = PetscViewerStringOpen(((PetscObject)(osm->is_local[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr);
      for (j = 0; j < nidx; ++j) {
        ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
      }
      ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
      ierr = ISRestoreIndices(osm->is_local[i],&idx);CHKERRQ(ierr);

      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
      ierr = PetscFree(cidx);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCView_GASM"
static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  PetscErrorCode ierr;
  PetscMPIInt    rank, size;
  PetscInt       i,bsz;
  PetscBool      iascii,isstring, print_subdomains=PETSC_FALSE;
  PetscViewer    sviewer;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr);
  if (iascii) {
    char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set";
    if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);}
    if (osm->nmax > 0)     {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);}
    ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
    if (osm->same_local_solves) {
      if (osm->ksp) {
        ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr);
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (!rank) {
          ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
    } else {
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
      for (i=0; i<osm->nmax; i++) {
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (i < osm->n) {
          ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_TRUE);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    }
  } else if (isstring) {
    ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name);
  }
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  if (print_subdomains) {
    ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_GASM"
static PetscErrorCode PCSetUp_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscBool      symset,flg;
  PetscInt       i;
  PetscMPIInt    rank, size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  PetscInt       dn;       /* Number of indices in a single subdomain assigned to this processor. */
  const PetscInt *didx;    /* Indices from a single subdomain assigned to this processor. */
  PetscInt       ddn;      /* Number of indices in all subdomains assigned to this processor. */
  PetscInt       *ddidx;   /* Indices of all subdomains assigned to this processor. */
  IS             gid;      /* Identity IS of the size of all subdomains assigned to this processor. */
  Vec            x,y;
  PetscScalar    *gxarray, *gyarray;
  PetscInt       gfirst, glast;
  DM             *domain_dm = PETSC_NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  if (!pc->setupcalled) {

    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) { osm->type = PC_GASM_BASIC; }
    }

    if (osm->N == PETSC_DECIDE && osm->n < 1) {
      /* no subdomains given, try pc->dm */
      if (pc->dm) {
        char      ddm_name[1024];
        DM        ddm;
        PetscBool flg;
        PetscInt  num_domains, d;
        char      **domain_names;
        IS        *domain_is;
        /* Allow the user to request a decomposition DM by name */
        ierr = PetscStrncpy(ddm_name, "", 1024); CHKERRQ(ierr);
        ierr = PetscOptionsString("-pc_asm_decomposition_dm", "Name of the DM defining the decomposition", "PCSetDM", ddm_name, ddm_name,1024,&flg); CHKERRQ(ierr);
        if (flg) {
          ierr = DMCreateDecompositionDM(pc->dm, ddm_name, &ddm); CHKERRQ(ierr);
          if (!ddm) {
            SETERRQ1(((PetscObject)pc)->comm, PETSC_ERR_ARG_WRONGSTATE, "Unknown DM decomposition name %s", ddm_name);
          }
          ierr = PetscInfo(pc,"Using decomposition DM defined using options database\n");CHKERRQ(ierr);
          ierr = PCSetDM(pc,ddm); CHKERRQ(ierr);
        }
        ierr = DMCreateDecomposition(pc->dm, &num_domains, &domain_names, &domain_is, &domain_dm);CHKERRQ(ierr);
        ierr = PCGASMSetLocalSubdomains(pc, num_domains, domain_is, PETSC_NULL);CHKERRQ(ierr);
        for (d = 0; d < num_domains; ++d) {
          ierr = PetscFree(domain_names[d]); CHKERRQ(ierr);
          ierr = ISDestroy(&domain_is[d]);   CHKERRQ(ierr);
        }
        ierr = PetscFree(domain_names);CHKERRQ(ierr);
        ierr = PetscFree(domain_is);CHKERRQ(ierr);
      } else { /* no dm, use one per processor */
        osm->nmax = osm->n = 1;
        ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
        osm->N = size;
      }
    } else if (osm->N == PETSC_DECIDE) {
      PetscInt inwork[2], outwork[2];
      /* determine global number of subdomains and the max number of local subdomains */
      inwork[0] = inwork[1] = osm->n;
      ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
      osm->nmax = outwork[0];
      osm->N    = outwork[1];
    }
    if (!osm->is) { /* create the index sets */
      ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr);
    }
    if (!osm->is_local) {
      /*
         This indicates that osm->is should define a nonoverlapping decomposition
         (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetLocalSubdomains,
          but the assumption is that either the user does the right thing, or subdomains in osm->is have been created
          via PCGASMCreateSubdomains, which guarantees a nonoverlapping decomposition).
         Therefore, osm->is will be used to define osm->is_local.
         If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap,
         so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping.
         Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is.
      */
      ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
      for (i=0; i<osm->n; i++) {
        if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
          ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr);
          ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr);
        } else {
          ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr);
          osm->is_local[i] = osm->is[i];
        }
      }
    }
    /* Beyond this point osm->is_local is not null. */
    if (osm->overlap > 0) {
      /* Extend the "overlapping" regions by a number of steps */
      ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr);
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); }

    if (osm->sort_indices) {
      for (i=0; i<osm->n; i++) {
        ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
        ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
      }
    }
    /* Merge the ISs, create merged vectors and scatter contexts. */
    /* Restriction ISs. */
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ddn += dn;
    }
    ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx);CHKERRQ(ierr);
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ierr = ISGetIndices(osm->is[i],&didx);CHKERRQ(ierr);
      ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn);CHKERRQ(ierr);
      ierr = ISRestoreIndices(osm->is[i], &didx);CHKERRQ(ierr);
      ddn += dn;
    }
    ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis);CHKERRQ(ierr);
    ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr);
    ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx);CHKERRQ(ierr);
    ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
    ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid);CHKERRQ(ierr);
    ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction));CHKERRQ(ierr);
    ierr = ISDestroy(&gid);CHKERRQ(ierr);
    /* Prolongation ISs */
    { PetscInt       dn_local;       /* Number of indices in the local part of a single domain assigned to this processor. */
      const PetscInt *didx_local;    /* Global indices from the local part of a single domain assigned to this processor. */
      PetscInt       ddn_local;      /* Number of indices in the local part of the disjoint union of all domains assigned to this processor. */
      PetscInt       *ddidx_local;   /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */

      ISLocalToGlobalMapping ltog;          /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */
      PetscInt              *ddidx_llocal;  /* Mapped local indices of the disjoint union of local parts of subdomains. */
      PetscInt               ddn_llocal;    /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */
      IS                     gis_llocal;    /* IS with ddidx_llocal indices. */
      PetscInt               j;
      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr);
        ddn_local += dn_local;
      }
      ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local);CHKERRQ(ierr);
      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr);
        ierr = ISGetIndices(osm->is_local[i],&didx_local);CHKERRQ(ierr);
        ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local);CHKERRQ(ierr);
        ierr = ISRestoreIndices(osm->is_local[i], &didx_local);CHKERRQ(ierr);
        ddn_local += dn_local;
      }
      ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal);CHKERRQ(ierr);
      ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local));CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingCreateIS(osm->gis,&ltog);CHKERRQ(ierr);
      ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
      if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local);
      /* Now convert these localized indices into the global indices into the merged output vector. */
      ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast);CHKERRQ(ierr);
      for (j=0; j < ddn_llocal; ++j) {
        ddidx_llocal[j] += gfirst;
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal);CHKERRQ(ierr);
      ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);CHKERRQ(ierr);
      ierr = ISDestroy(&gis_llocal);CHKERRQ(ierr);
    }
    /* Create the subdomain work vectors. */
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr);
    for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) {
      PetscInt dN;
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr);
    }
    ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr);
    ierr = VecDestroy(&x);CHKERRQ(ierr);
    ierr = VecDestroy(&y);CHKERRQ(ierr);
    /* Create the local solvers */
    ierr = PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) { /* KSPs are local */
      ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    scall = MAT_INITIAL_MATRIX;

  } else {
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
  }

  /*
     Extract out the submatrices.
  */
  if (size > 1) {
    ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  }
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over submatrices putting them into local ksp
  */
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetUpOnBlocks_GASM"
static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCApply_GASM"
static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_GASM_RESTRICT)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
  }
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  /* do the subdomain solves */
  for (i=0; i<osm->n; ++i) {
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCApplyTranspose_GASM"
static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).

     Note: these are reversed from PCApply_GASM() because we are applying the
     transpose of the three terms.
  */
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
  }
  if (!(osm->type & PC_GASM_RESTRICT)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCReset_GASM"
static PetscErrorCode PCReset_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (osm->ksp) {
    for (i=0; i<osm->n; i++) {
      ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
    }
  }
  if (osm->pmat) {
    if (osm->n > 0) {
      ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
    }
  }
  if (osm->x) {
    for (i=0; i<osm->n; i++) {
      ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
    }
  }
  ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
  ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);

  ierr = VecScatterDestroy(&osm->grestriction);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&osm->gprolongation);CHKERRQ(ierr);
  if (osm->is) {ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); osm->is = 0;}
  ierr = ISDestroy(&osm->gis);CHKERRQ(ierr);
  ierr = ISDestroy(&osm->gis_local);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCDestroy_GASM"
static PetscErrorCode PCDestroy_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PCReset_GASM(pc);CHKERRQ(ierr);
  if (osm->ksp) {
    for (i=0; i<osm->n; i++) {
      ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
  }
  ierr = PetscFree(osm->x);CHKERRQ(ierr);
  ierr = PetscFree(osm->y);CHKERRQ(ierr);
  ierr = PetscFree(pc->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetFromOptions_GASM"
static PetscErrorCode PCSetFromOptions_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       blocks,ovl;
  PetscBool      symset,flg;
  PCGASMType     gasmtype;

  PetscFunctionBegin;
  /* set the type to symmetric if matrix is symmetric */
  if (!osm->type_set && pc->pmat) {
    ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
    if (symset && flg) { osm->type = PC_GASM_BASIC; }
  }
  ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
    if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); }
    ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
    if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
    flg  = PETSC_FALSE;
    ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
    if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetLocalSubdomains_GASM"
PetscErrorCode  PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
  if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp().");

  if (!pc->setupcalled) {
    /* Reference the new index sets before destroying any old ones, in case some of them are the same. */
    if (is) {
      for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
    }
    if (is_local) {
      for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);}
    }
    if (osm->is) {
      ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
    }
    osm->n        = n;
    osm->is       = 0;
    osm->is_local = 0;
    if (is) {
      ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr);
      for (i=0; i<n; i++) { osm->is[i] = is[i]; }
      /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */
      osm->overlap = -1;
    }
    if (is_local) {
      ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
      for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
    }
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetTotalSubdomains_GASM"
PetscErrorCode  PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       n;

  PetscFunctionBegin;
  if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);

  /*
     Split the subdomains equally among all processors
  */
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  n = N/size + ((N % size) > rank);
  if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
  if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp().");
  if (!pc->setupcalled) {
    if (osm->is) {
      ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
    }
    osm->N        = N;
    osm->n        = n;
    osm->is       = 0;
    osm->is_local = 0;
  }
  PetscFunctionReturn(0);
}/* PCGASMSetTotalSubdomains_GASM() */
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetOverlap_GASM"
PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
  if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
  if (!pc->setupcalled) {
    osm->overlap = ovl;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetType_GASM"
PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->type     = type;
  osm->type_set = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetSortIndices_GASM"
PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->sort_indices = doSort;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMGetSubKSP_GASM"
/*
   FIX: This routine might need to be modified once multiple ranks per subdomain are allowed.
        In particular, it would upset the global subdomain number calculation.
*/
PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");

  if (n) {
    *n = osm->n;
  }
  if (first) {
    ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
    *first -= osm->n;
  }
  if (ksp) {
    /* Assume that local solves are now different; not necessarily
       true though!  This flag is used only for PCView_GASM() */
    *ksp                   = osm->ksp;
    osm->same_local_solves = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
}/* PCGASMGetSubKSP_GASM() */
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetLocalSubdomains"
/*@C
    PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor
    only) for the additive Schwarz preconditioner.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
.   n - the number of subdomains for this processor (default value = 1)
.   is - the index sets that define the subdomains for this processor
         (or PETSC_NULL for PETSc to determine subdomains)
-   is_local - the index sets that define the local part of each subdomain for this processor
         (or PETSC_NULL, in which case the local parts are derived from is[])

    Notes:
    The IS numbering is in the parallel, global numbering of the vector.

    By default the GASM preconditioner uses 1 block per processor.

    Use PCGASMSetTotalSubdomains() to set the subdomains for all processors.

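    Sample usage (a sketch, not taken from a PETSc example; assumes a PetscErrorCode
    ierr in scope and that the arrays sub_is[] and sub_is_local[] of n index sets
    have already been created, e.g. with PCGASMCreateSubdomains()):
.vb
      KSP ksp;
      PC  pc;
      ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCSetType(pc,PCGASM);CHKERRQ(ierr);
      ierr = PCGASMSetLocalSubdomains(pc,n,sub_is,sub_is_local);CHKERRQ(ierr);
.ve
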
    Level: advanced

.keywords: PC, GASM, set, local, subdomains, additive Schwarz

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode  PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetTotalSubdomains"
/*@C
    PCGASMSetTotalSubdomains - Sets the total number of subdomains to use across
    all processors for the additive Schwarz preconditioner.  Either all or no
    processors in the PC communicator must call this routine, with the same N.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
-   N  - the total number of subdomains for all processors

    Options Database Key:
    To set the total number of subdomain blocks rather than specify the
    index sets, use the option
.    -pc_gasm_blocks <blks> - Sets total blocks

    Notes:
    This routine only sets the number of subdomains; PETSc splits them evenly
    among the processors and determines the index sets itself, so it cannot be
    used to set the actual subdomains.

    By default the GASM preconditioner uses 1 block per processor.

    Use PCGASMSetLocalSubdomains() to set the local subdomains and, optionally,
    their index sets.

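    Sample usage (a sketch; ksp is assumed to be an already created KSP and ierr
    a PetscErrorCode in scope):
.vb
      PC pc;
      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCSetType(pc,PCGASM);CHKERRQ(ierr);
      ierr = PCGASMSetTotalSubdomains(pc,16);CHKERRQ(ierr);  /* 16 subdomains in total */
.ve
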
    Level: advanced

.keywords: PC, GASM, set, total, global, subdomains, additive Schwarz

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetOverlap"
/*@
    PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)

    Options Database Key:
.   -pc_gasm_overlap <ovl> - Sets overlap

    Notes:
    By default the GASM preconditioner uses 1 block per processor.  To use
    multiple blocks per processor, see PCGASMSetTotalSubdomains() and
    PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>).

    The overlap defaults to 1, so if one desires that no additional
    overlap be computed beyond what may have been set with a call to
    PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl
    must be set to 0.  In particular, if one does not explicitly set
    the subdomains in an application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in a GASM
    variant that is equivalent to the block Jacobi preconditioner.

    Note that one can define initial index sets with any overlap via
    PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine
    PCGASMSetOverlap() merely allows PETSc to extend that overlap further
    if desired.

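    Sample usage (a sketch; pc is assumed to already be of type PCGASM):
.vb
      ierr = PCGASMSetOverlap(pc,2);CHKERRQ(ierr);  /* extend each subdomain by two layers */
.ve
    or, equivalently, on the command line: -pc_gasm_overlap 2
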
    Level: intermediate

.keywords: PC, GASM, set, overlap

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveInt(pc,ovl,2);
  ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetType"
/*@
    PCGASMSetType - Sets the type of restriction and interpolation used
    for local problems in the additive Schwarz method.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   type - variant of GASM, one of
.vb
      PC_GASM_BASIC       - full interpolation and restriction
      PC_GASM_RESTRICT    - full restriction, local processor interpolation
      PC_GASM_INTERPOLATE - full interpolation, local processor restriction
      PC_GASM_NONE        - local processor restriction and interpolation
.ve

    Options Database Key:
.   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

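    Sample usage (a sketch; pc is assumed to already be of type PCGASM):
.vb
      ierr = PCGASMSetType(pc,PC_GASM_BASIC);CHKERRQ(ierr);
.ve
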
    Level: intermediate

.keywords: PC, GASM, set, type

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveEnum(pc,type,2);
  ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetSortIndices"
/*@
    PCGASMSetSortIndices - Determines whether subdomain indices are sorted.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   doSort - sort the subdomain indices

    Level: intermediate

.keywords: PC, GASM, set, sort, indices

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool doSort)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,doSort,2);
  ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMGetSubKSP"
/*@C
   PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
   this processor.

   Collective on PC iff first_local is requested

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n_local - the number of blocks on this processor or PETSC_NULL
.  first_local - the global number of the first block on this processor or PETSC_NULL,
                 all processors must request or all must pass PETSC_NULL
-  ksp - the array of KSP contexts

   Note:
   After PCGASMGetSubKSP() the array of KSPs is not to be freed.

   Currently for some matrix implementations only 1 block per processor
   is supported.

   You must call KSPSetUp() before calling PCGASMGetSubKSP().

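   Sample usage, setting a different preconditioner on each local block (a sketch;
   assumes an already configured outer KSP ksp whose PC is of type PCGASM, and a
   PetscErrorCode ierr in scope):
.vb
      PetscInt n_local,first,i;
      KSP      *subksp;
      PC       pc,subpc;
      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = KSPSetUp(ksp);CHKERRQ(ierr);
      ierr = PCGASMGetSubKSP(pc,&n_local,&first,&subksp);CHKERRQ(ierr);
      for (i=0; i<n_local; i++) {
        ierr = KSPGetPC(subksp[i],&subpc);CHKERRQ(ierr);
        ierr = PCSetType(subpc,PCILU);CHKERRQ(ierr);
      }
.ve
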
   Level: advanced

.keywords: PC, GASM, additive Schwarz, get, sub, KSP, context

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMSetOverlap(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------*/
/*MC
   PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
           its own KSP object.

   Options Database Keys:
+  -pc_gasm_truelocal - Activates PCGASMSetUseTrueLocal()
.  -pc_gasm_blocks <blks> - Sets total blocks
.  -pc_gasm_overlap <ovl> - Sets overlap
-  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

     IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
      will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
      -pc_gasm_type basic to use the standard GASM.

   Notes: Each processor can have one or more blocks, but a block cannot be shared by more
     than one processor. Defaults to one block per processor.

     To set options on the solvers for each block, append -sub_ to all the KSP and PC
        options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
        (see the sample command line below).

     To set the options on the solvers separately for each block, call PCGASMGetSubKSP()
         and set the options directly on the resulting KSP object (you can access its PC
         with KSPGetPC())

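     Sample command-line usage (illustrative; ./app stands for any PETSc application
     that solves a linear system with KSPSolve()):
.vb
       mpiexec -n 4 ./app -ksp_type gmres -pc_type gasm -pc_gasm_blocks 8 -pc_gasm_overlap 1 -sub_pc_type ilu
.ve
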

   Level: beginner

   Concepts: additive Schwarz method

    References:
    M. Dryja and O. B. Widlund, An additive variant of the Schwarz alternating method for the case of many subregions,
    Courant Institute, New York University, Technical Report.

    Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic
    Partial Differential Equations, Cambridge University Press, ISBN 0-521-49589-X.

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
           PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(),
           PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()

M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_GASM"
PetscErrorCode  PCCreate_GASM(PC pc)
{
  PetscErrorCode ierr;
  PC_GASM        *osm;

  PetscFunctionBegin;
  ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr);
  osm->N                 = PETSC_DECIDE;
  osm->n                 = 0;
  osm->nmax              = 0;
  osm->overlap           = 1;
  osm->ksp               = 0;
  osm->grestriction      = 0;
  osm->gprolongation     = 0;
  osm->gx                = 0;
  osm->gy                = 0;
  osm->x                 = 0;
  osm->y                 = 0;
  osm->is                = 0;
  osm->is_local          = 0;
  osm->pmat              = 0;
  osm->type              = PC_GASM_RESTRICT;
  osm->same_local_solves = PETSC_TRUE;
  osm->sort_indices      = PETSC_TRUE;

  pc->data                 = (void*)osm;
  pc->ops->apply           = PCApply_GASM;
  pc->ops->applytranspose  = PCApplyTranspose_GASM;
  pc->ops->setup           = PCSetUp_GASM;
  pc->ops->reset           = PCReset_GASM;
  pc->ops->destroy         = PCDestroy_GASM;
  pc->ops->setfromoptions  = PCSetFromOptions_GASM;
  pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
  pc->ops->view            = PCView_GASM;
  pc->ops->applyrichardson = 0;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM",
                    PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM",
                    PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM",
                    PCGASMSetOverlap_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM",
                    PCGASMSetType_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM",
                    PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM",
                    PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "PCGASMCreateSubdomains"
/*@C
   PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
   preconditioner for any problem on a general grid.

   Collective

   Input Parameters:
+  A - The global matrix operator
-  n - the number of local blocks

   Output Parameters:
.  outis - the array of index sets defining the subdomains

   Level: advanced

   Note: this generates nonoverlapping subdomains; PCGASM will generate the overlap
    from these if you use PCGASMSetLocalSubdomains()

    In the Fortran version you must provide the array outis[] already allocated of length n.

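   Sample usage, creating subdomains and handing them to the preconditioner (a sketch;
   A is the assembled system matrix, pc a PC of type PCGASM, and ierr a PetscErrorCode):
.vb
      IS *sub_is;
      ierr = PCGASMCreateSubdomains(A,n,&sub_is);CHKERRQ(ierr);
      ierr = PCGASMSetLocalSubdomains(pc,n,sub_is,PETSC_NULL);CHKERRQ(ierr);
      /* ... use the KSP/PC ... */
      ierr = PCGASMDestroySubdomains(n,sub_is,PETSC_NULL);CHKERRQ(ierr);
.ve
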
.keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid

.seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains()
@*/
PetscErrorCode  PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
{
  MatPartitioning           mpart;
  const char                *prefix;
  PetscErrorCode            (*f)(Mat,MatReuse,Mat*);
  PetscMPIInt               size;
  PetscInt                  i,j,rstart,rend,bs;
  PetscBool                 isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
  Mat                       Ad = PETSC_NULL, adj;
  IS                        ispart,isnumb,*is;
  PetscErrorCode            ierr;
1161 
1162   PetscFunctionBegin;
1163   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1164   PetscValidPointer(outis,3);
1165   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);
1166 
1167   /* Get prefix, row distribution, and block size */
1168   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1169   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1170   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1171   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1172 
1173   /* Get diagonal block from matrix if possible */
1174   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1175   ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
1176   if (f) {
1177     ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
1178   } else if (size == 1) {
1179     Ad = A;
1180   }
1181   if (Ad) {
1182     ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1183     if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1184   }
1185   if (Ad && n > 1) {
1186     PetscBool  match,done;
1187     /* Try to setup a good matrix partitioning if available */
1188     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1189     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1190     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1191     ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
1192     if (!match) {
1193       ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
1194     }
1195     if (!match) { /* assume a "good" partitioner is available */
1196       PetscInt na,*ia,*ja;
1197       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1198       if (done) {
1199         /* Build adjacency matrix by hand. Unfortunately a call to
1200            MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1201            remove the block-aij structure and we cannot expect
1202            MatPartitioning to split vertices as we need */
1203         PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
1204         nnz = 0;
1205         for (i=0; i<na; i++) { /* count number of nonzeros */
1206           len = ia[i+1] - ia[i];
1207           row = ja + ia[i];
1208           for (j=0; j<len; j++) {
1209             if (row[j] == i) { /* don't count diagonal */
1210               len--; break;
1211             }
1212           }
1213           nnz += len;
1214         }
1215         ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
1216         ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
1217         nnz    = 0;
1218         iia[0] = 0;
1219         for (i=0; i<na; i++) { /* fill adjacency */
1220           cnt = 0;
1221           len = ia[i+1] - ia[i];
1222           row = ja + ia[i];
1223           for (j=0; j<len; j++) {
1224             if (row[j] != i) { /* if not diagonal */
1225               jja[nnz+cnt++] = row[j];
1226             }
1227           }
1228           nnz += cnt;
1229           iia[i+1] = nnz;
1230         }
1231         /* Partitioning of the adjacency matrix */
1232         ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
1233         ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1234         ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
1235         ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1236         ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1237         ierr = MatDestroy(&adj);CHKERRQ(ierr);
1238         foundpart = PETSC_TRUE;
1239       }
1240       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1241     }
1242     ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
1243   }
1244 
1245   ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
1246   *outis = is;
1247 
1248   if (!foundpart) {
1249 
1250     /* Partitioning by contiguous chunks of rows */
1251 
1252     PetscInt mbs   = (rend-rstart)/bs;
1253     PetscInt start = rstart;
1254     for (i=0; i<n; i++) {
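      /* The i-th chunk gets mbs/n block rows, plus one extra for the first mbs%n chunks; bs converts block rows to matrix rows. */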
1255       PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1256       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1257       start += count;
1258     }
1259 
1260   } else {
1261 
1262     /* Partitioning by adjacency of diagonal block  */
1263 
1264     const PetscInt *numbering;
1265     PetscInt       *count,nidx,*indices,*newidx,start=0;
1266     /* Get node count in each partition */
1267     ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
1268     ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
1269     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1270       for (i=0; i<n; i++) count[i] *= bs;
1271     }
1272     /* Build indices from node numbering */
1273     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1274     ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
1275     for (i=0; i<nidx; i++) indices[i] = i; /* initialize to the identity permutation */
1276     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1277     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1278     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1279     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1280       ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
1281       for (i=0; i<nidx; i++)
1282         for (j=0; j<bs; j++)
1283           newidx[i*bs+j] = indices[i]*bs + j;
1284       ierr = PetscFree(indices);CHKERRQ(ierr);
1285       nidx   *= bs;
1286       indices = newidx;
1287     }
1288     /* Shift to get global indices */
1289     for (i=0; i<nidx; i++) indices[i] += rstart;
1290 
1291     /* Build the index sets for each block */
1292     for (i=0; i<n; i++) {
1293       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
1294       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1295       start += count[i];
1296     }
1297 
1298     ierr = PetscFree(count);CHKERRQ(ierr);
1299     ierr = PetscFree(indices);CHKERRQ(ierr);
1300     ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
1301     ierr = ISDestroy(&ispart);CHKERRQ(ierr);
1302   }
1303 
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "PCGASMDestroySubdomains"
1309 /*@C
1310    PCGASMDestroySubdomains - Destroys the index sets created with
1311    PCGASMCreateSubdomains(). Should be called after setting subdomains
1312    with PCGASMSetLocalSubdomains().
1313 
1314    Collective
1315 
1316    Input Parameters:
1317 +  n - the number of index sets
1318 .  is - the array of index sets
1319 -  is_local - the array of local index sets, can be PETSC_NULL
1320 
1321    Level: advanced
1322 
1323 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1324 
1325 .seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains()
1326 @*/
1327 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1328 {
1329   PetscInt       i;
1330   PetscErrorCode ierr;
1331   PetscFunctionBegin;
1332   if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
1333   PetscValidPointer(is,2);
1334   for (i=0; i<n; i++) { ierr = ISDestroy(&is[i]);CHKERRQ(ierr); }
1335   ierr = PetscFree(is);CHKERRQ(ierr);
1336   if (is_local) {
1337     PetscValidPointer(is_local,3);
1338     for (i=0; i<n; i++) { ierr = ISDestroy(&is_local[i]);CHKERRQ(ierr); }
1339     ierr = PetscFree(is_local);CHKERRQ(ierr);
1340   }
1341   PetscFunctionReturn(0);
1342 }
1343 
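/*
   A minimal lifecycle sketch tying together the creation and destruction
   routines above (illustrative only, not part of the library; the Mat A,
   the PC pc and the PetscErrorCode ierr are assumed to exist already):

     PetscInt n = 4;
     IS       *is;
     ierr = PCSetType(pc,PCGASM);CHKERRQ(ierr);
     ierr = PCGASMCreateSubdomains(A,n,&is);CHKERRQ(ierr);
     ierr = PCGASMSetLocalSubdomains(pc,n,is,PETSC_NULL);CHKERRQ(ierr);
     ierr = PCGASMDestroySubdomains(n,is,PETSC_NULL);CHKERRQ(ierr);
*/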
1344 
1345 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1346 {                                                                                                       \
1347  PetscInt first_row = first/M, last_row = last/M+1;                                                     \
1348   /*                                                                                                    \
1349    Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
1350    of the bounding box of the intersection of the subdomain with the local ownership range (local       \
1351    subdomain).                                                                                          \
1352    Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
1353    of the intersection.                                                                                 \
1354   */                                                                                                    \
1355   /* ylow_loc is the grid row containing the first element of the local subdomain */                    \
1356   *ylow_loc = PetscMax(first_row,ylow);                                                                    \
1357   /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1358   *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft;                                                                            \
1359   /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
1360   *yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
1361   /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */        \
1362   *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright;                                                                          \
1363   /* Now compute the size of the local subdomain n. */ \
1364   *n = 0;                                               \
1365   if(*ylow_loc < *yhigh_loc) {                           \
1366     PetscInt width = xright-xleft;                     \
1367     *n += width*(*yhigh_loc-*ylow_loc-1);                 \
1368     *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
1369     *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
1370   }\
1371 }
1372 
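/*
   A worked instance of the macro above (the numbers are illustrative only).
   Take M = N = 10 with the local ownership range [first,last) = [0,50),
   i.e. grid rows 0..4, and the subdomain box xleft=3, xright=8, ylow=2, yhigh=7:

     first_row  = 0/10     = 0,   last_row    = 50/10 + 1     = 6,
     *ylow_loc  = max(0,2) = 2,   *xleft_loc  = 3  (box row 2 is not the first owned row),
     *yhigh_loc = min(6,7) = 6,   *xright_loc = min(8, 50%10) = 0,

     *n = 5*(6-2-1) + min(max(0-3,0),5) - min(max(3-3,0),5) = 15,

   which matches a direct count: box rows 2..4 lie in the ownership range,
   each contributing the 5 columns 3..7.
*/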
1373 
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "PCGASMCreateSubdomains2D"
1377 /*@
1378    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1379    preconditioner for a two-dimensional problem on a regular grid.
1380 
1381    Collective
1382 
1383    Input Parameters:
1384 +  M, N - the global number of mesh points in the x and y directions
1385 .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1386 .  dof - degrees of freedom per node
1387 -  overlap - overlap in mesh lines
1388 
1389    Output Parameters:
1390 +  nsub - the number of local subdomains created
1391 .  is - array of index sets defining overlapping (if overlap > 0) subdomains
1392 -  is_local - array of index sets defining non-overlapping subdomains
1393 
1394    Note:
1395    Presently PCGASMCreateSubdomains2D() is valid only for sequential
1396    preconditioners.  More general related routines are
1397    PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains().
1398 
1399    Level: advanced
1400 
1401 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid
1402 
1403 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
1404           PCGASMSetOverlap()
1405 @*/
1406 PetscErrorCode  PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
1407 {
1408   PetscErrorCode ierr;
1409   PetscMPIInt    size, rank;
1410   PetscInt       i, j;
1411   PetscInt       maxheight, maxwidth;
1412   PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
1413   PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1414   PetscInt       x[2][2], y[2][2], n[2];
1415   PetscInt       first, last;
1416   PetscInt       nidx, *idx;
1417   PetscInt       ii,jj,s,q,d;
1418   PetscInt       k,kk;
1419   PetscMPIInt    color;
1420   MPI_Comm       comm, subcomm;
1421   IS             **iis = 0;
1422 
1423   PetscFunctionBegin;
1424   ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
1425   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1426   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1427   ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
1428   if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range [%D,%D) "
1429 	     "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1430 
1431   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1432   s = 0;
1433   ystart = 0;
1434   for (j=0; j<Ndomains; ++j) {
1435     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1436     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1437     /* Vertical domain limits with an overlap. */
1438     ylow = PetscMax(ystart - overlap,0);
1439     yhigh = PetscMin(ystart + maxheight + overlap,N);
1440     xstart = 0;
1441     for (i=0; i<Mdomains; ++i) {
1442       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1443       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1444       /* Horizontal domain limits with an overlap. */
1445       xleft   = PetscMax(xstart - overlap,0);
1446       xright  = PetscMin(xstart + maxwidth + overlap,M);
1447       /*
1448 	 Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1449       */
1450       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1451       if(nidx) {
1452         ++s;
1453       }
1454       xstart += maxwidth;
1455     }/* for(i = 0; i < Mdomains; ++i) */
1456     ystart += maxheight;
1457   }/* for(j = 0; j < Ndomains; ++j) */
1458   /* Now we can allocate the necessary number of ISs. */
1459   *nsub = s;
1460   ierr = PetscMalloc((*nsub)*sizeof(IS),is);CHKERRQ(ierr);
1461   ierr = PetscMalloc((*nsub)*sizeof(IS),is_local);CHKERRQ(ierr);
1462   s = 0;
1463   ystart = 0;
1464   for (j=0; j<Ndomains; ++j) {
1465     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1466     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1467     /* Vertical domain limits with an overlap. */
1468     y[0][0] = PetscMax(ystart - overlap,0);
1469     y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1470     /* Vertical domain limits without an overlap. */
1471     y[1][0] = ystart;
1472     y[1][1] = PetscMin(ystart + maxheight,N);
1473     xstart = 0;
1474     for (i=0; i<Mdomains; ++i) {
1475       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1476       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1477       /* Horizontal domain limits with an overlap. */
1478       x[0][0]  = PetscMax(xstart - overlap,0);
1479       x[0][1]  = PetscMin(xstart + maxwidth + overlap,M);
1480       /* Horizontal domain limits without an overlap. */
1481       x[1][0] = xstart;
1482       x[1][1] = PetscMin(xstart+maxwidth,M);
1483       /*
1484 	 Determine whether this domain intersects this processor's ownership range of pc->pmat.
1485 	 Do this twice: first for the domains with overlaps, and once without.
1486 	 During the first pass create the subcommunicators, and use them on the second pass as well.
1487       */
1488       for(q = 0; q < 2; ++q) {
1489 	/*
1490 	  The domain limits (xleft, xright) and (ylow, yhigh) are adjusted
1491 	  according to whether the domain with or without an overlap is considered.
1492 	*/
1493 	xleft = x[q][0]; xright = x[q][1];
1494 	ylow  = y[q][0]; yhigh  = y[q][1];
1495         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1496 	nidx *= dof;
1497         n[q] = nidx;
1498         /*
1499          Based on the counted number of indices in the local domain *with an overlap*,
1500          construct a subcommunicator of all the processors supporting this domain.
1501          Observe that a domain with an overlap might have nontrivial local support,
1502          while the domain without an overlap might not.  Hence, the decision to participate
1503          in the subcommunicator must be based on the domain with an overlap.
1504          */
1505 	if (q == 0) {
1506 	  if(nidx) {
1507 	    color = 1;
1508 	  } else {
1509 	    color = MPI_UNDEFINED;
1510 	  }
1511 	  ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
1512 	}
1513         /*
1514          Proceed only if the number of local indices *with an overlap* is nonzero.
1515          */
1516         if (n[0]) {
1517           if(q == 0) {
1518             iis = is;
1519           }
1520           if (q == 1) {
1521             /*
1522              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1523              Moreover, if the overlap is zero, the two ISs are identical.
1524              */
1525             if (overlap == 0) {
1526               (*is_local)[s] = (*is)[s];
1527               ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
1528               continue;
1529             } else {
1530               iis = is_local;
1531               subcomm = ((PetscObject)(*is)[s])->comm;
1532             }
1533           }/* if(q == 1) */
1534           idx = PETSC_NULL;
1535 	  ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
1536           if(nidx) {
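            /* Fill idx with the natural (row-major) global indices of the local subdomain, dof values per grid node. */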
1537             k    = 0;
1538             for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1539               PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft;
1540               PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright;
1541               kk = dof*(M*jj + x0);
1542               for (ii=x0; ii<x1; ++ii) {
1543                 for(d = 0; d < dof; ++d) {
1544                   idx[k++] = kk++;
1545                 }
1546               }
1547             }
1548           }
1549 	  ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr);
1550 	}/* if(n[0]) */
1551       }/* for(q = 0; q < 2; ++q) */
1552       if(n[0]) {
1553         ++s;
1554       }
1555       xstart += maxwidth;
1556     }/* for(i = 0; i < Mdomains; ++i) */
1557     ystart += maxheight;
1558   }/* for(j = 0; j < Ndomains; ++j) */
1559   PetscFunctionReturn(0);
1560 }
1561 
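/*
   A minimal usage sketch (the grid and subdomain counts are made up; following
   the note in the manual page above, a sequential run is assumed):

     PetscInt nsub;
     IS       *is,*is_local;
     ierr = PCGASMCreateSubdomains2D(pc,64,64,4,4,1,1,&nsub,&is,&is_local);CHKERRQ(ierr);
     ierr = PCGASMSetLocalSubdomains(pc,nsub,is,is_local);CHKERRQ(ierr);
     ierr = PCGASMDestroySubdomains(nsub,is,is_local);CHKERRQ(ierr);
*/
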
1562 #undef __FUNCT__
1563 #define __FUNCT__ "PCGASMGetLocalSubdomains"
1564 /*@C
1565     PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor
1566     only) for the additive Schwarz preconditioner.
1567 
1568     Not Collective
1569 
1570     Input Parameter:
1571 .   pc - the preconditioner context
1572 
1573     Output Parameters:
1574 +   n - the number of subdomains for this processor (default value = 1)
1575 .   is - the index sets that define the subdomains for this processor
1576 -   is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)
1577 
1578 
1579     Notes:
1580     The IS numbering is in the parallel, global numbering of the vector.
1581 
1582     Level: advanced
1583 
1584 .keywords: PC, GASM, set, local, subdomains, additive Schwarz
1585 
1586 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1587           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices()
1588 @*/
1589 PetscErrorCode  PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1590 {
1591   PC_GASM         *osm;
1592   PetscErrorCode ierr;
1593   PetscBool      match;
1594 
1595   PetscFunctionBegin;
1596   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1597   PetscValidIntPointer(n,2);
1598   if (is) PetscValidPointer(is,3);
1599   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1600   if (!match) {
1601     if (n)  *n  = 0;
1602     if (is) *is = PETSC_NULL;
1603   } else {
1604     osm = (PC_GASM*)pc->data;
1605     if (n)  *n  = osm->n;
1606     if (is) *is = osm->is;
1607     if (is_local) *is_local = osm->is_local;
1608   }
1609   PetscFunctionReturn(0);
1610 }
1611 
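/*
   A small query sketch (illustrative; pc is assumed to be a PCGASM whose
   subdomains have already been set):

     PetscInt i,n,len;
     IS       *is,*is_local;
     ierr = PCGASMGetLocalSubdomains(pc,&n,&is,&is_local);CHKERRQ(ierr);
     for (i=0; i<n; ++i) {
       ierr = ISGetLocalSize(is[i],&len);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_SELF,"subdomain %D: %D local indices\n",i,len);CHKERRQ(ierr);
     }
*/
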
1612 #undef __FUNCT__
1613 #define __FUNCT__ "PCGASMGetLocalSubmatrices"
1614 /*@C
1615     PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1616     only) for the additive Schwarz preconditioner.
1617 
1618     Not Collective
1619 
1620     Input Parameter:
1621 .   pc - the preconditioner context
1622 
1623     Output Parameters:
1624 +   n - the number of matrices for this processor (default value = 1)
1625 -   mat - the matrices
1626 
1627 
1628     Level: advanced
1629 
1630 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi
1631 
1632 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1633           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains()
1634 @*/
1635 PetscErrorCode  PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1636 {
1637   PC_GASM         *osm;
1638   PetscErrorCode ierr;
1639   PetscBool      match;
1640 
1641   PetscFunctionBegin;
1642   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1643   PetscValidIntPointer(n,2);
1644   if (mat) PetscValidPointer(mat,3);
1645   if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1646   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1647   if (!match) {
1648     if (n)   *n   = 0;
1649     if (mat) *mat = PETSC_NULL;
1650   } else {
1651     osm = (PC_GASM*)pc->data;
1652     if (n)   *n   = osm->n;
1653     if (mat) *mat = osm->pmat;
1654   }
1655   PetscFunctionReturn(0);
1656 }
1657
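/*
   A matching sketch for the submatrices (illustrative; per the check above it
   must run after KSPSetUp() or PCSetUp(), and the KSP ksp owning pc is an
   assumed name):

     PetscInt i,n,m,nc;
     Mat      *mat;
     ierr = KSPSetUp(ksp);CHKERRQ(ierr);
     ierr = PCGASMGetLocalSubmatrices(pc,&n,&mat);CHKERRQ(ierr);
     for (i=0; i<n; ++i) {
       ierr = MatGetSize(mat[i],&m,&nc);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_SELF,"block %D is %D by %D\n",i,m,nc);CHKERRQ(ierr);
     }
*/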