xref: /petsc/src/ksp/pc/impls/asm/asm.c (revision 53888de87b5439a0ffb06f399d7b8a068b7d2944)
1 #define PETSCKSP_DLL
2 
3 /*
4   This file defines an additive Schwarz preconditioner for any Mat implementation.
5 
6   Note that each processor may have any number of subdomains. But in order to
7   deal easily with the VecScatter(), we treat each processor as if it has the
8   same number of subdomains.
9 
10        n - total number of true subdomains on all processors
11        n_local_true - actual number of subdomains on this processor
12        n_local = maximum over all processors of n_local_true
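       For example, if rank 0 supplies one subdomain and rank 1 supplies three, then
       n = 4, n_local_true = 1 on rank 0 and 3 on rank 1, and n_local = 3 on both ranks.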
13 */
14 #include "private/pcimpl.h"     /*I "petscpc.h" I*/
15 
16 typedef struct {
17   PetscInt   n, n_local, n_local_true;
18   PetscInt   overlap;             /* overlap requested by user */
19   KSP        *ksp;                /* linear solvers for each block */
20   VecScatter *restriction;        /* mapping from global to subregion */
21   VecScatter *localization;       /* mapping from overlapping to non-overlapping subregion */
22   VecScatter *prolongation;       /* mapping from subregion to global */
23   Vec        *x,*y,*y_local;      /* work vectors */
24   IS         *is;                 /* index set that defines each overlapping subdomain */
25   IS         *is_local;           /* index set that defines each non-overlapping subdomain, may be NULL */
26   Mat        *mat,*pmat;          /* mat is not currently used */
27   PCASMType  type;                /* use reduced interpolation, restriction or both */
28   PetscTruth type_set;            /* if user set this value (so won't change it for symmetric problems) */
29   PetscTruth same_local_solves;   /* flag indicating whether all local solvers are the same */
30 } PC_ASM;
31 
32 #undef __FUNCT__
33 #define __FUNCT__ "PCView_ASM"
34 static PetscErrorCode PCView_ASM(PC pc,PetscViewer viewer)
35 {
36   PC_ASM         *osm = (PC_ASM*)pc->data;
37   PetscErrorCode ierr;
38   PetscMPIInt    rank;
39   PetscInt       i,bsz;
40   PetscTruth     iascii,isstring;
41   PetscViewer    sviewer;
42 
43 
44   PetscFunctionBegin;
45   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
46   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
47   if (iascii) {
48     if (osm->n > 0) {
49       ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: total subdomain blocks = %D, amount of overlap = %D\n",osm->n,osm->overlap);CHKERRQ(ierr);
50     } else {
51       ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: total subdomain blocks not yet set, amount of overlap = %D\n",osm->overlap);CHKERRQ(ierr);
52     }
53     ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: restriction/interpolation type - %s\n",PCASMTypes[osm->type]);CHKERRQ(ierr);
54     ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
55     if (osm->same_local_solves) {
56       if (osm->ksp) {
57         ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all blocks, in the following KSP and PC objects:\n");CHKERRQ(ierr);
58         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
59         if (!rank) {
60           ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
61           ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
62           ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
63         }
64         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
65       }
66     } else {
67       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] number of local blocks = %D\n",(int)rank,osm->n_local_true);CHKERRQ(ierr);
68       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
69       ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each block is in the following KSP and PC objects:\n");CHKERRQ(ierr);
70       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
71       ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
72       for (i=0; i<osm->n_local; i++) {
73         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
74         if (i < osm->n_local_true) {
75           ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
76           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d] local block number %D, size = %D\n",(int)rank,i,bsz);CHKERRQ(ierr);
77           ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
78           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
79         }
80         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
81       }
82       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
83       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
84     }
85   } else if (isstring) {
86     ierr = PetscViewerStringSPrintf(viewer," blocks=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCASMTypes[osm->type]);CHKERRQ(ierr);
87     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
88     if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
89     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
90   } else {
91     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PCASM",((PetscObject)viewer)->type_name);
92   }
93   PetscFunctionReturn(0);
94 }
95 
96 #undef __FUNCT__
97 #define __FUNCT__ "PCASMPrintSubdomains"
98 static PetscErrorCode PCASMPrintSubdomains(PC pc)
99 {
100   PC_ASM         *osm  = (PC_ASM*)pc->data;
101   const char     *prefix;
102   char           fname[PETSC_MAX_PATH_LEN+1];
103   PetscViewer    viewer;
104   PetscInt       i,j,nidx;
105   const PetscInt *idx;
106   PetscErrorCode ierr;
107 
108   PetscFunctionBegin;
109   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
110   ierr = PetscOptionsGetString(prefix,"-pc_asm_print_subdomains",fname,PETSC_MAX_PATH_LEN,PETSC_NULL);CHKERRQ(ierr);
111   if (fname[0] == 0) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
112   ierr = PetscViewerASCIIOpen(((PetscObject)pc)->comm,fname,&viewer);CHKERRQ(ierr);
113   for (i=0;i<osm->n_local_true;i++) {
114     ierr = ISGetLocalSize(osm->is[i],&nidx);CHKERRQ(ierr);
115     ierr = ISGetIndices(osm->is[i],&idx);CHKERRQ(ierr);
116     for (j=0; j<nidx; j++) {
117       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%D ",idx[j]);CHKERRQ(ierr);
118     }
119     ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr);
120     ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr);
121     if (osm->is_local) {
122       ierr = ISGetLocalSize(osm->is_local[i],&nidx);CHKERRQ(ierr);
123       ierr = ISGetIndices(osm->is_local[i],&idx);CHKERRQ(ierr);
124       for (j=0; j<nidx; j++) {
125         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%D ",idx[j]);CHKERRQ(ierr);
126       }
127       ierr = ISRestoreIndices(osm->is_local[i],&idx);CHKERRQ(ierr);
128       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr);
129     }
130   }
131   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
132   ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
133   PetscFunctionReturn(0);
134 }
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "PCSetUp_ASM"
138 static PetscErrorCode PCSetUp_ASM(PC pc)
139 {
140   PC_ASM         *osm  = (PC_ASM*)pc->data;
141   PetscErrorCode ierr;
142   PetscTruth     symset,flg;
143   PetscInt       i,m,firstRow,lastRow;
144   PetscMPIInt    size;
145   MatReuse       scall = MAT_REUSE_MATRIX;
146   IS             isl;
147   KSP            ksp;
148   PC             subpc;
149   const char     *prefix,*pprefix;
150   Vec            vec;
151 
152   PetscFunctionBegin;
153   if (!pc->setupcalled) {
154 
155     if (!osm->type_set) {
156       ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
157       if (symset && flg) { osm->type = PC_ASM_BASIC; }
158     }
159 
160     if (osm->n == PETSC_DECIDE && osm->n_local_true < 1) {
161       /* no subdomains given, use one per processor */
162       osm->n_local = osm->n_local_true = 1;
163       ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
164       osm->n = size;
165     } else if (osm->n == PETSC_DECIDE) {
166       /* determine global number of subdomains */
167       PetscInt inwork[2],outwork[2];
168       inwork[0] = inwork[1] = osm->n_local_true;
169       ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
170       osm->n_local = outwork[0];
171       osm->n       = outwork[1];
172     }
173 
174     if (!osm->is){ /* create the index sets */
175       ierr = PCASMCreateSubdomains(pc->pmat,osm->n_local_true,&osm->is);CHKERRQ(ierr);
176     }
177     ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
178     flg  = PETSC_FALSE;
179     ierr = PetscOptionsGetTruth(prefix,"-pc_asm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
180     if (flg) { ierr = PCASMPrintSubdomains(pc);CHKERRQ(ierr); }
181 
182     /*  Extend the "overlapping" regions by a number of steps  */
183     ierr = MatIncreaseOverlap(pc->pmat,osm->n_local_true,osm->is,osm->overlap);CHKERRQ(ierr);
184     for (i=0; i<osm->n_local_true; i++) {
185       ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
186       if (osm->is_local) {
187         ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
188       }
189     }
190 
191     /* Create the local work vectors and scatter contexts */
192     ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
193     ierr = PetscMalloc(osm->n_local*sizeof(VecScatter *),&osm->restriction);CHKERRQ(ierr);
194     ierr = PetscMalloc(osm->n_local*sizeof(VecScatter *),&osm->localization);CHKERRQ(ierr);
195     ierr = PetscMalloc(osm->n_local*sizeof(VecScatter *),&osm->prolongation);CHKERRQ(ierr);
196     ierr = PetscMalloc(osm->n_local*sizeof(Vec *),&osm->x);CHKERRQ(ierr);
197     ierr = PetscMalloc(osm->n_local*sizeof(Vec *),&osm->y);CHKERRQ(ierr);
198     ierr = PetscMalloc(osm->n_local*sizeof(Vec *),&osm->y_local);CHKERRQ(ierr);
199     ierr = VecGetOwnershipRange(vec, &firstRow, &lastRow);CHKERRQ(ierr);
200     for (i=0; i<osm->n_local_true; ++i, firstRow += m) {
201       ierr = ISGetLocalSize(osm->is[i],&m);CHKERRQ(ierr);
202       ierr = VecCreateSeq(PETSC_COMM_SELF,m,&osm->x[i]);CHKERRQ(ierr);
203       ierr = ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);CHKERRQ(ierr);
204       ierr = VecScatterCreate(vec,osm->is[i],osm->x[i],isl,&osm->restriction[i]);CHKERRQ(ierr);
205       ierr = ISDestroy(isl);CHKERRQ(ierr);
206       ierr = VecDuplicate(osm->x[i],&osm->y[i]);CHKERRQ(ierr);
207       if (osm->is_local) {
208         IS              isShift;
209         const PetscInt *idx_local;
210         PetscInt       *idx, j;
211 
212         ierr = ISGetLocalSize(osm->is_local[i],&m);CHKERRQ(ierr);
213         ierr = ISGetIndices(osm->is_local[i], &idx_local);CHKERRQ(ierr);
214         ierr = PetscMalloc(m*sizeof(PetscInt), &idx);CHKERRQ(ierr);
215         for(j = 0; j < m; ++j) {
216           idx[j] = idx_local[j] - firstRow;
217         }
218         ierr = ISRestoreIndices(osm->is_local[i], &idx_local);CHKERRQ(ierr);
219         ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,&isShift);CHKERRQ(ierr);
220         ierr = PetscFree(idx);CHKERRQ(ierr);
221         ierr = ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);CHKERRQ(ierr);
222         ierr = VecCreateSeq(PETSC_COMM_SELF,m,&osm->y_local[i]);CHKERRQ(ierr);
223         ierr = VecScatterCreate(osm->y[i],isShift,osm->y_local[i],isl,&osm->localization[i]);CHKERRQ(ierr);
224         ierr = ISDestroy(isShift);CHKERRQ(ierr);
225 
226         ierr = VecScatterCreate(vec,osm->is_local[i],osm->y_local[i],isl,&osm->prolongation[i]);CHKERRQ(ierr);
227         ierr = ISDestroy(isl);CHKERRQ(ierr);
228       } else {
229         osm->y_local[i] = osm->y[i];
230         ierr = PetscObjectReference((PetscObject) osm->y[i]);CHKERRQ(ierr);
231         osm->localization[i] = osm->restriction[i];
232         ierr = PetscObjectReference((PetscObject) osm->restriction[i]);CHKERRQ(ierr);
233         osm->prolongation[i] = osm->restriction[i];
234         ierr = PetscObjectReference((PetscObject) osm->restriction[i]);CHKERRQ(ierr);
235       }
236     }
237     if (firstRow != lastRow) SETERRQ2(PETSC_ERR_PLIB, "Specified ASM subdomain sizes were invalid: %d != %d", firstRow, lastRow);
238     for (i=osm->n_local_true; i<osm->n_local; i++) {
239       ierr = VecCreateSeq(PETSC_COMM_SELF,0,&osm->x[i]);CHKERRQ(ierr);
240       ierr = VecDuplicate(osm->x[i],&osm->y[i]);CHKERRQ(ierr);
241       ierr = VecDuplicate(osm->x[i],&osm->y_local[i]);CHKERRQ(ierr);
242       ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&isl);CHKERRQ(ierr);
243       ierr = VecScatterCreate(vec,isl,osm->x[i],isl,&osm->restriction[i]);CHKERRQ(ierr);
244       osm->localization[i] = osm->restriction[i];
245       ierr = PetscObjectReference((PetscObject) osm->restriction[i]);CHKERRQ(ierr);
246       osm->prolongation[i] = osm->restriction[i];
247       ierr = PetscObjectReference((PetscObject) osm->restriction[i]);CHKERRQ(ierr);
248       ierr = ISDestroy(isl);CHKERRQ(ierr);
249     }
250     ierr = VecDestroy(vec);CHKERRQ(ierr);
251 
252     /* Create the local solvers */
253     ierr = PetscMalloc(osm->n_local_true*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr);
254     for (i=0; i<osm->n_local_true; i++) {
255       ierr = KSPCreate(PETSC_COMM_SELF,&ksp);CHKERRQ(ierr);
256       ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
257       ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
258       ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
259       ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
260       ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
261       ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
262       ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
263       osm->ksp[i] = ksp;
264     }
265     scall = MAT_INITIAL_MATRIX;
266 
267   } else {
268     /*
269        Destroy the blocks from the previous iteration
270     */
271     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
272       ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
273       scall = MAT_INITIAL_MATRIX;
274     }
275   }
276 
277   /*
278      Extract out the submatrices
279   */
280   ierr = MatGetSubMatrices(pc->pmat,osm->n_local_true,osm->is,osm->is,scall,&osm->pmat);CHKERRQ(ierr);
281   if (scall == MAT_INITIAL_MATRIX) {
282     ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
283     for (i=0; i<osm->n_local_true; i++) {
284       ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
285       ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
286     }
287   }
288 
289   /* Return control to the user so that the submatrices can be modified (e.g., to apply
290      different boundary conditions for the submatrices than for the global problem) */
291   ierr = PCModifySubMatrices(pc,osm->n_local_true,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);
292 
293   /*
294      Loop over subdomains putting them into local ksp
295   */
296   for (i=0; i<osm->n_local_true; i++) {
297     ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
298     if (!pc->setupcalled) {
299       ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
300     }
301   }
302 
303   PetscFunctionReturn(0);
304 }
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "PCSetUpOnBlocks_ASM"
308 static PetscErrorCode PCSetUpOnBlocks_ASM(PC pc)
309 {
310   PC_ASM         *osm = (PC_ASM*)pc->data;
311   PetscErrorCode ierr;
312   PetscInt       i;
313 
314   PetscFunctionBegin;
315   for (i=0; i<osm->n_local_true; i++) {
316     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "PCApply_ASM"
323 static PetscErrorCode PCApply_ASM(PC pc,Vec x,Vec y)
324 {
325   PC_ASM         *osm = (PC_ASM*)pc->data;
326   PetscErrorCode ierr;
327   PetscInt       i,n_local = osm->n_local,n_local_true = osm->n_local_true;
328   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
329 
330   PetscFunctionBegin;
331   /*
332      Support for limiting the restriction or interpolation to only local
333      subdomain values (leaving the other values 0).
334   */
335   if (!(osm->type & PC_ASM_RESTRICT)) {
336     forward = SCATTER_FORWARD_LOCAL;
337     /* have to zero the work RHS since scatter may leave some slots empty */
338     for (i=0; i<n_local_true; i++) {
339       ierr = VecSet(osm->x[i],0.0);CHKERRQ(ierr);
340     }
341   }
342   if (!(osm->type & PC_ASM_INTERPOLATE)) {
343     reverse = SCATTER_REVERSE_LOCAL;
344   }
345 
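  /* Outline of the apply: post the restriction scatters of x into the overlapping subdomain
     work vectors, solve each block with its own KSP, localize the overlapping solution when
     nonoverlapping subdomains were supplied, and accumulate the results into y with the
     (reverse) prolongation scatters.  The Begin/End pairs let communication overlap the solves. */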
346   for (i=0; i<n_local; i++) {
347     ierr = VecScatterBegin(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
348   }
349   ierr = VecSet(y,0.0);CHKERRQ(ierr);
350   /* do the local solves */
351   for (i=0; i<n_local_true; i++) {
352     ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
353     ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
354     ierr = VecScatterBegin(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr);
355     ierr = VecScatterEnd(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr);
356     ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
357   }
358   /* handle the rest of the scatters that do not have local solves */
359   for (i=n_local_true; i<n_local; i++) {
360     ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
361     ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
362   }
363   for (i=0; i<n_local; i++) {
364     ierr = VecScatterEnd(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
365   }
366   PetscFunctionReturn(0);
367 }
368 
369 #undef __FUNCT__
370 #define __FUNCT__ "PCApplyTranspose_ASM"
371 static PetscErrorCode PCApplyTranspose_ASM(PC pc,Vec x,Vec y)
372 {
373   PC_ASM         *osm = (PC_ASM*)pc->data;
374   PetscErrorCode ierr;
375   PetscInt       i,n_local = osm->n_local,n_local_true = osm->n_local_true;
376   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
377 
378   PetscFunctionBegin;
379   /*
380      Support for limiting the restriction or interpolation to only local
381      subdomain values (leaving the other values 0).
382 
383      Note: these are reversed from the PCApply_ASM() because we are applying the
384      transpose of the three terms
385   */
386   if (!(osm->type & PC_ASM_INTERPOLATE)) {
387     forward = SCATTER_FORWARD_LOCAL;
388     /* have to zero the work RHS since scatter may leave some slots empty */
389     for (i=0; i<n_local_true; i++) {
390       ierr = VecSet(osm->x[i],0.0);CHKERRQ(ierr);
391     }
392   }
393   if (!(osm->type & PC_ASM_RESTRICT)) {
394     reverse = SCATTER_REVERSE_LOCAL;
395   }
396 
397   for (i=0; i<n_local; i++) {
398     ierr = VecScatterBegin(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
399   }
400   ierr = VecSet(y,0.0);CHKERRQ(ierr);
401   /* do the local solves */
402   for (i=0; i<n_local_true; i++) {
403     ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
404     ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
405     ierr = VecScatterBegin(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr);
406     ierr = VecScatterEnd(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr);
407     ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
408   }
409   /* handle the rest of the scatters that do not have local solves */
410   for (i=n_local_true; i<n_local; i++) {
411     ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
412     ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
413   }
414   for (i=0; i<n_local; i++) {
415     ierr = VecScatterEnd(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
416   }
417   PetscFunctionReturn(0);
418 }
419 
420 #undef __FUNCT__
421 #define __FUNCT__ "PCDestroy_ASM"
422 static PetscErrorCode PCDestroy_ASM(PC pc)
423 {
424   PC_ASM         *osm = (PC_ASM*)pc->data;
425   PetscErrorCode ierr;
426   PetscInt       i;
427 
428   PetscFunctionBegin;
429   if (osm->ksp) {
430     for (i=0; i<osm->n_local_true; i++) {
431       ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr);
432     }
433     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
434   }
435   if (osm->pmat) {
436     if (osm->n_local_true > 0) {
437       ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
438     }
439   }
440   if (osm->restriction) {
441     for (i=0; i<osm->n_local; i++) {
442       ierr = VecScatterDestroy(osm->restriction[i]);CHKERRQ(ierr);
443       ierr = VecScatterDestroy(osm->prolongation[i]);CHKERRQ(ierr);
444       ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr);
445       ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr);
446       ierr = VecDestroy(osm->y_local[i]);CHKERRQ(ierr);
447     }
448     ierr = PetscFree(osm->restriction);CHKERRQ(ierr);
449     ierr = PetscFree(osm->prolongation);CHKERRQ(ierr);
450     ierr = PetscFree(osm->x);CHKERRQ(ierr);
451     ierr = PetscFree(osm->y);CHKERRQ(ierr);
452     ierr = PetscFree(osm->y_local);CHKERRQ(ierr);
453   }
454   if (osm->is) {
455     ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);CHKERRQ(ierr);
456   }
457   ierr = PetscFree(osm);CHKERRQ(ierr);
458   PetscFunctionReturn(0);
459 }
460 
461 #undef __FUNCT__
462 #define __FUNCT__ "PCSetFromOptions_ASM"
463 static PetscErrorCode PCSetFromOptions_ASM(PC pc)
464 {
465   PC_ASM         *osm = (PC_ASM*)pc->data;
466   PetscErrorCode ierr;
467   PetscInt       blocks,ovl;
468   PetscTruth     symset,flg;
469   PCASMType      asmtype;
470 
471   PetscFunctionBegin;
472   /* set the type to symmetric if matrix is symmetric */
473   if (!osm->type_set && pc->pmat) {
474     ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
475     if (symset && flg) { osm->type = PC_ASM_BASIC; }
476   }
477   ierr = PetscOptionsHead("Additive Schwarz options");CHKERRQ(ierr);
478     ierr = PetscOptionsInt("-pc_asm_blocks","Number of subdomains","PCASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
479     if (flg) {ierr = PCASMSetTotalSubdomains(pc,blocks,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); }
480     ierr = PetscOptionsInt("-pc_asm_overlap","Number of grid points overlap","PCASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
481     if (flg) {ierr = PCASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
482     flg  = PETSC_FALSE;
483     ierr = PetscOptionsEnum("-pc_asm_type","Type of restriction/extension","PCASMSetType",PCASMTypes,(PetscEnum)osm->type,(PetscEnum*)&asmtype,&flg);CHKERRQ(ierr);
484     if (flg) {ierr = PCASMSetType(pc,asmtype);CHKERRQ(ierr); }
485   ierr = PetscOptionsTail();CHKERRQ(ierr);
486   PetscFunctionReturn(0);
487 }
488 
489 /*------------------------------------------------------------------------------------*/
490 
491 EXTERN_C_BEGIN
492 #undef __FUNCT__
493 #define __FUNCT__ "PCASMSetLocalSubdomains_ASM"
494 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetLocalSubdomains_ASM(PC pc,PetscInt n,IS is[],IS is_local[])
495 {
496   PC_ASM         *osm = (PC_ASM*)pc->data;
497   PetscErrorCode ierr;
498   PetscInt       i;
499 
500   PetscFunctionBegin;
501   if (n < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
502   if (pc->setupcalled && (n != osm->n_local_true || is)) {
503     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetLocalSubdomains() should be called before calling PCSetUp().");
504   }
505   if (!pc->setupcalled) {
506     if (is) {
507       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
508     }
509     if (osm->is) {
510       ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);CHKERRQ(ierr);
511     }
512     osm->n_local_true = n;
513     osm->is           = 0;
514     osm->is_local     = 0;
515     if (is) {
516       ierr = PetscMalloc(n*sizeof(IS *),&osm->is);CHKERRQ(ierr);
517       for (i=0; i<n; i++) { osm->is[i] = is[i]; }
518     }
519     if (is_local) {
520       ierr = PetscMalloc(n*sizeof(IS *),&osm->is_local);CHKERRQ(ierr);
521       for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
522     }
523   }
524   PetscFunctionReturn(0);
525 }
526 EXTERN_C_END
527 
528 EXTERN_C_BEGIN
529 #undef __FUNCT__
530 #define __FUNCT__ "PCASMSetTotalSubdomains_ASM"
531 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetTotalSubdomains_ASM(PC pc,PetscInt N,IS *is)
532 {
533   PC_ASM         *osm = (PC_ASM*)pc->data;
534   PetscErrorCode ierr;
535   PetscMPIInt    rank,size;
536   PetscInt       n;
537 
538   PetscFunctionBegin;
539   if (N < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
540   if (is) SETERRQ(PETSC_ERR_SUP,"Use PCASMSetLocalSubdomains() to set specific index sets\n\tthey cannot be set globally yet.");
541 
542   /*
543      Split the subdomains equally among all processors
544   */
545   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
546   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
547   n = N/size + ((N % size) > rank);
548   if (!n) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
549   if (pc->setupcalled && n != osm->n_local_true) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetTotalSubdomains() should be called before PCSetUp().");
550   if (!pc->setupcalled) {
551     if (osm->is) {
552       ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);CHKERRQ(ierr);
553     }
554     osm->n_local_true = n;
555     osm->is           = 0;
556     osm->is_local     = 0;
557   }
558   PetscFunctionReturn(0);
559 }
560 EXTERN_C_END
561 
562 EXTERN_C_BEGIN
563 #undef __FUNCT__
564 #define __FUNCT__ "PCASMSetOverlap_ASM"
565 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetOverlap_ASM(PC pc,PetscInt ovl)
566 {
567   PC_ASM *osm = (PC_ASM*)pc->data;
568 
569   PetscFunctionBegin;
570   if (ovl < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
571   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetOverlap() should be called before PCSetUp().");
572   if (!pc->setupcalled) {
573     osm->overlap = ovl;
574   }
575   PetscFunctionReturn(0);
576 }
577 EXTERN_C_END
578 
579 EXTERN_C_BEGIN
580 #undef __FUNCT__
581 #define __FUNCT__ "PCASMSetType_ASM"
582 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetType_ASM(PC pc,PCASMType type)
583 {
584   PC_ASM *osm = (PC_ASM*)pc->data;
585 
586   PetscFunctionBegin;
587   osm->type     = type;
588   osm->type_set = PETSC_TRUE;
589   PetscFunctionReturn(0);
590 }
591 EXTERN_C_END
592 
593 EXTERN_C_BEGIN
594 #undef __FUNCT__
595 #define __FUNCT__ "PCASMGetSubKSP_ASM"
596 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetSubKSP_ASM(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
597 {
598   PC_ASM         *osm = (PC_ASM*)pc->data;
599   PetscErrorCode ierr;
600 
601   PetscFunctionBegin;
602   if (osm->n_local_true < 1) {
603     SETERRQ(PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
604   }
605 
606   if (n_local) {
607     *n_local = osm->n_local_true;
608   }
609   if (first_local) {
610     ierr = MPI_Scan(&osm->n_local_true,first_local,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
611     *first_local -= osm->n_local_true;
612   }
613   if (ksp) {
614     /* Assume that local solves are now different; not necessarily
615        true though!  This flag is used only for PCView_ASM() */
616     *ksp                   = osm->ksp;
617     osm->same_local_solves = PETSC_FALSE;
618   }
619   PetscFunctionReturn(0);
620 }
621 EXTERN_C_END
622 
623 
624 #undef __FUNCT__
625 #define __FUNCT__ "PCASMSetLocalSubdomains"
626 /*@C
627     PCASMSetLocalSubdomains - Sets the local subdomains (for this processor
628     only) for the additive Schwarz preconditioner.
629 
630     Collective on PC
631 
632     Input Parameters:
633 +   pc - the preconditioner context
634 .   n - the number of subdomains for this processor (default value = 1)
635 .   is - the index sets that define the subdomains for this processor
636          (or PETSC_NULL for PETSc to determine subdomains)
637 -   is_local - the index sets that define the local part of the subdomains for this processor
638          (or PETSC_NULL to use the default of 1 subdomain per process)
639 
640     Notes:
641     The IS numbering is in the parallel, global numbering of the vector.
642 
643     By default the ASM preconditioner uses 1 block per processor.
644 
645     These index sets cannot be destroyed until after completion of the
646     linear solves for which the ASM preconditioner is being used.
647 
648     Use PCASMSetTotalSubdomains() to set the subdomains for all processors.
649 
650     Level: advanced
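
    Example usage (a sketch; assumes pc is of type PCASM and the application has already
    created the index sets in the global numbering, e.g. with ISCreateGeneral()):
.vb
      IS is[2];
      /* ... build is[0] and is[1] ... */
      ierr = PCASMSetLocalSubdomains(pc,2,is,PETSC_NULL);CHKERRQ(ierr);
.ve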
651 
652 .keywords: PC, ASM, set, local, subdomains, additive Schwarz
653 
654 .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
655           PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
656 @*/
657 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
658 {
659   PetscErrorCode ierr,(*f)(PC,PetscInt,IS[],IS[]);
660 
661   PetscFunctionBegin;
662   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
663   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetLocalSubdomains_C",(void (**)(void))&f);CHKERRQ(ierr);
664   if (f) {
665     ierr = (*f)(pc,n,is,is_local);CHKERRQ(ierr);
666   }
667   PetscFunctionReturn(0);
668 }
669 
670 #undef __FUNCT__
671 #define __FUNCT__ "PCASMSetTotalSubdomains"
672 /*@C
673     PCASMSetTotalSubdomains - Sets the subdomains for all processors for the
674     additive Schwarz preconditioner.  Either all or no processors in the
675     PC communicator must call this routine, with the same index sets.
676 
677     Collective on PC
678 
679     Input Parameters:
680 +   pc - the preconditioner context
681 .   n - the number of subdomains for all processors
682 .   is - the index sets that define the subdomains for all processors
683          (or PETSC_NULL for PETSc to determine subdomains)
684 -   is_local - the index sets that define the local part of the subdomains for this processor
685          (or PETSC_NULL to use the default of 1 subdomain per process)
686 
687     Options Database Key:
688     To set the total number of subdomain blocks rather than specify the
689     index sets, use the option
690 .    -pc_asm_blocks <blks> - Sets total blocks
691 
692     Notes:
693     Currently the is argument must be PETSC_NULL; this routine sets only the total number of blocks. Use PCASMSetLocalSubdomains() to supply specific index sets.
694 
695     By default the ASM preconditioner uses 1 block per processor.
696 
697     These index sets cannot be destroyed until after completion of the
698     linear solves for which the ASM preconditioner is being used.
699 
700     Use PCASMSetLocalSubdomains() to set local subdomains.
701 
702     Level: advanced
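
    Example usage (a sketch; lets PETSc split eight blocks across the processes, each of
    which must receive at least one block):
.vb
      ierr = PCASMSetTotalSubdomains(pc,8,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
.ve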
703 
704 .keywords: PC, ASM, set, total, global, subdomains, additive Schwarz
705 
706 .seealso: PCASMSetLocalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
707           PCASMCreateSubdomains2D()
708 @*/
709 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetTotalSubdomains(PC pc,PetscInt N,IS is[],IS is_local[])
710 {
711   PetscErrorCode ierr,(*f)(PC,PetscInt,IS[],IS[]);
712 
713   PetscFunctionBegin;
714   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
715   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetTotalSubdomains_C",(void (**)(void))&f);CHKERRQ(ierr);
716   if (f) {
717     ierr = (*f)(pc,N,is,is_local);CHKERRQ(ierr);
718   }
719   PetscFunctionReturn(0);
720 }
721 
722 #undef __FUNCT__
723 #define __FUNCT__ "PCASMSetOverlap"
724 /*@
725     PCASMSetOverlap - Sets the overlap between a pair of subdomains for the
726     additive Schwarz preconditioner.  Either all or no processors in the
727     PC communicator must call this routine.
728 
729     Collective on PC
730 
731     Input Parameters:
732 +   pc  - the preconditioner context
733 -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)
734 
735     Options Database Key:
736 .   -pc_asm_overlap <ovl> - Sets overlap
737 
738     Notes:
739     By default the ASM preconditioner uses 1 block per processor.  To use
740     multiple blocks per processor, see PCASMSetTotalSubdomains() and
741     PCASMSetLocalSubdomains() (and the option -pc_asm_blocks <blks>).
742 
743     The overlap defaults to 1, so if one desires that no additional
744     overlap be computed beyond what may have been set with a call to
745     PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(), then ovl
746     must be set to 0.  In particular, if one does not explicitly set
747     the subdomains in an application code, then all overlap would be computed
748     internally by PETSc, and using an overlap of 0 would result in an ASM
749     variant that is equivalent to the block Jacobi preconditioner.
750 
751     Note that one can define initial index sets with any overlap via
752     PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(); the routine
753     PCASMSetOverlap() merely allows PETSc to extend that overlap further
754     if desired.
755 
756     Level: intermediate
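
    Example usage (a sketch; requests two layers of additional overlap, equivalent to the
    option -pc_asm_overlap 2):
.vb
      ierr = PCASMSetOverlap(pc,2);CHKERRQ(ierr);
.ve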
757 
758 .keywords: PC, ASM, set, overlap
759 
760 .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
761           PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
762 @*/
763 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetOverlap(PC pc,PetscInt ovl)
764 {
765   PetscErrorCode ierr,(*f)(PC,PetscInt);
766 
767   PetscFunctionBegin;
768   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
769   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetOverlap_C",(void (**)(void))&f);CHKERRQ(ierr);
770   if (f) {
771     ierr = (*f)(pc,ovl);CHKERRQ(ierr);
772   }
773   PetscFunctionReturn(0);
774 }
775 
776 #undef __FUNCT__
777 #define __FUNCT__ "PCASMSetType"
778 /*@
779     PCASMSetType - Sets the type of restriction and interpolation used
780     for local problems in the additive Schwarz method.
781 
782     Collective on PC
783 
784     Input Parameters:
785 +   pc  - the preconditioner context
786 -   type - variant of ASM, one of
787 .vb
788       PC_ASM_BASIC       - full interpolation and restriction
789       PC_ASM_RESTRICT    - full restriction, local processor interpolation
790       PC_ASM_INTERPOLATE - full interpolation, local processor restriction
791       PC_ASM_NONE        - local processor restriction and interpolation
792 .ve
793 
794     Options Database Key:
795 .   -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type
796 
797     Level: intermediate
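
    Example usage (a sketch; selects standard ASM instead of the default restricted variant):
.vb
      ierr = PCASMSetType(pc,PC_ASM_BASIC);CHKERRQ(ierr);
.ve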
798 
799 .keywords: PC, ASM, set, type
800 
801 .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMGetSubKSP(),
802           PCASMCreateSubdomains2D()
803 @*/
804 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetType(PC pc,PCASMType type)
805 {
806   PetscErrorCode ierr,(*f)(PC,PCASMType);
807 
808   PetscFunctionBegin;
809   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
810   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetType_C",(void (**)(void))&f);CHKERRQ(ierr);
811   if (f) {
812     ierr = (*f)(pc,type);CHKERRQ(ierr);
813   }
814   PetscFunctionReturn(0);
815 }
816 
817 #undef __FUNCT__
818 #define __FUNCT__ "PCASMGetSubKSP"
819 /*@C
820    PCASMGetSubKSP - Gets the local KSP contexts for all blocks on
821    this processor.
822 
823    Collective on PC iff first_local is requested
824 
825    Input Parameter:
826 .  pc - the preconditioner context
827 
828    Output Parameters:
829 +  n_local - the number of blocks on this processor or PETSC_NULL
830 .  first_local - the global number of the first block on this processor or PETSC_NULL,
831                  all processors must request or all must pass PETSC_NULL
832 -  ksp - the array of KSP contexts
833 
834    Note:
835    After PCASMGetSubKSP() the array of KSP contexts is not to be freed by the user
836 
837    Currently for some matrix implementations only 1 block per processor
838    is supported.
839 
840    You must call KSPSetUp() before calling PCASMGetSubKSP().
841 
842    Level: advanced
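
   Example usage (a sketch; assumes KSPSetUp() has already been called on the outer solver):
.vb
      PetscInt nlocal,first,i;
      KSP      *subksp;
      ierr = PCASMGetSubKSP(pc,&nlocal,&first,&subksp);CHKERRQ(ierr);
      for (i=0; i<nlocal; i++) {
        ierr = KSPSetTolerances(subksp[i],1.e-6,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);CHKERRQ(ierr);
      }
.ve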
843 
844 .keywords: PC, ASM, additive Schwarz, get, sub, KSP, context
845 
846 .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMSetOverlap(),
847           PCASMCreateSubdomains2D(),
848 @*/
849 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
850 {
851   PetscErrorCode ierr,(*f)(PC,PetscInt*,PetscInt*,KSP **);
852 
853   PetscFunctionBegin;
854   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
855   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMGetSubKSP_C",(void (**)(void))&f);CHKERRQ(ierr);
856   if (f) {
857     ierr = (*f)(pc,n_local,first_local,ksp);CHKERRQ(ierr);
858   } else {
859     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot get subksp for this type of PC");
860   }
861 
862   PetscFunctionReturn(0);
863 }
864 
865 /* -------------------------------------------------------------------------------------*/
866 /*MC
867    PCASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
868            its own KSP object.
869 
870    Options Database Keys:
871 +  -pc_asm_truelocal - Activates PCASMSetUseTrueLocal()
872 .  -pc_asm_blocks <blks> - Sets total blocks
873 .  -pc_asm_overlap <ovl> - Sets overlap
874 -  -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type
875 
876      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors, you
877       will get different convergence rates due to the default option of -pc_asm_type restrict. Use
878       -pc_asm_type basic to use the standard ASM.
879 
880    Notes: Each processor can have one or more blocks, but a block cannot be shared by more
881      than one processor. Defaults to one block per processor.
882 
883      To set options on the solvers for each block, append -sub_ to all the KSP and PC
884         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
885 
886      To set the options on the solvers separate for each block call PCASMGetSubKSP()
887          and set the options directly on the resulting KSP object (you can access its PC
888          with KSPGetPC())
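
     For example, a sketch (error checking omitted) that uses a direct factorization on every block:
.vb
      PetscInt nlocal,i;
      KSP      *subksp;
      PC       subpc;
      PCASMGetSubKSP(pc,&nlocal,PETSC_NULL,&subksp);
      for (i=0; i<nlocal; i++) {
        KSPGetPC(subksp[i],&subpc);
        PCSetType(subpc,PCLU);
      }
.ve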
889 
890 
891    Level: beginner
892 
893    Concepts: additive Schwarz method
894 
895     References:
896     An additive variant of the Schwarz alternating method for the case of many subregions
897     M Dryja, OB Widlund - Courant Institute, New York University Technical report
898 
899     Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
900     Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
901 
902 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
903            PCBJACOBI, PCASMSetUseTrueLocal(), PCASMGetSubKSP(), PCASMSetLocalSubdomains(),
904            PCASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCASMSetOverlap(), PCASMSetType()
905 
906 M*/
907 
908 EXTERN_C_BEGIN
909 #undef __FUNCT__
910 #define __FUNCT__ "PCCreate_ASM"
911 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_ASM(PC pc)
912 {
913   PetscErrorCode ierr;
914   PC_ASM         *osm;
915 
916   PetscFunctionBegin;
917   ierr = PetscNewLog(pc,PC_ASM,&osm);CHKERRQ(ierr);
918   osm->n                 = PETSC_DECIDE;
919   osm->n_local           = 0;
920   osm->n_local_true      = 0;
921   osm->overlap           = 1;
922   osm->ksp               = 0;
923   osm->restriction       = 0;
924   osm->prolongation      = 0;
925   osm->x                 = 0;
926   osm->y                 = 0;
927   osm->y_local           = 0;
928   osm->is                = 0;
929   osm->is_local          = 0;
930   osm->mat               = 0;
931   osm->pmat              = 0;
932   osm->type              = PC_ASM_RESTRICT;
933   osm->same_local_solves = PETSC_TRUE;
934 
935   pc->data                   = (void*)osm;
936   pc->ops->apply             = PCApply_ASM;
937   pc->ops->applytranspose    = PCApplyTranspose_ASM;
938   pc->ops->setup             = PCSetUp_ASM;
939   pc->ops->destroy           = PCDestroy_ASM;
940   pc->ops->setfromoptions    = PCSetFromOptions_ASM;
941   pc->ops->setuponblocks     = PCSetUpOnBlocks_ASM;
942   pc->ops->view              = PCView_ASM;
943   pc->ops->applyrichardson   = 0;
944 
945   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetLocalSubdomains_C","PCASMSetLocalSubdomains_ASM",
946                     PCASMSetLocalSubdomains_ASM);CHKERRQ(ierr);
947   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetTotalSubdomains_C","PCASMSetTotalSubdomains_ASM",
948                     PCASMSetTotalSubdomains_ASM);CHKERRQ(ierr);
949   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetOverlap_C","PCASMSetOverlap_ASM",
950                     PCASMSetOverlap_ASM);CHKERRQ(ierr);
951   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetType_C","PCASMSetType_ASM",
952                     PCASMSetType_ASM);CHKERRQ(ierr);
953   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMGetSubKSP_C","PCASMGetSubKSP_ASM",
954                     PCASMGetSubKSP_ASM);CHKERRQ(ierr);
955   PetscFunctionReturn(0);
956 }
957 EXTERN_C_END
958 
959 
960 #undef __FUNCT__
961 #define __FUNCT__ "PCASMCreateSubdomains"
962 /*@C
963    PCASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
964    preconditioner for any problem on a general grid.
965 
966    Collective
967 
968    Input Parameters:
969 +  A - The global matrix operator
970 -  n - the number of local blocks
971 
972    Output Parameters:
973 .  outis - the array of index sets defining the subdomains
974 
975    Level: advanced
976 
977    Note: this generates nonoverlapping subdomains; PCASM will generate the overlap
978     from these if you use PCASMSetLocalSubdomains()
979 
980     In the Fortran version you must provide the array outis[] already allocated of length n.
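
    Example usage (a sketch; creates four blocks on this process, hands them to the PC, and
    destroys them only after the solves are complete):
.vb
      IS *is;
      ierr = PCASMCreateSubdomains(A,4,&is);CHKERRQ(ierr);
      ierr = PCASMSetLocalSubdomains(pc,4,is,PETSC_NULL);CHKERRQ(ierr);
      /* ... KSPSolve() ... */
      ierr = PCASMDestroySubdomains(4,is,PETSC_NULL);CHKERRQ(ierr);
.ve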
981 
982 .keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid
983 
984 .seealso: PCASMSetLocalSubdomains(), PCASMDestroySubdomains()
985 @*/
986 PetscErrorCode PETSCKSP_DLLEXPORT PCASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
987 {
988   MatPartitioning           mpart;
989   const char                *prefix;
990   PetscErrorCode            (*f)(Mat,PetscTruth*,MatReuse,Mat*);
991   PetscMPIInt               size;
992   PetscInt                  i,j,rstart,rend,bs;
993   PetscTruth                iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
994   Mat                       Ad = PETSC_NULL, adj;
995   IS                        ispart,isnumb,*is;
996   PetscErrorCode            ierr;
997 
998   PetscFunctionBegin;
999   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
1000   PetscValidPointer(outis,3);
1001   if (n < 1) {SETERRQ1(PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);}
1002 
1003   /* Get prefix, row distribution, and block size */
1004   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1005   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1006   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1007   if (rstart/bs*bs != rstart || rend/bs*bs != rend) {
1008     SETERRQ3(PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1009   }
1010   /* Get diagonal block from matrix if possible */
1011   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1012   ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
1013   if (f) {
1014     ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr);
1015   } else if (size == 1) {
1016     iscopy = PETSC_FALSE; Ad = A;
1017   } else {
1018     iscopy = PETSC_FALSE; Ad = PETSC_NULL;
1019   }
1020   if (Ad) {
1021     ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1022     if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1023   }
1024   if (Ad && n > 1) {
1025     PetscTruth match,done;
1026     /* Try to setup a good matrix partitioning if available */
1027     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1028     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1029     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1030     ierr = PetscTypeCompare((PetscObject)mpart,MAT_PARTITIONING_CURRENT,&match);CHKERRQ(ierr);
1031     if (!match) {
1032       ierr = PetscTypeCompare((PetscObject)mpart,MAT_PARTITIONING_SQUARE,&match);CHKERRQ(ierr);
1033     }
1034     if (!match) { /* assume a "good" partitioner is available */
1035       PetscInt na,*ia,*ja;
1036       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1037       if (done) {
1038 	/* Build adjacency matrix by hand. Unfortunately a call to
1039 	   MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1040 	   remove the block-aij structure and we cannot expect
1041 	   MatPartitioning to split vertices as we need */
1042 	PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
1043 	nnz = 0;
1044 	for (i=0; i<na; i++) { /* count number of nonzeros */
1045 	  len = ia[i+1] - ia[i];
1046 	  row = ja + ia[i];
1047 	  for (j=0; j<len; j++) {
1048 	    if (row[j] == i) { /* don't count diagonal */
1049 	      len--; break;
1050 	    }
1051 	  }
1052 	  nnz += len;
1053 	}
1054 	ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
1055 	ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
1056 	nnz    = 0;
1057 	iia[0] = 0;
1058 	for (i=0; i<na; i++) { /* fill adjacency */
1059 	  cnt = 0;
1060 	  len = ia[i+1] - ia[i];
1061 	  row = ja + ia[i];
1062 	  for (j=0; j<len; j++) {
1063 	    if (row[j] != i) { /* if not diagonal */
1064 	      jja[nnz+cnt++] = row[j];
1065 	    }
1066 	  }
1067 	  nnz += cnt;
1068 	  iia[i+1] = nnz;
1069 	}
1070 	/* Partitioning of the adjacency matrix */
1071 	ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
1072 	ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1073 	ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
1074 	ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1075 	ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1076 	ierr = MatDestroy(adj);CHKERRQ(ierr);
1077 	foundpart = PETSC_TRUE;
1078       }
1079       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1080     }
1081     ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr);
1082   }
1083   if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);}
1084 
1085   ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
1086   *outis = is;
1087 
1088   if (!foundpart) {
1089 
1090     /* Partitioning by contiguous chunks of rows */
1091 
1092     PetscInt mbs   = (rend-rstart)/bs;
1093     PetscInt start = rstart;
1094     for (i=0; i<n; i++) {
1095       PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1096       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1097       start += count;
1098     }
1099 
1100   } else {
1101 
1102     /* Partitioning by adjacency of diagonal block  */
1103 
1104     const PetscInt *numbering;
1105     PetscInt       *count,nidx,*indices,*newidx,start=0;
1106     /* Get node count in each partition */
1107     ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
1108     ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
1109     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1110       for (i=0; i<n; i++) count[i] *= bs;
1111     }
1112     /* Build indices from node numbering */
1113     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1114     ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
1115     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1116     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1117     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1118     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1119     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1120       ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
1121       for (i=0; i<nidx; i++)
1122 	for (j=0; j<bs; j++)
1123 	  newidx[i*bs+j] = indices[i]*bs + j;
1124       ierr = PetscFree(indices);CHKERRQ(ierr);
1125       nidx   *= bs;
1126       indices = newidx;
1127     }
1128     /* Shift to get global indices */
1129     for (i=0; i<nidx; i++) indices[i] += rstart;
1130 
1131     /* Build the index sets for each block */
1132     for (i=0; i<n; i++) {
1133       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],&is[i]);CHKERRQ(ierr);
1134       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1135       start += count[i];
1136     }
1137 
1138     ierr = PetscFree(count);CHKERRQ(ierr);
1139     ierr = PetscFree(indices);CHKERRQ(ierr);
1140     ierr = ISDestroy(isnumb);CHKERRQ(ierr);
1141     ierr = ISDestroy(ispart);CHKERRQ(ierr);
1142 
1143   }
1144 
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "PCASMDestroySubdomains"
1150 /*@C
1151    PCASMDestroySubdomains - Destroys the index sets created with
1152    PCASMCreateSubdomains(). Should be called after setting subdomains
1153    with PCASMSetLocalSubdomains().
1154 
1155    Collective
1156 
1157    Input Parameters:
1158 +  n - the number of index sets
1159 .  is - the array of index sets
1160 -  is_local - the array of local index sets, can be PETSC_NULL
1161 
1162    Level: advanced
1163 
1164 .keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid
1165 
1166 .seealso: PCASMCreateSubdomains(), PCASMSetLocalSubdomains()
1167 @*/
1168 PetscErrorCode PETSCKSP_DLLEXPORT PCASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1169 {
1170   PetscInt       i;
1171   PetscErrorCode ierr;
1172   PetscFunctionBegin;
1173   if (n <= 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
1174   PetscValidPointer(is,2);
1175   for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); }
1176   ierr = PetscFree(is);CHKERRQ(ierr);
1177   if (is_local) {
1178     PetscValidPointer(is_local,3);
1179     for (i=0; i<n; i++) { ierr = ISDestroy(is_local[i]);CHKERRQ(ierr); }
1180     ierr = PetscFree(is_local);CHKERRQ(ierr);
1181   }
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "PCASMCreateSubdomains2D"
1187 /*@
1188    PCASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1189    preconditioner for a two-dimensional problem on a regular grid.
1190 
1191    Not Collective
1192 
1193    Input Parameters:
1194 +  m, n - the number of mesh points in the x and y directions
1195 .  M, N - the number of subdomains in the x and y directions
1196 .  dof - degrees of freedom per node
1197 -  overlap - overlap in mesh lines
1198 
1199    Output Parameters:
1200 +  Nsub - the number of subdomains created
1201 -  is - the array of index sets defining the subdomains
1202 
1203    Note:
1204    Presently PCASMCreateSubdomains2D() is valid only for sequential
1205    preconditioners.  More general related routines are
1206    PCASMSetTotalSubdomains() and PCASMSetLocalSubdomains().
1207 
1208    Level: advanced
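
   Example usage (a sketch; a 64 x 64 grid split into a 4 x 4 array of subdomains with one
   mesh line of overlap and one degree of freedom per node):
.vb
      PetscInt Nsub;
      IS       *is;
      ierr = PCASMCreateSubdomains2D(64,64,4,4,1,1,&Nsub,&is);CHKERRQ(ierr);
      ierr = PCASMSetLocalSubdomains(pc,Nsub,is,PETSC_NULL);CHKERRQ(ierr);
.ve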
1209 
1210 .keywords: PC, ASM, additive Schwarz, create, subdomains, 2D, regular grid
1211 
1212 .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
1213           PCASMSetOverlap()
1214 @*/
1215 PetscErrorCode PETSCKSP_DLLEXPORT PCASMCreateSubdomains2D(PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt dof,PetscInt overlap,PetscInt *Nsub,IS **is)
1216 {
1217   PetscInt       i,j,height,width,ystart,xstart,yleft,yright,xleft,xright,loc_outter;
1218   PetscErrorCode ierr;
1219   PetscInt       nidx,*idx,loc,ii,jj,count;
1220 
1221   PetscFunctionBegin;
1222   if (dof != 1) SETERRQ(PETSC_ERR_SUP," ");
1223 
1224   *Nsub = N*M;
1225   ierr = PetscMalloc((*Nsub)*sizeof(IS *),is);CHKERRQ(ierr);
1226   ystart = 0;
1227   loc_outter = 0;
1228   for (i=0; i<N; i++) {
1229     height = n/N + ((n % N) > i); /* height of subdomain */
1230     if (height < 2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Too many N subdomains for mesh dimension n");
1231     yleft  = ystart - overlap; if (yleft < 0) yleft = 0;
1232     yright = ystart + height + overlap; if (yright > n) yright = n;
1233     xstart = 0;
1234     for (j=0; j<M; j++) {
1235       width = m/M + ((m % M) > j); /* width of subdomain */
1236       if (width < 2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Too many M subdomains for mesh dimension m");
1237       xleft  = xstart - overlap; if (xleft < 0) xleft = 0;
1238       xright = xstart + width + overlap; if (xright > m) xright = m;
1239       nidx   = (xright - xleft)*(yright - yleft);
1240       ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
1241       loc    = 0;
1242       for (ii=yleft; ii<yright; ii++) {
1243         count = m*ii + xleft;
1244         for (jj=xleft; jj<xright; jj++) {
1245           idx[loc++] = count++;
1246         }
1247       }
1248       ierr = ISCreateGeneral(PETSC_COMM_SELF,nidx,idx,(*is)+loc_outter++);CHKERRQ(ierr);
1249       ierr = PetscFree(idx);CHKERRQ(ierr);
1250       xstart += width;
1251     }
1252     ystart += height;
1253   }
1254   for (i=0; i<*Nsub; i++) { ierr = ISSort((*is)[i]);CHKERRQ(ierr); }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "PCASMGetLocalSubdomains"
1260 /*@C
1261     PCASMGetLocalSubdomains - Gets the local subdomains (for this processor
1262     only) for the additive Schwarz preconditioner.
1263 
1264     Collective on PC
1265 
1266     Input Parameter:
1267 .   pc - the preconditioner context
1268 
1269     Output Parameters:
1270 +   n - the number of subdomains for this processor (default value = 1)
1271 .   is - the index sets that define the subdomains for this processor
1272 -   is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)
1273 
1274 
1275     Notes:
1276     The IS numbering is in the parallel, global numbering of the vector.
1277 
1278     Level: advanced
1279 
1280 .keywords: PC, ASM, set, local, subdomains, additive Schwarz
1281 
1282 .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1283           PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubmatrices()
1284 @*/
1285 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1286 {
1287   PC_ASM         *osm;
1288   PetscErrorCode ierr;
1289   PetscTruth     match;
1290 
1291   PetscFunctionBegin;
1292   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
1293   PetscValidIntPointer(n,2);
1294   if (is) PetscValidPointer(is,3);
1295   ierr = PetscTypeCompare((PetscObject)pc,PCASM,&match);CHKERRQ(ierr);
1296   if (!match) {
1297     if (n)  *n  = 0;
1298     if (is) *is = PETSC_NULL;
1299   } else {
1300     osm = (PC_ASM*)pc->data;
1301     if (n)  *n  = osm->n_local_true;
1302     if (is) *is = osm->is;
1303     if (is_local) *is_local = osm->is_local;
1304   }
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 #undef __FUNCT__
1309 #define __FUNCT__ "PCASMGetLocalSubmatrices"
1310 /*@C
1311     PCASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1312     only) for the additive Schwarz preconditioner.
1313 
1314     Collective on PC
1315 
1316     Input Parameter:
1317 .   pc - the preconditioner context
1318 
1319     Output Parameters:
1320 +   n - the number of matrices for this processor (default value = 1)
1321 -   mat - the matrices
1322 
1323 
1324     Level: advanced
1325 
1326 .keywords: PC, ASM, set, local, subdomains, additive Schwarz, block Jacobi
1327 
1328 .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1329           PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains()
1330 @*/
1331 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1332 {
1333   PC_ASM         *osm;
1334   PetscErrorCode ierr;
1335   PetscTruth     match;
1336 
1337   PetscFunctionBegin;
1338   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
1339   PetscValidIntPointer(n,2);
1340   if (mat) PetscValidPointer(mat,3);
1341   if (!pc->setupcalled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1342   ierr = PetscTypeCompare((PetscObject)pc,PCASM,&match);CHKERRQ(ierr);
1343   if (!match) {
1344     if (n)   *n   = 0;
1345     if (mat) *mat = PETSC_NULL;
1346   } else {
1347     osm = (PC_ASM*)pc->data;
1348     if (n)   *n   = osm->n_local_true;
1349     if (mat) *mat = osm->pmat;
1350   }
1351   PetscFunctionReturn(0);
1352 }
1353