#define PETSCKSP_DLL

/*
  This file defines an additive Schwarz preconditioner for any Mat implementation.

  Note that each processor may have any number of subdomains. But in order to
  deal easily with the VecScatter(), we treat each processor as if it has the
  same number of subdomains.

       n - total number of true subdomains on all processors
       n_local_true - actual number of subdomains on this processor
       n_local - maximum over all processors of n_local_true
                 (for example, with 2 true subdomains on one process and 1 on
                 another, n_local is 2 on both; the extra slot holds empty
                 work objects)
*/
#include "private/pcimpl.h"     /*I "petscpc.h" I*/

typedef struct {
  PetscInt   n, n_local, n_local_true;
  PetscInt   overlap;             /* overlap requested by user */
  KSP        *ksp;                /* linear solvers for each block */
  VecScatter *restriction;        /* mapping from global to subregion */
  VecScatter *localization;       /* mapping from overlapping to non-overlapping subregion */
  VecScatter *prolongation;       /* mapping from subregion to global */
  Vec        *x,*y,*y_local;      /* work vectors */
  IS         *is;                 /* index set that defines each overlapping subdomain */
  IS         *is_local;           /* index set that defines each non-overlapping subdomain, may be NULL */
  Mat        *mat,*pmat;          /* mat is not currently used */
  PCASMType  type;                /* use reduced interpolation, restriction or both */
  PetscTruth type_set;            /* if user set this value (so won't change it for symmetric problems) */
  PetscTruth same_local_solves;   /* flag indicating whether all local solvers are the same */
} PC_ASM;

#undef __FUNCT__
#define __FUNCT__ "PCView_ASM"
static PetscErrorCode PCView_ASM(PC pc,PetscViewer viewer)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,bsz;
  PetscTruth     iascii,isstring;
  PetscViewer    sviewer;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
  if (iascii) {
    if (osm->n > 0) {
      ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: total subdomain blocks = %D, amount of overlap = %D\n",osm->n,osm->overlap);CHKERRQ(ierr);
    } else {
      ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: total subdomain blocks not yet set, amount of overlap = %D\n",osm->overlap);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: restriction/interpolation type - %s\n",PCASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
    if (osm->same_local_solves) {
      if (osm->ksp) {
        ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is the same for all blocks, in the following KSP and PC objects:\n");CHKERRQ(ierr);
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (!rank) {
          ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
    } else {
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] number of local blocks = %D\n",(int)rank,osm->n_local_true);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each block is in the following KSP and PC objects:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
      for (i=0; i<osm->n_local; i++) {
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (i < osm->n_local_true) {
          ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d] local block number %D, size = %D\n",(int)rank,i,bsz);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    }
  } else if (isstring) {
    ierr = PetscViewerStringSPrintf(viewer," blocks=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PCASM",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCASMPrintSubdomains"
static PetscErrorCode PCASMPrintSubdomains(PC pc)
{
  PC_ASM         *osm  = (PC_ASM*)pc->data;
  const char     *prefix;
  char           fname[PETSC_MAX_PATH_LEN+1];
  PetscViewer    viewer;
  PetscInt       i,j,nidx;
  const PetscInt *idx;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetString(prefix,"-pc_asm_print_subdomains",fname,PETSC_MAX_PATH_LEN,PETSC_NULL);CHKERRQ(ierr);
  if (fname[0] == 0) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }
  ierr = PetscViewerASCIIOpen(((PetscObject)pc)->comm,fname,&viewer);CHKERRQ(ierr);
  for (i=0; i<osm->n_local_true; i++) {
    ierr = ISGetLocalSize(osm->is[i],&nidx);CHKERRQ(ierr);
    ierr = ISGetIndices(osm->is[i],&idx);CHKERRQ(ierr);
    for (j=0; j<nidx; j++) {
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%D ",idx[j]);CHKERRQ(ierr);
    }
    ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr);
    if (osm->is_local) {
      ierr = ISGetLocalSize(osm->is_local[i],&nidx);CHKERRQ(ierr);
      ierr = ISGetIndices(osm->is_local[i],&idx);CHKERRQ(ierr);
      for (j=0; j<nidx; j++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%D ",idx[j]);CHKERRQ(ierr);
      }
      ierr = ISRestoreIndices(osm->is_local[i],&idx);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr);
    }
  }
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetUp_ASM"
static PetscErrorCode PCSetUp_ASM(PC pc)
{
  PC_ASM         *osm  = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscTruth     symset,flg;
  PetscInt       i,m,m_local,firstRow,lastRow;
  PetscMPIInt    size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  IS             isl;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  Vec            vec;

  PetscFunctionBegin;
  if (!pc->setupcalled) {

    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) { osm->type = PC_ASM_BASIC; }
    }

    if (osm->n == PETSC_DECIDE && osm->n_local_true < 1) {
      /* no subdomains given, use one per processor */
      osm->n_local = osm->n_local_true = 1;
      ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
      osm->n = size;
    } else if (osm->n == PETSC_DECIDE) {
      /* determine global number of subdomains */
      PetscInt inwork[2],outwork[2];
      inwork[0] = inwork[1] = osm->n_local_true;
      ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
      osm->n_local = outwork[0];
      osm->n       = outwork[1];
    }

    if (!osm->is) { /* create the index sets */
      ierr = PCASMCreateSubdomains(pc->pmat,osm->n_local_true,&osm->is);CHKERRQ(ierr);
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetTruth(prefix,"-pc_asm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) { ierr = PCASMPrintSubdomains(pc);CHKERRQ(ierr); }

    /* Extend the "overlapping" regions by a number of steps */
    ierr = MatIncreaseOverlap(pc->pmat,osm->n_local_true,osm->is,osm->overlap);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
      if (osm->is_local) {
        ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
      }
    }

    /* Create the local work vectors and scatter contexts */
    ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(VecScatter),&osm->restriction);CHKERRQ(ierr);
    if (osm->is_local) {ierr = PetscMalloc(osm->n_local*sizeof(VecScatter),&osm->localization);CHKERRQ(ierr);}
    ierr = PetscMalloc(osm->n_local*sizeof(VecScatter),&osm->prolongation);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(Vec),&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(Vec),&osm->y);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(Vec),&osm->y_local);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(vec, &firstRow, &lastRow);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; ++i, firstRow += m_local) {
      ierr = ISGetLocalSize(osm->is[i],&m);CHKERRQ(ierr);
      ierr = VecCreateSeq(PETSC_COMM_SELF,m,&osm->x[i]);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);CHKERRQ(ierr);
      ierr = VecScatterCreate(vec,osm->is[i],osm->x[i],isl,&osm->restriction[i]);CHKERRQ(ierr);
      ierr = ISDestroy(isl);CHKERRQ(ierr);
      ierr = VecDuplicate(osm->x[i],&osm->y[i]);CHKERRQ(ierr);
      if (osm->is_local) {
        IS             isShift;
        const PetscInt *idx_local;
        PetscInt       *idx, j;

        ierr = ISGetLocalSize(osm->is_local[i],&m_local);CHKERRQ(ierr);
        ierr = ISGetIndices(osm->is_local[i], &idx_local);CHKERRQ(ierr);
        ierr = PetscMalloc(m_local*sizeof(PetscInt), &idx);CHKERRQ(ierr);
        for (j = 0; j < m_local; ++j) {
          idx[j] = idx_local[j] - firstRow;
        }
        ierr = ISRestoreIndices(osm->is_local[i], &idx_local);CHKERRQ(ierr);
        ierr = ISCreateGeneral(PETSC_COMM_SELF,m_local,idx,&isShift);CHKERRQ(ierr);
        ierr = PetscFree(idx);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,m_local,0,1,&isl);CHKERRQ(ierr);
        ierr = VecCreateSeq(PETSC_COMM_SELF,m_local,&osm->y_local[i]);CHKERRQ(ierr);
        ierr = VecScatterCreate(osm->y[i],isShift,osm->y_local[i],isl,&osm->localization[i]);CHKERRQ(ierr);
        ierr = ISDestroy(isShift);CHKERRQ(ierr);

        ierr = VecScatterCreate(vec,osm->is_local[i],osm->y_local[i],isl,&osm->prolongation[i]);CHKERRQ(ierr);
        ierr = ISDestroy(isl);CHKERRQ(ierr);
      } else {
        ierr = VecGetLocalSize(vec,&m_local);CHKERRQ(ierr);
        osm->y_local[i] = osm->y[i];
        ierr = PetscObjectReference((PetscObject) osm->y[i]);CHKERRQ(ierr);
        osm->prolongation[i] = osm->restriction[i];
        ierr = PetscObjectReference((PetscObject) osm->restriction[i]);CHKERRQ(ierr);
      }
    }
    if (firstRow != lastRow) SETERRQ2(PETSC_ERR_PLIB, "Specified ASM subdomain sizes were invalid: %D != %D", firstRow, lastRow);
    for (i=osm->n_local_true; i<osm->n_local; i++) {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDuplicate(osm->x[i],&osm->y[i]);CHKERRQ(ierr);
      ierr = VecDuplicate(osm->x[i],&osm->y_local[i]);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&isl);CHKERRQ(ierr);
      ierr = VecScatterCreate(vec,isl,osm->x[i],isl,&osm->restriction[i]);CHKERRQ(ierr);
      osm->prolongation[i] = osm->restriction[i];
      ierr = PetscObjectReference((PetscObject) osm->restriction[i]);CHKERRQ(ierr);
      ierr = ISDestroy(isl);CHKERRQ(ierr);
    }
    ierr = VecDestroy(vec);CHKERRQ(ierr);

    /* Create the local solvers */
    ierr = PetscMalloc(osm->n_local_true*sizeof(KSP),&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = KSPCreate(PETSC_COMM_SELF,&ksp);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    scall = MAT_INITIAL_MATRIX;

  } else {
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
  }

  /*
     Extract out the submatrices
  */
  ierr = MatGetSubMatrices(pc->pmat,osm->n_local_true,osm->is,osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n_local_true,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over subdomains putting them into local ksp
  */
  for (i=0; i<osm->n_local_true; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetUpOnBlocks_ASM"
static PetscErrorCode PCSetUpOnBlocks_ASM(PC pc)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  for (i=0; i<osm->n_local_true; i++) {
    ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCApply_ASM"
static PetscErrorCode PCApply_ASM(PC pc,Vec x,Vec y)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i,n_local = osm->n_local,n_local_true = osm->n_local_true;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_ASM_RESTRICT)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    for (i=0; i<n_local_true; i++) {
      ierr = VecSet(osm->x[i],0.0);CHKERRQ(ierr);
    }
  }
  if (!(osm->type & PC_ASM_INTERPOLATE)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  for (i=0; i<n_local; i++) {
    ierr = VecScatterBegin(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<n_local_true; i++) {
    ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    if (osm->localization) {
      ierr = VecScatterBegin(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr);
      ierr = VecScatterEnd(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    }
    ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* handle the rest of the scatters that do not have local solves */
  for (i=n_local_true; i<n_local; i++) {
    ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  for (i=0; i<n_local; i++) {
    ierr = VecScatterEnd(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCApplyTranspose_ASM"
static PetscErrorCode PCApplyTranspose_ASM(PC pc,Vec x,Vec y)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i,n_local = osm->n_local,n_local_true = osm->n_local_true;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).

     Note: these are reversed from the PCApply_ASM() because we are applying the
     transpose of the three terms
  */
  if (!(osm->type & PC_ASM_INTERPOLATE)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    for (i=0; i<n_local_true; i++) {
      ierr = VecSet(osm->x[i],0.0);CHKERRQ(ierr);
    }
  }
  if (!(osm->type & PC_ASM_RESTRICT)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  for (i=0; i<n_local; i++) {
    ierr = VecScatterBegin(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<n_local_true; i++) {
    ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    if (osm->localization) {
      ierr = VecScatterBegin(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr);
      ierr = VecScatterEnd(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    }
    ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* handle the rest of the scatters that do not have local solves */
  for (i=n_local_true; i<n_local; i++) {
    ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  for (i=0; i<n_local; i++) {
    ierr = VecScatterEnd(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCDestroy_ASM"
static PetscErrorCode PCDestroy_ASM(PC pc)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (osm->ksp) {
    for (i=0; i<osm->n_local_true; i++) {
      ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
  }
  if (osm->pmat) {
    if (osm->n_local_true > 0) {
      ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
    }
  }
  if (osm->restriction) {
    for (i=0; i<osm->n_local; i++) {
      ierr = VecScatterDestroy(osm->restriction[i]);CHKERRQ(ierr);
      if (osm->localization) {ierr = VecScatterDestroy(osm->localization[i]);CHKERRQ(ierr);}
      ierr = VecScatterDestroy(osm->prolongation[i]);CHKERRQ(ierr);
      ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr);
      ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr);
      ierr = VecDestroy(osm->y_local[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->restriction);CHKERRQ(ierr);
    if (osm->localization) {ierr = PetscFree(osm->localization);CHKERRQ(ierr);}
    ierr = PetscFree(osm->prolongation);CHKERRQ(ierr);
    ierr = PetscFree(osm->x);CHKERRQ(ierr);
    ierr = PetscFree(osm->y);CHKERRQ(ierr);
    ierr = PetscFree(osm->y_local);CHKERRQ(ierr);
  }
  if (osm->is) {
    ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);CHKERRQ(ierr);
  }
  ierr = PetscFree(osm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetFromOptions_ASM"
static PetscErrorCode PCSetFromOptions_ASM(PC pc)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       blocks,ovl;
  PetscTruth     symset,flg;
  PCASMType      asmtype;

  PetscFunctionBegin;
  /* set the type to symmetric if matrix is symmetric */
  if (!osm->type_set && pc->pmat) {
    ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
    if (symset && flg) { osm->type = PC_ASM_BASIC; }
  }
  ierr = PetscOptionsHead("Additive Schwarz options");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-pc_asm_blocks","Number of subdomains","PCASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
    if (flg) {ierr = PCASMSetTotalSubdomains(pc,blocks,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); }
    ierr = PetscOptionsInt("-pc_asm_overlap","Number of grid points overlap","PCASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
    if (flg) {ierr = PCASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
    flg  = PETSC_FALSE;
    ierr = PetscOptionsEnum("-pc_asm_type","Type of restriction/extension","PCASMSetType",PCASMTypes,(PetscEnum)osm->type,(PetscEnum*)&asmtype,&flg);CHKERRQ(ierr);
    if (flg) {ierr = PCASMSetType(pc,asmtype);CHKERRQ(ierr); }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMSetLocalSubdomains_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetLocalSubdomains_ASM(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (n < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
  if (pc->setupcalled && (n != osm->n_local_true || is)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetLocalSubdomains() should be called before calling PCSetUp().");
  }
  if (!pc->setupcalled) {
    /* take a reference to each index set, since PCASMDestroySubdomains() will later destroy both arrays */
    if (is) {
      for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
    }
    if (is_local) {
      for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);}
    }
    if (osm->is) {
      ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);CHKERRQ(ierr);
    }
    osm->n_local_true = n;
    osm->is           = 0;
    osm->is_local     = 0;
    if (is) {
      ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr);
      for (i=0; i<n; i++) { osm->is[i] = is[i]; }
    }
    if (is_local) {
      ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
      for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
    }
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMSetTotalSubdomains_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetTotalSubdomains_ASM(PC pc,PetscInt N,IS *is,IS *is_local)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       n;

  PetscFunctionBegin;
  if (N < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
  if (is) SETERRQ(PETSC_ERR_SUP,"Use PCASMSetLocalSubdomains() to set specific index sets\nThey cannot be set globally yet.");

  /*
     Split the subdomains equally among all processors
  */
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  n = N/size + ((N % size) > rank);
  if (!n) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
  if (pc->setupcalled && n != osm->n_local_true) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetTotalSubdomains() should be called before PCSetUp().");
  if (!pc->setupcalled) {
    if (osm->is) {
      ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);CHKERRQ(ierr);
    }
    osm->n_local_true = n;
    osm->is           = 0;
    osm->is_local     = 0;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMSetOverlap_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetOverlap_ASM(PC pc,PetscInt ovl)
{
  PC_ASM *osm = (PC_ASM*)pc->data;

  PetscFunctionBegin;
  if (ovl < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
  if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetOverlap() should be called before PCSetUp().");
  if (!pc->setupcalled) {
    osm->overlap = ovl;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMSetType_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetType_ASM(PC pc,PCASMType type)
{
  PC_ASM *osm = (PC_ASM*)pc->data;

  PetscFunctionBegin;
  osm->type     = type;
  osm->type_set = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMGetSubKSP_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetSubKSP_ASM(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (osm->n_local_true < 1) {
    SETERRQ(PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
  }

  if (n_local) {
    *n_local = osm->n_local_true;
  }
  if (first_local) {
    ierr = MPI_Scan(&osm->n_local_true,first_local,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
    *first_local -= osm->n_local_true;
  }
  if (ksp) {
    /* Assume that local solves are now different; not necessarily
       true though!  This flag is used only for PCView_ASM() */
    *ksp                   = osm->ksp;
    osm->same_local_solves = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "PCASMSetLocalSubdomains"
/*@C
    PCASMSetLocalSubdomains - Sets the local subdomains (for this processor
    only) for the additive Schwarz preconditioner.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
.   n - the number of subdomains for this processor (default value = 1)
.   is - the index sets that define the subdomains for this processor
         (or PETSC_NULL for PETSc to determine subdomains)
-   is_local - the index sets that define the local (nonoverlapping) part of each subdomain
         (or PETSC_NULL, in which case the overlapping subdomains in is are used throughout)

    Notes:
    The IS numbering is in the parallel, global numbering of the vector. See the
    example sketch after this routine for typical usage.

    By default the ASM preconditioner uses 1 block per processor.

    These index sets cannot be destroyed until after completion of the
    linear solves for which the ASM preconditioner is being used.

    Use PCASMSetTotalSubdomains() to set the subdomains for all processors.

    Level: advanced

.keywords: PC, ASM, set, local, subdomains, additive Schwarz

.seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PetscErrorCode ierr,(*f)(PC,PetscInt,IS[],IS[]);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetLocalSubdomains_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,n,is,is_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
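
/*
   Example usage for PCASMSetLocalSubdomains() (an illustrative sketch only,
   not part of the library): one overlapping subdomain per process covering
   the locally owned rows. The names ksp, A, b, and x are assumptions of this
   sketch; error handling follows the usual ierr/CHKERRQ pattern.

     PC       pc;
     IS       is[1];
     PetscInt rstart,rend;

     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCASM);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&is[0]);CHKERRQ(ierr);
     ierr = PCASMSetLocalSubdomains(pc,1,is,PETSC_NULL);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
     ierr = ISDestroy(is[0]);CHKERRQ(ierr);

   The index set is destroyed only after the solves are complete, per the
   Notes above.
*/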

#undef __FUNCT__
#define __FUNCT__ "PCASMSetTotalSubdomains"
/*@C
    PCASMSetTotalSubdomains - Sets the subdomains for all processors for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine, with the same index sets.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
.   N - the number of subdomains for all processors
.   is - the index sets that define the subdomains for all processors
         (or PETSC_NULL for PETSc to determine subdomains)
-   is_local - the index sets that define the local (nonoverlapping) part of each subdomain
         (or PETSC_NULL, in which case the overlapping subdomains in is are used throughout)

    Options Database Key:
    To set the total number of subdomain blocks rather than specify the
    index sets, use the option
.    -pc_asm_blocks <blks> - Sets total blocks

    Notes:
    Currently you cannot use this to set the actual subdomains with the argument is.

    By default the ASM preconditioner uses 1 block per processor.

    These index sets cannot be destroyed until after completion of the
    linear solves for which the ASM preconditioner is being used.

    Use PCASMSetLocalSubdomains() to set local subdomains.

    Level: advanced

.keywords: PC, ASM, set, total, global, subdomains, additive Schwarz

.seealso: PCASMSetLocalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetTotalSubdomains(PC pc,PetscInt N,IS is[],IS is_local[])
{
  PetscErrorCode ierr,(*f)(PC,PetscInt,IS[],IS[]);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetTotalSubdomains_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,N,is,is_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
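
/*
   Example usage for PCASMSetTotalSubdomains() (an illustrative sketch only,
   not part of the library): ask for 16 blocks split across all processes and
   let PETSc construct the index sets itself. The name ksp is an assumption
   of this sketch.

     PC pc;

     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCASM);CHKERRQ(ierr);
     ierr = PCASMSetTotalSubdomains(pc,16,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);

   The command line equivalent is -pc_type asm -pc_asm_blocks 16.
*/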

#undef __FUNCT__
#define __FUNCT__ "PCASMSetOverlap"
/*@
    PCASMSetOverlap - Sets the overlap between a pair of subdomains for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine.

    Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)

    Options Database Key:
.   -pc_asm_overlap <ovl> - Sets overlap

    Notes:
    By default the ASM preconditioner uses 1 block per processor.  To use
    multiple blocks per processor, see PCASMSetTotalSubdomains() and
    PCASMSetLocalSubdomains() (and the option -pc_asm_blocks <blks>).

    The overlap defaults to 1, so if one desires that no additional
    overlap be computed beyond what may have been set with a call to
    PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(), then ovl
    must be set to be 0.  In particular, if one does not explicitly set
    the subdomains in an application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in an ASM
    variant that is equivalent to the block Jacobi preconditioner.

    Note that one can define initial index sets with any overlap via
    PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(); the routine
    PCASMSetOverlap() merely allows PETSc to extend that overlap further
    if desired.

    Level: intermediate

.keywords: PC, ASM, set, overlap

.seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr,(*f)(PC,PetscInt);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetOverlap_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,ovl);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
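
/*
   Example usage for PCASMSetOverlap() (an illustrative sketch only, not part
   of the library): suppress the automatic overlap computation, which, per
   the Notes above, makes PCASM with PETSc-generated subdomains behave like
   block Jacobi. The name ksp is an assumption of this sketch.

     PC pc;

     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCASM);CHKERRQ(ierr);
     ierr = PCASMSetOverlap(pc,0);CHKERRQ(ierr);

   The command line equivalent is -pc_type asm -pc_asm_overlap 0.
*/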

#undef __FUNCT__
#define __FUNCT__ "PCASMSetType"
/*@
    PCASMSetType - Sets the type of restriction and interpolation used
    for local problems in the additive Schwarz method.

    Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   type - variant of ASM, one of
.vb
      PC_ASM_BASIC       - full interpolation and restriction
      PC_ASM_RESTRICT    - full restriction, local processor interpolation
      PC_ASM_INTERPOLATE - full interpolation, local processor restriction
      PC_ASM_NONE        - local processor restriction and interpolation
.ve

    Options Database Key:
.   -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type

    Level: intermediate

.keywords: PC, ASM, set, type

.seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetType(PC pc,PCASMType type)
{
  PetscErrorCode ierr,(*f)(PC,PCASMType);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetType_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,type);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
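
/*
   Example usage for PCASMSetType() (an illustrative sketch only, not part of
   the library): select the standard (non-restricted) additive Schwarz
   variant instead of the default PC_ASM_RESTRICT. The name ksp is an
   assumption of this sketch.

     PC pc;

     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCASM);CHKERRQ(ierr);
     ierr = PCASMSetType(pc,PC_ASM_BASIC);CHKERRQ(ierr);

   The command line equivalent is -pc_type asm -pc_asm_type basic.
*/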

#undef __FUNCT__
#define __FUNCT__ "PCASMGetSubKSP"
/*@C
   PCASMGetSubKSP - Gets the local KSP contexts for all blocks on
   this processor.

   Collective on PC iff first_local is requested

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n_local - the number of blocks on this processor or PETSC_NULL
.  first_local - the global number of the first block on this processor or PETSC_NULL,
                 all processors must request or all must pass PETSC_NULL
-  ksp - the array of KSP contexts

   Note:
   After PCASMGetSubKSP() the array of KSPs is not to be freed.

   Currently for some matrix implementations only 1 block per processor
   is supported.

   You must call KSPSetUp() before calling PCASMGetSubKSP().

   Level: advanced

.keywords: PC, ASM, additive Schwarz, get, sub, KSP, context

.seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMSetOverlap(),
          PCASMCreateSubdomains2D()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr,(*f)(PC,PetscInt*,PetscInt*,KSP **);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMGetSubKSP_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,n_local,first_local,ksp);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot get subksp for this type of PC");
  }
  PetscFunctionReturn(0);
}
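
/*
   Example usage for PCASMGetSubKSP() (an illustrative sketch only, not part
   of the library): after KSPSetUp(), fetch the block solvers and set ILU as
   the preconditioner on every local block. The name ksp is an assumption of
   this sketch and is assumed to already use PCASM.

     PC       pc,subpc;
     KSP      *subksp;
     PetscInt i,nlocal,first;

     ierr = KSPSetUp(ksp);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCASMGetSubKSP(pc,&nlocal,&first,&subksp);CHKERRQ(ierr);
     for (i=0; i<nlocal; i++) {
       ierr = KSPGetPC(subksp[i],&subpc);CHKERRQ(ierr);
       ierr = PCSetType(subpc,PCILU);CHKERRQ(ierr);
     }
*/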

/* -------------------------------------------------------------------------------------*/
/*MC
   PCASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
           its own KSP object.

   Options Database Keys:
+  -pc_asm_blocks <blks> - Sets total blocks
.  -pc_asm_overlap <ovl> - Sets overlap
-  -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type

     IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
      will get a different convergence rate due to the default option of -pc_asm_type restrict. Use
      -pc_asm_type basic to use the standard ASM.

   Notes: Each processor can have one or more blocks, but a block cannot be shared by more
     than one processor. Defaults to one block per processor.

     To set options on the solvers for each block, append -sub_ to all the KSP and PC
        options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

     To set the options on the solvers separately for each block, call PCASMGetSubKSP()
         and set the options directly on the resulting KSP object (you can access its PC
         with KSPGetPC())

   Level: beginner

   Concepts: additive Schwarz method

    References:
    An additive variant of the Schwarz alternating method for the case of many subregions
    M Dryja, OB Widlund - Courant Institute, New York University Technical report

    Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
    Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
           PCBJACOBI, PCASMGetSubKSP(), PCASMSetLocalSubdomains(),
           PCASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCASMSetOverlap(), PCASMSetType()

M*/
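
/*
   Example usage for PCASM (an illustrative sketch only, not part of the
   library): a complete solve that leaves the block solver choices to the
   options database. The names A, b, and x are assumptions of this sketch.

     KSP ksp;
     PC  pc;

     ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCASM);CHKERRQ(ierr);
     ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
     ierr = KSPDestroy(ksp);CHKERRQ(ierr);

   Run, for example, with -pc_asm_overlap 2 -sub_pc_type ilu -sub_pc_factor_levels 1.
*/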

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_ASM"
PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_ASM(PC pc)
{
  PetscErrorCode ierr;
  PC_ASM         *osm;

  PetscFunctionBegin;
  ierr = PetscNewLog(pc,PC_ASM,&osm);CHKERRQ(ierr);
  osm->n                 = PETSC_DECIDE;
  osm->n_local           = 0;
  osm->n_local_true      = 0;
  osm->overlap           = 1;
  osm->ksp               = 0;
  osm->restriction       = 0;
  osm->localization      = 0;
  osm->prolongation      = 0;
  osm->x                 = 0;
  osm->y                 = 0;
  osm->y_local           = 0;
  osm->is                = 0;
  osm->is_local          = 0;
  osm->mat               = 0;
  osm->pmat              = 0;
  osm->type              = PC_ASM_RESTRICT;
  osm->same_local_solves = PETSC_TRUE;

  pc->data                   = (void*)osm;
  pc->ops->apply             = PCApply_ASM;
  pc->ops->applytranspose    = PCApplyTranspose_ASM;
  pc->ops->setup             = PCSetUp_ASM;
  pc->ops->destroy           = PCDestroy_ASM;
  pc->ops->setfromoptions    = PCSetFromOptions_ASM;
  pc->ops->setuponblocks     = PCSetUpOnBlocks_ASM;
  pc->ops->view              = PCView_ASM;
  pc->ops->applyrichardson   = 0;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetLocalSubdomains_C","PCASMSetLocalSubdomains_ASM",
                    PCASMSetLocalSubdomains_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetTotalSubdomains_C","PCASMSetTotalSubdomains_ASM",
                    PCASMSetTotalSubdomains_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetOverlap_C","PCASMSetOverlap_ASM",
                    PCASMSetOverlap_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetType_C","PCASMSetType_ASM",
                    PCASMSetType_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMGetSubKSP_C","PCASMGetSubKSP_ASM",
                    PCASMGetSubKSP_ASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "PCASMCreateSubdomains"
/*@C
   PCASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
   preconditioner for any problem on a general grid.

   Collective

   Input Parameters:
+  A - The global matrix operator
-  n - the number of local blocks

   Output Parameter:
.  outis - the array of index sets defining the subdomains

   Level: advanced

   Note: this generates nonoverlapping subdomains; the PCASM will generate the overlap
    from these if you use PCASMSetLocalSubdomains()

    In the Fortran version you must provide the array outis[] already allocated of length n.

.keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid

.seealso: PCASMSetLocalSubdomains(), PCASMDestroySubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
{
  MatPartitioning           mpart;
  const char                *prefix;
  PetscErrorCode            (*f)(Mat,PetscTruth*,MatReuse,Mat*);
  PetscMPIInt               size;
  PetscInt                  i,j,rstart,rend,bs;
  PetscTruth                iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
  Mat                       Ad = PETSC_NULL, adj;
  IS                        ispart,isnumb,*is;
  PetscErrorCode            ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidPointer(outis,3);
  if (n < 1) {SETERRQ1(PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);}

  /* Get prefix, row distribution, and block size */
  ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (rstart/bs*bs != rstart || rend/bs*bs != rend) {
    SETERRQ3(PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
  }
  /* Get diagonal block from matrix if possible */
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr);
  } else if (size == 1) {
    iscopy = PETSC_FALSE; Ad = A;
  } else {
    iscopy = PETSC_FALSE; Ad = PETSC_NULL;
  }
  if (Ad) {
    ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
    if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
  }
  if (Ad && n > 1) {
    PetscTruth match,done;
    /* Try to setup a good matrix partitioning if available */
    ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
    ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
    ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
    ierr = PetscTypeCompare((PetscObject)mpart,MAT_PARTITIONING_CURRENT,&match);CHKERRQ(ierr);
    if (!match) {
      ierr = PetscTypeCompare((PetscObject)mpart,MAT_PARTITIONING_SQUARE,&match);CHKERRQ(ierr);
    }
    if (!match) { /* assume a "good" partitioner is available */
      PetscInt na,*ia,*ja;
      ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
      if (done) {
        /* Build adjacency matrix by hand. Unfortunately a call to
           MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
           remove the block-aij structure and we cannot expect
           MatPartitioning to split vertices as we need */
        PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
        nnz = 0;
        for (i=0; i<na; i++) { /* count number of nonzeros */
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] == i) { /* don't count diagonal */
              len--; break;
            }
          }
          nnz += len;
        }
        ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
        ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
        nnz    = 0;
        iia[0] = 0;
        for (i=0; i<na; i++) { /* fill adjacency */
          cnt = 0;
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] != i) { /* if not diagonal */
              jja[nnz+cnt++] = row[j];
            }
          }
          nnz += cnt;
          iia[i+1] = nnz;
        }
        /* Partitioning of the adjacency matrix */
        ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
        ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
        ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
        ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
        ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
        ierr = MatDestroy(adj);CHKERRQ(ierr);
        foundpart = PETSC_TRUE;
      }
      ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
    }
    ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr);
  }
  if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);}

  ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
  *outis = is;

  if (!foundpart) {

    /* Partitioning by contiguous chunks of rows */

    PetscInt mbs   = (rend-rstart)/bs;
    PetscInt start = rstart;
    for (i=0; i<n; i++) {
      PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
      ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
      start += count;
    }

  } else {

    /* Partitioning by adjacency of diagonal block */

    const PetscInt *numbering;
    PetscInt       *count,nidx,*indices,*newidx,start=0;
    /* Get node count in each partition */
    ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
    ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      for (i=0; i<n; i++) count[i] *= bs;
    }
    /* Build indices from node numbering */
    ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
    ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
    for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
    ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
    ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
    ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
      for (i=0; i<nidx; i++) {
        for (j=0; j<bs; j++) {
          newidx[i*bs+j] = indices[i]*bs + j;
        }
      }
      ierr = PetscFree(indices);CHKERRQ(ierr);
      nidx   *= bs;
      indices = newidx;
    }
    /* Shift to get global indices */
    for (i=0; i<nidx; i++) indices[i] += rstart;

    /* Build the index sets for each block */
    for (i=0; i<n; i++) {
      ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],&is[i]);CHKERRQ(ierr);
      ierr   = ISSort(is[i]);CHKERRQ(ierr);
      start += count[i];
    }

    ierr = PetscFree(count);CHKERRQ(ierr);
    ierr = PetscFree(indices);CHKERRQ(ierr);
    ierr = ISDestroy(isnumb);CHKERRQ(ierr);
    ierr = ISDestroy(ispart);CHKERRQ(ierr);

  }

  PetscFunctionReturn(0);
}
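
/*
   Example usage for PCASMCreateSubdomains() (an illustrative sketch only,
   not part of the library): partition the locally owned rows into 4
   nonoverlapping subdomains, hand them to PCASM (which then extends them by
   the requested overlap), and free them after the solves. The names ksp, A,
   b, and x are assumptions of this sketch.

     PC       pc;
     IS       *is;
     PetscInt nloc = 4;

     ierr = PCASMCreateSubdomains(A,nloc,&is);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCASM);CHKERRQ(ierr);
     ierr = PCASMSetLocalSubdomains(pc,nloc,is,PETSC_NULL);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
     ierr = PCASMDestroySubdomains(nloc,is,PETSC_NULL);CHKERRQ(ierr);
*/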

#undef __FUNCT__
#define __FUNCT__ "PCASMDestroySubdomains"
/*@C
   PCASMDestroySubdomains - Destroys the index sets created with
   PCASMCreateSubdomains(). Should be called after setting subdomains
   with PCASMSetLocalSubdomains().

   Collective

   Input Parameters:
+  n - the number of index sets
.  is - the array of index sets
-  is_local - the array of local index sets, can be PETSC_NULL

   Level: advanced

.keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid

.seealso: PCASMCreateSubdomains(), PCASMSetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
{
  PetscInt       i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (n <= 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
  PetscValidPointer(is,2);
  for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); }
  ierr = PetscFree(is);CHKERRQ(ierr);
  if (is_local) {
    PetscValidPointer(is_local,3);
    for (i=0; i<n; i++) { ierr = ISDestroy(is_local[i]);CHKERRQ(ierr); }
    ierr = PetscFree(is_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCASMCreateSubdomains2D"
/*@
   PCASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
   preconditioner for a two-dimensional problem on a regular grid.

   Not Collective

   Input Parameters:
+  m, n - the number of mesh points in the x and y directions
.  M, N - the number of subdomains in the x and y directions
.  dof - degrees of freedom per node
-  overlap - overlap in mesh lines

   Output Parameters:
+  Nsub - the number of subdomains created
-  is - the array of index sets defining the subdomains

   Note:
   Presently PCASMCreateSubdomains2D() is valid only for sequential
   preconditioners.  More general related routines are
   PCASMSetTotalSubdomains() and PCASMSetLocalSubdomains().

   Level: advanced

.keywords: PC, ASM, additive Schwarz, create, subdomains, 2D, regular grid

.seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
          PCASMSetOverlap()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMCreateSubdomains2D(PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt dof,PetscInt overlap,PetscInt *Nsub,IS **is)
{
  PetscInt       i,j,height,width,ystart,xstart,yleft,yright,xleft,xright,loc_outer;
  PetscErrorCode ierr;
  PetscInt       nidx,*idx,loc,ii,jj,count;

  PetscFunctionBegin;
  if (dof != 1) SETERRQ(PETSC_ERR_SUP,"Currently only supports one degree of freedom per node");

  *Nsub = N*M;
  ierr = PetscMalloc((*Nsub)*sizeof(IS),is);CHKERRQ(ierr);
  ystart = 0;
  loc_outer = 0;
  for (i=0; i<N; i++) {
    height = n/N + ((n % N) > i); /* height of subdomain */
    if (height < 2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Too many N subdomains for mesh dimension n");
    yleft  = ystart - overlap; if (yleft < 0) yleft = 0;
    yright = ystart + height + overlap; if (yright > n) yright = n;
    xstart = 0;
    for (j=0; j<M; j++) {
      width = m/M + ((m % M) > j); /* width of subdomain */
      if (width < 2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Too many M subdomains for mesh dimension m");
      xleft  = xstart - overlap; if (xleft < 0) xleft = 0;
      xright = xstart + width + overlap; if (xright > m) xright = m;
      nidx   = (xright - xleft)*(yright - yleft);
      ierr   = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
      loc    = 0;
      for (ii=yleft; ii<yright; ii++) {
        count = m*ii + xleft;
        for (jj=xleft; jj<xright; jj++) {
          idx[loc++] = count++;
        }
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nidx,idx,(*is)+loc_outer++);CHKERRQ(ierr);
      ierr = PetscFree(idx);CHKERRQ(ierr);
      xstart += width;
    }
    ystart += height;
  }
  for (i=0; i<*Nsub; i++) { ierr = ISSort((*is)[i]);CHKERRQ(ierr); }
  PetscFunctionReturn(0);
}
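
/*
   Example usage for PCASMCreateSubdomains2D() (an illustrative sketch only,
   not part of the library): on one process, cover a 64x64 grid with one
   unknown per node using a 4x2 array of subdomains that overlap by one mesh
   line. The name ksp is an assumption of this sketch.

     PC       pc;
     IS       *is;
     PetscInt Nsub;

     ierr = PCASMCreateSubdomains2D(64,64,4,2,1,1,&Nsub,&is);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCASM);CHKERRQ(ierr);
     ierr = PCASMSetLocalSubdomains(pc,Nsub,is,PETSC_NULL);CHKERRQ(ierr);
     ierr = PCASMSetOverlap(pc,0);CHKERRQ(ierr);

   The final call sets the additional overlap to 0 because the index sets
   already carry the overlap built in by PCASMCreateSubdomains2D(); see the
   PCASMSetOverlap() notes.
*/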

#undef __FUNCT__
#define __FUNCT__ "PCASMGetLocalSubdomains"
/*@C
    PCASMGetLocalSubdomains - Gets the local subdomains (for this processor
    only) for the additive Schwarz preconditioner.

    Collective on PC

    Input Parameter:
.   pc - the preconditioner context

    Output Parameters:
+   n - the number of subdomains for this processor (default value = 1)
.   is - the index sets that define the subdomains for this processor
-   is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)

    Notes:
    The IS numbering is in the parallel, global numbering of the vector.

    Level: advanced

.keywords: PC, ASM, set, local, subdomains, additive Schwarz

.seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubmatrices()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
{
  PC_ASM         *osm;
  PetscErrorCode ierr;
  PetscTruth     match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  PetscValidIntPointer(n,2);
  if (is) PetscValidPointer(is,3);
  ierr = PetscTypeCompare((PetscObject)pc,PCASM,&match);CHKERRQ(ierr);
  if (!match) {
    if (n)  *n  = 0;
    if (is) *is = PETSC_NULL;
  } else {
    osm = (PC_ASM*)pc->data;
    if (n)  *n  = osm->n_local_true;
    if (is) *is = osm->is;
    if (is_local) *is_local = osm->is_local;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCASMGetLocalSubmatrices"
/*@C
    PCASMGetLocalSubmatrices - Gets the local submatrices (for this processor
    only) for the additive Schwarz preconditioner.

    Collective on PC

    Input Parameter:
.   pc - the preconditioner context

    Output Parameters:
+   n - the number of matrices for this processor (default value = 1)
-   mat - the matrices

    Level: advanced

.keywords: PC, ASM, set, local, subdomains, additive Schwarz, block Jacobi

.seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
{
  PC_ASM         *osm;
  PetscErrorCode ierr;
  PetscTruth     match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  PetscValidIntPointer(n,2);
  if (mat) PetscValidPointer(mat,3);
  if (!pc->setupcalled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
  ierr = PetscTypeCompare((PetscObject)pc,PCASM,&match);CHKERRQ(ierr);
  if (!match) {
    if (n)   *n   = 0;
    if (mat) *mat = PETSC_NULL;
  } else {
    osm = (PC_ASM*)pc->data;
    if (n)   *n   = osm->n_local_true;
    if (mat) *mat = osm->pmat;
  }
  PetscFunctionReturn(0);
}
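
/*
   Example usage for PCASMGetLocalSubmatrices() (an illustrative sketch only,
   not part of the library): after setup, view the sequential submatrix of
   each local block. The name ksp is an assumption of this sketch and is
   assumed to already use PCASM.

     PC       pc;
     Mat      *mats;
     PetscInt i,n;

     ierr = KSPSetUp(ksp);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCASMGetLocalSubmatrices(pc,&n,&mats);CHKERRQ(ierr);
     for (i=0; i<n; i++) {
       ierr = MatView(mats[i],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
     }
*/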
1357