xref: /petsc/src/dm/impls/da/fdda.c (revision 7906f579741d91758847a3d807721c3c4f4dc4fa)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
15 
16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
17 {
18   PetscErrorCode ierr;
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 /*@
52     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
53     of the matrix returned by DMCreateMatrix().
54 
55     Logically Collective on DMDA
56 
57     Input Parameter:
58 +   da - the distributed array
59 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
60 -   ofill - the fill pattern in the off-diagonal blocks
61 
62 
63     Level: developer
64 
65     Notes: This only makes sense when you are doing multicomponent problems but using the
66        MPIAIJ matrix format
67 
68            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
69        representing coupling and 0 entries for missing coupling. For example
70 $             dfill[9] = {1, 0, 0,
71 $                         1, 1, 0,
72 $                         0, 1, 1}
73        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
74        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
75        diagonal block).
76 
77      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
78      can be represented in the dfill, ofill format
79 
80    Contributed by Glenn Hammond
81 
82 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
83 
84 @*/
85 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
86 {
87   DM_DA          *dd = (DM_DA*)da->data;
88   PetscErrorCode ierr;
89   PetscInt       i,k,cnt = 1;
90 
91   PetscFunctionBegin;
92   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
93   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
94 
95   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
96    columns to the left with any nonzeros in them plus 1 */
97   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
98   for (i=0; i<dd->w; i++) {
99     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
100   }
101   for (i=0; i<dd->w; i++) {
102     if (dd->ofillcols[i]) {
103       dd->ofillcols[i] = cnt++;
104     }
105   }
106   PetscFunctionReturn(0);
107 }
108 
109 
110 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
111 {
112   PetscErrorCode   ierr;
113   PetscInt         dim,m,n,p,nc;
114   DMBoundaryType bx,by,bz;
115   MPI_Comm         comm;
116   PetscMPIInt      size;
117   PetscBool        isBAIJ;
118   DM_DA            *dd = (DM_DA*)da->data;
119 
120   PetscFunctionBegin;
121   /*
122                                   m
123           ------------------------------------------------------
124          |                                                     |
125          |                                                     |
126          |               ----------------------                |
127          |               |                    |                |
128       n  |           yn  |                    |                |
129          |               |                    |                |
130          |               .---------------------                |
131          |             (xs,ys)     xn                          |
132          |            .                                        |
133          |         (gxs,gys)                                   |
134          |                                                     |
135           -----------------------------------------------------
136   */
137 
138   /*
139          nc - number of components per grid point
140          col - number of colors needed in one direction for single component problem
141 
142   */
143   ierr = DMDAGetInfo(da,&dim,0,0,0,&m,&n,&p,&nc,0,&bx,&by,&bz,0);CHKERRQ(ierr);
144 
145   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
146   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
147   if (ctype == IS_COLORING_LOCAL) {
148     if (size == 1) {
149       ctype = IS_COLORING_GLOBAL;
150     } else if (dim > 1) {
151       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
152         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
153       }
154     }
155   }
156 
157   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
158      matrices is for the blocks, not the individual matrix elements  */
159   ierr = PetscStrcmp(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
160   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
161   if (!isBAIJ) {ierr = PetscStrcmp(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
162   if (isBAIJ) {
163     dd->w  = 1;
164     dd->xs = dd->xs/nc;
165     dd->xe = dd->xe/nc;
166     dd->Xs = dd->Xs/nc;
167     dd->Xe = dd->Xe/nc;
168   }
169 
170   /*
171      We do not provide a getcoloring function in the DMDA operations because
172    the basic DMDA does not know about matrices. We think of DMDA as being more
173    more low-level then matrices.
174   */
175   if (dim == 1) {
176     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
177   } else if (dim == 2) {
178     ierr =  DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
179   } else if (dim == 3) {
180     ierr =  DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
181   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
182   if (isBAIJ) {
183     dd->w  = nc;
184     dd->xs = dd->xs*nc;
185     dd->xe = dd->xe*nc;
186     dd->Xs = dd->Xs*nc;
187     dd->Xe = dd->Xe*nc;
188   }
189   PetscFunctionReturn(0);
190 }
191 
192 /* ---------------------------------------------------------------------------------*/
193 
194 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
195 {
196   PetscErrorCode   ierr;
197   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
198   PetscInt         ncolors;
199   MPI_Comm         comm;
200   DMBoundaryType bx,by;
201   DMDAStencilType  st;
202   ISColoringValue  *colors;
203   DM_DA            *dd = (DM_DA*)da->data;
204 
205   PetscFunctionBegin;
206   /*
207          nc - number of components per grid point
208          col - number of colors needed in one direction for single component problem
209 
210   */
211   ierr = DMDAGetInfo(da,&dim,&m,&n,0,&M,&N,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
212   col  = 2*s + 1;
213   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
214   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
215   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
216 
217   /* special case as taught to us by Paul Hovland */
218   if (st == DMDA_STENCIL_STAR && s == 1) {
219     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
220   } else {
221 
222     if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X (%d) is divisible\n\
223                                                             by 2*stencil_width + 1 (%d)\n", m, col);
224     if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y (%d) is divisible\n\
225                                                             by 2*stencil_width + 1 (%d)\n", n, col);
226     if (ctype == IS_COLORING_GLOBAL) {
227       if (!dd->localcoloring) {
228         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
229         ii   = 0;
230         for (j=ys; j<ys+ny; j++) {
231           for (i=xs; i<xs+nx; i++) {
232             for (k=0; k<nc; k++) {
233               colors[ii++] = k + nc*((i % col) + col*(j % col));
234             }
235           }
236         }
237         ncolors = nc + nc*(col-1 + col*(col-1));
238         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
239       }
240       *coloring = dd->localcoloring;
241     } else if (ctype == IS_COLORING_LOCAL) {
242       if (!dd->ghostedcoloring) {
243         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
244         ii   = 0;
245         for (j=gys; j<gys+gny; j++) {
246           for (i=gxs; i<gxs+gnx; i++) {
247             for (k=0; k<nc; k++) {
248               /* the complicated stuff is to handle periodic boundaries */
249               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
250             }
251           }
252         }
253         ncolors = nc + nc*(col - 1 + col*(col-1));
254         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
255         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
256 
257         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
258       }
259       *coloring = dd->ghostedcoloring;
260     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
261   }
262   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
263   PetscFunctionReturn(0);
264 }
265 
266 /* ---------------------------------------------------------------------------------*/
267 
268 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
269 {
270   PetscErrorCode   ierr;
271   PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
272   PetscInt         ncolors;
273   MPI_Comm         comm;
274   DMBoundaryType bx,by,bz;
275   DMDAStencilType  st;
276   ISColoringValue  *colors;
277   DM_DA            *dd = (DM_DA*)da->data;
278 
279   PetscFunctionBegin;
280   /*
281          nc - number of components per grid point
282          col - number of colors needed in one direction for single component problem
283 
284   */
285   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
286   col  = 2*s + 1;
287   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
288                                                          by 2*stencil_width + 1\n");
289   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
290                                                          by 2*stencil_width + 1\n");
291   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
292                                                          by 2*stencil_width + 1\n");
293 
294   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
295   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
296   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
297 
298   /* create the coloring */
299   if (ctype == IS_COLORING_GLOBAL) {
300     if (!dd->localcoloring) {
301       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
302       ii   = 0;
303       for (k=zs; k<zs+nz; k++) {
304         for (j=ys; j<ys+ny; j++) {
305           for (i=xs; i<xs+nx; i++) {
306             for (l=0; l<nc; l++) {
307               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
308             }
309           }
310         }
311       }
312       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
313       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
314     }
315     *coloring = dd->localcoloring;
316   } else if (ctype == IS_COLORING_LOCAL) {
317     if (!dd->ghostedcoloring) {
318       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
319       ii   = 0;
320       for (k=gzs; k<gzs+gnz; k++) {
321         for (j=gys; j<gys+gny; j++) {
322           for (i=gxs; i<gxs+gnx; i++) {
323             for (l=0; l<nc; l++) {
324               /* the complicated stuff is to handle periodic boundaries */
325               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
326             }
327           }
328         }
329       }
330       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
331       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
332       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
333     }
334     *coloring = dd->ghostedcoloring;
335   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
336   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
337   PetscFunctionReturn(0);
338 }
339 
340 /* ---------------------------------------------------------------------------------*/
341 
342 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
343 {
344   PetscErrorCode   ierr;
345   PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
346   PetscInt         ncolors;
347   MPI_Comm         comm;
348   DMBoundaryType bx;
349   ISColoringValue  *colors;
350   DM_DA            *dd = (DM_DA*)da->data;
351 
352   PetscFunctionBegin;
353   /*
354          nc - number of components per grid point
355          col - number of colors needed in one direction for single component problem
356 
357   */
358   ierr = DMDAGetInfo(da,&dim,&m,0,0,&M,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
359   col  = 2*s + 1;
360 
361   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ2(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points %d is divisible\n\
362                                                           by 2*stencil_width + 1 %d\n",(int)m,(int)col);
363 
364   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
365   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
366   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
367 
368   /* create the coloring */
369   if (ctype == IS_COLORING_GLOBAL) {
370     if (!dd->localcoloring) {
371       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
372       if (dd->ofillcols) {
373         PetscInt tc = 0;
374         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
375         i1 = 0;
376         for (i=xs; i<xs+nx; i++) {
377           for (l=0; l<nc; l++) {
378             if (dd->ofillcols[l] && (i % col)) {
379               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
380             } else {
381               colors[i1++] = l;
382             }
383           }
384         }
385         ncolors = nc + 2*s*tc;
386       } else {
387         i1 = 0;
388         for (i=xs; i<xs+nx; i++) {
389           for (l=0; l<nc; l++) {
390             colors[i1++] = l + nc*(i % col);
391           }
392         }
393         ncolors = nc + nc*(col-1);
394       }
395       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
396     }
397     *coloring = dd->localcoloring;
398   } else if (ctype == IS_COLORING_LOCAL) {
399     if (!dd->ghostedcoloring) {
400       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
401       i1   = 0;
402       for (i=gxs; i<gxs+gnx; i++) {
403         for (l=0; l<nc; l++) {
404           /* the complicated stuff is to handle periodic boundaries */
405           colors[i1++] = l + nc*(SetInRange(i,m) % col);
406         }
407       }
408       ncolors = nc + nc*(col-1);
409       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
410       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
411     }
412     *coloring = dd->ghostedcoloring;
413   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
414   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
419 {
420   PetscErrorCode   ierr;
421   PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
422   PetscInt         ncolors;
423   MPI_Comm         comm;
424   DMBoundaryType bx,by;
425   ISColoringValue  *colors;
426   DM_DA            *dd = (DM_DA*)da->data;
427 
428   PetscFunctionBegin;
429   /*
430          nc - number of components per grid point
431          col - number of colors needed in one direction for single component problem
432 
433   */
434   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,0);CHKERRQ(ierr);
435   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
436   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
437   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
438 
439   if (bx == DM_BOUNDARY_PERIODIC && (m % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible by 5\n");
440   if (by == DM_BOUNDARY_PERIODIC && (n % 5)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible by 5\n");
441 
442   /* create the coloring */
443   if (ctype == IS_COLORING_GLOBAL) {
444     if (!dd->localcoloring) {
445       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
446       ii   = 0;
447       for (j=ys; j<ys+ny; j++) {
448         for (i=xs; i<xs+nx; i++) {
449           for (k=0; k<nc; k++) {
450             colors[ii++] = k + nc*((3*j+i) % 5);
451           }
452         }
453       }
454       ncolors = 5*nc;
455       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
456     }
457     *coloring = dd->localcoloring;
458   } else if (ctype == IS_COLORING_LOCAL) {
459     if (!dd->ghostedcoloring) {
460       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
461       ii = 0;
462       for (j=gys; j<gys+gny; j++) {
463         for (i=gxs; i<gxs+gnx; i++) {
464           for (k=0; k<nc; k++) {
465             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
466           }
467         }
468       }
469       ncolors = 5*nc;
470       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
471       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
472     }
473     *coloring = dd->ghostedcoloring;
474   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
475   PetscFunctionReturn(0);
476 }
477 
478 /* =========================================================================== */
479 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
480 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
481 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
482 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
483 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
484 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
485 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
486 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
487 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
488 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
489 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIELL(DM,Mat);
490 
491 /*@C
492    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
493 
494    Logically Collective on Mat
495 
496    Input Parameters:
497 +  mat - the matrix
498 -  da - the da
499 
500    Level: intermediate
501 
502 @*/
503 PetscErrorCode MatSetupDM(Mat mat,DM da)
504 {
505   PetscErrorCode ierr;
506 
507   PetscFunctionBegin;
508   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
509   PetscValidHeaderSpecific(da,DM_CLASSID,1);
510   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
511   PetscFunctionReturn(0);
512 }
513 
514 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
515 {
516   DM                da;
517   PetscErrorCode    ierr;
518   const char        *prefix;
519   Mat               Anatural;
520   AO                ao;
521   PetscInt          rstart,rend,*petsc,i;
522   IS                is;
523   MPI_Comm          comm;
524   PetscViewerFormat format;
525 
526   PetscFunctionBegin;
527   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
528   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
529   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
530 
531   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
532   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
533   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
534 
535   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
536   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
537   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
538   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
539   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
540   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
541 
542   /* call viewer on natural ordering */
543   ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
544   ierr = ISDestroy(&is);CHKERRQ(ierr);
545   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
546   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
547   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
548   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
549   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
550   PetscFunctionReturn(0);
551 }
552 
553 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
554 {
555   DM             da;
556   PetscErrorCode ierr;
557   Mat            Anatural,Aapp;
558   AO             ao;
559   PetscInt       rstart,rend,*app,i,m,n,M,N;
560   IS             is;
561   MPI_Comm       comm;
562 
563   PetscFunctionBegin;
564   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
565   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
566   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
567 
568   /* Load the matrix in natural ordering */
569   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
570   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
571   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
572   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
573   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
574   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
575 
576   /* Map natural ordering to application ordering and create IS */
577   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
578   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
579   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
580   for (i=rstart; i<rend; i++) app[i-rstart] = i;
581   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
582   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
583 
584   /* Do permutation and replace header */
585   ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
586   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
587   ierr = ISDestroy(&is);CHKERRQ(ierr);
588   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
589   PetscFunctionReturn(0);
590 }
591 
592 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
593 {
594   PetscErrorCode ierr;
595   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
596   Mat            A;
597   MPI_Comm       comm;
598   MatType        Atype;
599   PetscSection   section, sectionGlobal;
600   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*ell)(void)=NULL;
601   MatType        mtype;
602   PetscMPIInt    size;
603   DM_DA          *dd = (DM_DA*)da->data;
604 
605   PetscFunctionBegin;
606   ierr = MatInitializePackage();CHKERRQ(ierr);
607   mtype = da->mattype;
608 
609   ierr = DMGetDefaultSection(da, &section);CHKERRQ(ierr);
610   if (section) {
611     PetscInt  bs = -1;
612     PetscInt  localSize;
613     PetscBool isShell, isBlock, isSeqBlock, isMPIBlock, isSymBlock, isSymSeqBlock, isSymMPIBlock, isSymmetric;
614 
615     ierr = DMGetDefaultGlobalSection(da, &sectionGlobal);CHKERRQ(ierr);
616     ierr = PetscSectionGetConstrainedStorageSize(sectionGlobal, &localSize);CHKERRQ(ierr);
617     ierr = MatCreate(PetscObjectComm((PetscObject)da),&A);CHKERRQ(ierr);
618     ierr = MatSetSizes(A,localSize,localSize,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
619     ierr = MatSetType(A,mtype);CHKERRQ(ierr);
620     ierr = PetscStrcmp(mtype,MATSHELL,&isShell);CHKERRQ(ierr);
621     ierr = PetscStrcmp(mtype,MATBAIJ,&isBlock);CHKERRQ(ierr);
622     ierr = PetscStrcmp(mtype,MATSEQBAIJ,&isSeqBlock);CHKERRQ(ierr);
623     ierr = PetscStrcmp(mtype,MATMPIBAIJ,&isMPIBlock);CHKERRQ(ierr);
624     ierr = PetscStrcmp(mtype,MATSBAIJ,&isSymBlock);CHKERRQ(ierr);
625     ierr = PetscStrcmp(mtype,MATSEQSBAIJ,&isSymSeqBlock);CHKERRQ(ierr);
626     ierr = PetscStrcmp(mtype,MATMPISBAIJ,&isSymMPIBlock);CHKERRQ(ierr);
627     /* Check for symmetric storage */
628     isSymmetric = (PetscBool) (isSymBlock || isSymSeqBlock || isSymMPIBlock);
629     if (isSymmetric) {
630       ierr = MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);CHKERRQ(ierr);
631     }
632     if (!isShell) {
633       PetscInt *dnz, *onz, *dnzu, *onzu, bsLocal;
634 
635       if (bs < 0) {
636         if (isBlock || isSeqBlock || isMPIBlock || isSymBlock || isSymSeqBlock || isSymMPIBlock) {
637           PetscInt pStart, pEnd, p, dof;
638 
639           ierr = PetscSectionGetChart(sectionGlobal, &pStart, &pEnd);CHKERRQ(ierr);
640           for (p = pStart; p < pEnd; ++p) {
641             ierr = PetscSectionGetDof(sectionGlobal, p, &dof);CHKERRQ(ierr);
642             if (dof) {
643               bs = dof;
644               break;
645             }
646           }
647         } else {
648           bs = 1;
649         }
650         /* Must have same blocksize on all procs (some might have no points) */
651         bsLocal = bs;
652         ierr    = MPIU_Allreduce(&bsLocal, &bs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
653       }
654       ierr = PetscCalloc4(localSize/bs, &dnz, localSize/bs, &onz, localSize/bs, &dnzu, localSize/bs, &onzu);CHKERRQ(ierr);
655       /* ierr = DMPlexPreallocateOperator(dm, bs, section, sectionGlobal, dnz, onz, dnzu, onzu, *J, fillMatrix);CHKERRQ(ierr); */
656       ierr = PetscFree4(dnz, onz, dnzu, onzu);CHKERRQ(ierr);
657     }
658   }
659   /*
660                                   m
661           ------------------------------------------------------
662          |                                                     |
663          |                                                     |
664          |               ----------------------                |
665          |               |                    |                |
666       n  |           ny  |                    |                |
667          |               |                    |                |
668          |               .---------------------                |
669          |             (xs,ys)     nx                          |
670          |            .                                        |
671          |         (gxs,gys)                                   |
672          |                                                     |
673           -----------------------------------------------------
674   */
675 
676   /*
677          nc - number of components per grid point
678          col - number of colors needed in one direction for single component problem
679 
680   */
681   M   = dd->M;
682   N   = dd->N;
683   P   = dd->P;
684   dim = da->dim;
685   dof = dd->w;
686   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,0,0,0,&dof,0,0,0,0,0);CHKERRQ(ierr); */
687   ierr = DMDAGetCorners(da,0,0,0,&nx,&ny,&nz);CHKERRQ(ierr);
688   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
689   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
690   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
691   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
692   ierr = MatSetDM(A,da);CHKERRQ(ierr);
693   if (da->structure_only) {
694     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
695   }
696   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
697   /*
698      We do not provide a getmatrix function in the DMDA operations because
699    the basic DMDA does not know about matrices. We think of DMDA as being more
700    more low-level than matrices. This is kind of cheating but, cause sometimes
701    we think of DMDA has higher level than matrices.
702 
703      We could switch based on Atype (or mtype), but we do not since the
704    specialized setting routines depend only the particular preallocation
705    details of the matrix, not the type itself.
706   */
707   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
708   if (!aij) {
709     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
710   }
711   if (!aij) {
712     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
713     if (!baij) {
714       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
715     }
716     if (!baij) {
717       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
718       if (!sbaij) {
719         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
720       }
721       if (!sbaij) {
722         ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIELLSetPreallocation_C",&ell);CHKERRQ(ierr);
723         if (!ell) {
724           ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqELLSetPreallocation_C",&ell);CHKERRQ(ierr);
725         }
726       }
727     }
728   }
729   if (aij) {
730     if (dim == 1) {
731       if (dd->ofill) {
732         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
733       } else {
734         ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
735       }
736     } else if (dim == 2) {
737       if (dd->ofill) {
738         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
739       } else {
740         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
741       }
742     } else if (dim == 3) {
743       if (dd->ofill) {
744         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
745       } else {
746         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
747       }
748     }
749   } else if (baij) {
750     if (dim == 2) {
751       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
752     } else if (dim == 3) {
753       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
754     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
755   } else if (sbaij) {
756     if (dim == 2) {
757       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
758     } else if (dim == 3) {
759       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
760     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
761   } else if (ell) {
762      if (dim ==2) {
763        ierr = DMCreateMatrix_DA_2d_MPIELL(da,A);CHKERRQ(ierr);
764      } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
765   }else {
766     ISLocalToGlobalMapping ltog;
767     ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
768     ierr = MatSetUp(A);CHKERRQ(ierr);
769     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
770     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
771   }
772   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
773   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
774   ierr = MatSetDM(A,da);CHKERRQ(ierr);
775   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
776   if (size > 1) {
777     /* change viewer to display matrix in natural ordering */
778     ierr = MatShellSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
779     ierr = MatShellSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
780   }
781   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
782   *J = A;
783   PetscFunctionReturn(0);
784 }
785 
786 /* ---------------------------------------------------------------------------------*/
787 PetscErrorCode DMCreateMatrix_DA_2d_MPIELL(DM da,Mat J)
788 {
789   PetscErrorCode         ierr;
790   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
791   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
792   MPI_Comm               comm;
793   PetscScalar            *values;
794   DMBoundaryType         bx,by;
795   ISLocalToGlobalMapping ltog;
796   DMDAStencilType        st;
797 
798   PetscFunctionBegin;
799   /*
800          nc - number of components per grid point
801          col - number of colors needed in one direction for single component problem
802 
803   */
804   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
805   col  = 2*s + 1;
806   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
807   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
808   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
809 
810   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
811   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
812 
813   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
814   /* determine the matrix preallocation information */
815   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
816   for (i=xs; i<xs+nx; i++) {
817 
818     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
819     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
820 
821     for (j=ys; j<ys+ny; j++) {
822       slot = i - gxs + gnx*(j - gys);
823 
824       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
825       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
826 
827       cnt = 0;
828       for (k=0; k<nc; k++) {
829         for (l=lstart; l<lend+1; l++) {
830           for (p=pstart; p<pend+1; p++) {
831             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
832               cols[cnt++] = k + nc*(slot + gnx*l + p);
833             }
834           }
835         }
836         rows[k] = k + nc*(slot);
837       }
838       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
839     }
840   }
841   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
842   ierr = MatSeqELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
843   ierr = MatMPIELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
844   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
845 
846   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
847 
848   /*
849     For each node in the grid: we get the neighbors in the local (on processor ordering
850     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
851     PETSc ordering.
852   */
853   if (!da->prealloc_only) {
854     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
855     for (i=xs; i<xs+nx; i++) {
856 
857       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
858       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
859 
860       for (j=ys; j<ys+ny; j++) {
861         slot = i - gxs + gnx*(j - gys);
862 
863         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
864         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
865 
866         cnt = 0;
867         for (k=0; k<nc; k++) {
868           for (l=lstart; l<lend+1; l++) {
869             for (p=pstart; p<pend+1; p++) {
870               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
871                 cols[cnt++] = k + nc*(slot + gnx*l + p);
872               }
873             }
874           }
875           rows[k] = k + nc*(slot);
876         }
877         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
878       }
879     }
880     ierr = PetscFree(values);CHKERRQ(ierr);
881     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
882     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
883     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
884   }
885   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
886   PetscFunctionReturn(0);
887 }
888 
889 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
890 {
891   PetscErrorCode         ierr;
892   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
893   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
894   MPI_Comm               comm;
895   PetscScalar            *values;
896   DMBoundaryType         bx,by;
897   ISLocalToGlobalMapping ltog;
898   DMDAStencilType        st;
899   PetscBool              removedups = PETSC_FALSE;
900 
901   PetscFunctionBegin;
902   /*
903          nc - number of components per grid point
904          col - number of colors needed in one direction for single component problem
905 
906   */
907   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
908   col  = 2*s + 1;
909   /*
910        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
911        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
912   */
913   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
914   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
915   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
916   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
917   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
918 
919   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
920   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
921 
922   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
923   /* determine the matrix preallocation information */
924   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
925   for (i=xs; i<xs+nx; i++) {
926 
927     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
928     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
929 
930     for (j=ys; j<ys+ny; j++) {
931       slot = i - gxs + gnx*(j - gys);
932 
933       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
934       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
935 
936       cnt = 0;
937       for (k=0; k<nc; k++) {
938         for (l=lstart; l<lend+1; l++) {
939           for (p=pstart; p<pend+1; p++) {
940             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
941               cols[cnt++] = k + nc*(slot + gnx*l + p);
942             }
943           }
944         }
945         rows[k] = k + nc*(slot);
946       }
947       if (removedups) {
948         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
949       } else {
950         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
951       }
952     }
953   }
954   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
955   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
956   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
957   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
958 
959   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
960 
961   /*
962     For each node in the grid: we get the neighbors in the local (on processor ordering
963     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
964     PETSc ordering.
965   */
966   if (!da->prealloc_only) {
967     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
968     for (i=xs; i<xs+nx; i++) {
969 
970       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
971       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
972 
973       for (j=ys; j<ys+ny; j++) {
974         slot = i - gxs + gnx*(j - gys);
975 
976         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
977         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
978 
979         cnt = 0;
980         for (k=0; k<nc; k++) {
981           for (l=lstart; l<lend+1; l++) {
982             for (p=pstart; p<pend+1; p++) {
983               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
984                 cols[cnt++] = k + nc*(slot + gnx*l + p);
985               }
986             }
987           }
988           rows[k] = k + nc*(slot);
989         }
990         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
991       }
992     }
993     ierr = PetscFree(values);CHKERRQ(ierr);
994     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
995     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
996     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
997   }
998   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
1001 
1002 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1003 {
1004   PetscErrorCode         ierr;
1005   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1006   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1007   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1008   DM_DA                  *dd = (DM_DA*)da->data;
1009   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1010   MPI_Comm               comm;
1011   PetscScalar            *values;
1012   DMBoundaryType         bx,by;
1013   ISLocalToGlobalMapping ltog;
1014   DMDAStencilType        st;
1015   PetscBool              removedups = PETSC_FALSE;
1016 
1017   PetscFunctionBegin;
1018   /*
1019          nc - number of components per grid point
1020          col - number of colors needed in one direction for single component problem
1021 
1022   */
1023   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1024   col  = 2*s + 1;
1025   /*
1026        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1027        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1028   */
1029   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1030   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1031   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1032   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1033   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1034 
1035   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
1036   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1037 
1038   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1039   /* determine the matrix preallocation information */
1040   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1041   for (i=xs; i<xs+nx; i++) {
1042 
1043     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1044     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1045 
1046     for (j=ys; j<ys+ny; j++) {
1047       slot = i - gxs + gnx*(j - gys);
1048 
1049       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1050       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1051 
1052       for (k=0; k<nc; k++) {
1053         cnt = 0;
1054         for (l=lstart; l<lend+1; l++) {
1055           for (p=pstart; p<pend+1; p++) {
1056             if (l || p) {
1057               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1058                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1059               }
1060             } else {
1061               if (dfill) {
1062                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1063               } else {
1064                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1065               }
1066             }
1067           }
1068         }
1069         row    = k + nc*(slot);
1070         maxcnt = PetscMax(maxcnt,cnt);
1071         if (removedups) {
1072           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1073         } else {
1074           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1075         }
1076       }
1077     }
1078   }
1079   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1080   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1081   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1082   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1083 
1084   /*
1085     For each node in the grid: we get the neighbors in the local (on processor ordering
1086     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1087     PETSc ordering.
1088   */
1089   if (!da->prealloc_only) {
1090     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
1091     for (i=xs; i<xs+nx; i++) {
1092 
1093       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1094       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1095 
1096       for (j=ys; j<ys+ny; j++) {
1097         slot = i - gxs + gnx*(j - gys);
1098 
1099         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1100         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1101 
1102         for (k=0; k<nc; k++) {
1103           cnt = 0;
1104           for (l=lstart; l<lend+1; l++) {
1105             for (p=pstart; p<pend+1; p++) {
1106               if (l || p) {
1107                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1108                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1109                 }
1110               } else {
1111                 if (dfill) {
1112                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1113                 } else {
1114                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1115                 }
1116               }
1117             }
1118           }
1119           row  = k + nc*(slot);
1120           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1121         }
1122       }
1123     }
1124     ierr = PetscFree(values);CHKERRQ(ierr);
1125     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1126     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1127     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1128   }
1129   ierr = PetscFree(cols);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 /* ---------------------------------------------------------------------------------*/
1134 
1135 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1136 {
1137   PetscErrorCode         ierr;
1138   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1139   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1140   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1141   MPI_Comm               comm;
1142   PetscScalar            *values;
1143   DMBoundaryType         bx,by,bz;
1144   ISLocalToGlobalMapping ltog;
1145   DMDAStencilType        st;
1146   PetscBool              removedups = PETSC_FALSE;
1147 
1148   PetscFunctionBegin;
1149   /*
1150          nc - number of components per grid point
1151          col - number of colors needed in one direction for single component problem
1152 
1153   */
1154   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1155   col  = 2*s + 1;
1156 
1157   /*
1158        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1159        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1160   */
1161   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1162   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1163   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1164 
1165   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1166   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1167   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1168 
1169   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1170   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1171 
1172   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1173   /* determine the matrix preallocation information */
1174   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1175   for (i=xs; i<xs+nx; i++) {
1176     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1177     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1178     for (j=ys; j<ys+ny; j++) {
1179       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1180       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1181       for (k=zs; k<zs+nz; k++) {
1182         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1183         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1184 
1185         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1186 
1187         cnt = 0;
1188         for (l=0; l<nc; l++) {
1189           for (ii=istart; ii<iend+1; ii++) {
1190             for (jj=jstart; jj<jend+1; jj++) {
1191               for (kk=kstart; kk<kend+1; kk++) {
1192                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1193                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1194                 }
1195               }
1196             }
1197           }
1198           rows[l] = l + nc*(slot);
1199         }
1200         if (removedups) {
1201           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1202         } else {
1203           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1204         }
1205       }
1206     }
1207   }
1208   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1209   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1210   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1211   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1212   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1213 
1214   /*
1215     For each node in the grid: we get the neighbors in the local (on processor ordering
1216     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1217     PETSc ordering.
1218   */
1219   if (!da->prealloc_only) {
1220     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1221     for (i=xs; i<xs+nx; i++) {
1222       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1223       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1224       for (j=ys; j<ys+ny; j++) {
1225         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1226         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1227         for (k=zs; k<zs+nz; k++) {
1228           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1229           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1230 
1231           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1232 
1233           cnt = 0;
1234           for (l=0; l<nc; l++) {
1235             for (ii=istart; ii<iend+1; ii++) {
1236               for (jj=jstart; jj<jend+1; jj++) {
1237                 for (kk=kstart; kk<kend+1; kk++) {
1238                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1239                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1240                   }
1241                 }
1242               }
1243             }
1244             rows[l] = l + nc*(slot);
1245           }
1246           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1247         }
1248       }
1249     }
1250     ierr = PetscFree(values);CHKERRQ(ierr);
1251     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1252     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1253     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1254   }
1255   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 /* ---------------------------------------------------------------------------------*/
1260 
1261 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1262 {
1263   PetscErrorCode         ierr;
1264   DM_DA                  *dd = (DM_DA*)da->data;
1265   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1266   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1267   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1268   PetscScalar            *values;
1269   DMBoundaryType         bx;
1270   ISLocalToGlobalMapping ltog;
1271   PetscMPIInt            rank,size;
1272 
1273   PetscFunctionBegin;
1274   if (dd->bx == DM_BOUNDARY_PERIODIC) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"With fill provided not implemented with periodic boundary conditions");
1275   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
1276   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
1277 
1278   /*
1279          nc - number of components per grid point
1280 
1281   */
1282   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1283   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1284   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1285 
1286   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1287   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1288 
1289   /*
1290         note should be smaller for first and last process with no periodic
1291         does not handle dfill
1292   */
1293   cnt = 0;
1294   /* coupling with process to the left */
1295   for (i=0; i<s; i++) {
1296     for (j=0; j<nc; j++) {
1297       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1298       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1299       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1300       cnt++;
1301     }
1302   }
1303   for (i=s; i<nx-s; i++) {
1304     for (j=0; j<nc; j++) {
1305       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1306       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1307       cnt++;
1308     }
1309   }
1310   /* coupling with process to the right */
1311   for (i=nx-s; i<nx; i++) {
1312     for (j=0; j<nc; j++) {
1313       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1314       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1315       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1316       cnt++;
1317     }
1318   }
1319 
1320   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1321   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1322   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1323 
1324   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1325   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1326 
1327   /*
1328     For each node in the grid: we get the neighbors in the local (on processor ordering
1329     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1330     PETSc ordering.
1331   */
1332   if (!da->prealloc_only) {
1333     ierr = PetscCalloc2(maxcnt,&values,maxcnt,&cols);CHKERRQ(ierr);
1334 
1335     row = xs*nc;
1336     /* coupling with process to the left */
1337     for (i=xs; i<xs+s; i++) {
1338       for (j=0; j<nc; j++) {
1339         cnt = 0;
1340         if (rank) {
1341           for (l=0; l<s; l++) {
1342             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1343           }
1344         }
1345         if (dfill) {
1346           for (k=dfill[j]; k<dfill[j+1]; k++) {
1347             cols[cnt++] = i*nc + dfill[k];
1348           }
1349         } else {
1350           for (k=0; k<nc; k++) {
1351             cols[cnt++] = i*nc + k;
1352           }
1353         }
1354         for (l=0; l<s; l++) {
1355           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1356         }
1357         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1358         row++;
1359       }
1360     }
1361     for (i=xs+s; i<xs+nx-s; i++) {
1362       for (j=0; j<nc; j++) {
1363         cnt = 0;
1364         for (l=0; l<s; l++) {
1365           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1366         }
1367         if (dfill) {
1368           for (k=dfill[j]; k<dfill[j+1]; k++) {
1369             cols[cnt++] = i*nc + dfill[k];
1370           }
1371         } else {
1372           for (k=0; k<nc; k++) {
1373             cols[cnt++] = i*nc + k;
1374           }
1375         }
1376         for (l=0; l<s; l++) {
1377           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1378         }
1379         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1380         row++;
1381       }
1382     }
1383     /* coupling with process to the right */
1384     for (i=xs+nx-s; i<xs+nx; i++) {
1385       for (j=0; j<nc; j++) {
1386         cnt = 0;
1387         for (l=0; l<s; l++) {
1388           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1389         }
1390         if (dfill) {
1391           for (k=dfill[j]; k<dfill[j+1]; k++) {
1392             cols[cnt++] = i*nc + dfill[k];
1393           }
1394         } else {
1395           for (k=0; k<nc; k++) {
1396             cols[cnt++] = i*nc + k;
1397           }
1398         }
1399         if (rank < size-1) {
1400           for (l=0; l<s; l++) {
1401             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1402           }
1403         }
1404         ierr = MatSetValues(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1405         row++;
1406       }
1407     }
1408     ierr = PetscFree2(values,cols);CHKERRQ(ierr);
1409     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1410     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1411     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1412   }
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 /* ---------------------------------------------------------------------------------*/
1417 
1418 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1419 {
1420   PetscErrorCode         ierr;
1421   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1422   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1423   PetscInt               istart,iend;
1424   PetscScalar            *values;
1425   DMBoundaryType         bx;
1426   ISLocalToGlobalMapping ltog;
1427 
1428   PetscFunctionBegin;
1429   /*
1430          nc - number of components per grid point
1431          col - number of colors needed in one direction for single component problem
1432 
1433   */
1434   ierr = DMDAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&bx,0,0,0);CHKERRQ(ierr);
1435   col  = 2*s + 1;
1436 
1437   ierr = DMDAGetCorners(da,&xs,0,0,&nx,0,0);CHKERRQ(ierr);
1438   ierr = DMDAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);CHKERRQ(ierr);
1439 
1440   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1441   ierr = MatSeqAIJSetPreallocation(J,col*nc,0);CHKERRQ(ierr);
1442   ierr = MatMPIAIJSetPreallocation(J,col*nc,0,col*nc,0);CHKERRQ(ierr);
1443 
1444   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1445   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1446 
1447   /*
1448     For each node in the grid: we get the neighbors in the local (on processor ordering
1449     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1450     PETSc ordering.
1451   */
1452   if (!da->prealloc_only) {
1453     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1454     ierr = PetscCalloc1(col*nc*nc,&values);CHKERRQ(ierr);
1455     for (i=xs; i<xs+nx; i++) {
1456       istart = PetscMax(-s,gxs - i);
1457       iend   = PetscMin(s,gxs + gnx - i - 1);
1458       slot   = i - gxs;
1459 
1460       cnt = 0;
1461       for (l=0; l<nc; l++) {
1462         for (i1=istart; i1<iend+1; i1++) {
1463           cols[cnt++] = l + nc*(slot + i1);
1464         }
1465         rows[l] = l + nc*(slot);
1466       }
1467       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1468     }
1469     ierr = PetscFree(values);CHKERRQ(ierr);
1470     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1471     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1472     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1473     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1474   }
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1479 {
1480   PetscErrorCode         ierr;
1481   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1482   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1483   PetscInt               istart,iend,jstart,jend,ii,jj;
1484   MPI_Comm               comm;
1485   PetscScalar            *values;
1486   DMBoundaryType         bx,by;
1487   DMDAStencilType        st;
1488   ISLocalToGlobalMapping ltog;
1489 
1490   PetscFunctionBegin;
1491   /*
1492      nc - number of components per grid point
1493      col - number of colors needed in one direction for single component problem
1494   */
1495   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1496   col  = 2*s + 1;
1497 
1498   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1499   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1500   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1501 
1502   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1503 
1504   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1505 
1506   /* determine the matrix preallocation information */
1507   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1508   for (i=xs; i<xs+nx; i++) {
1509     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1510     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1511     for (j=ys; j<ys+ny; j++) {
1512       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1513       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1514       slot   = i - gxs + gnx*(j - gys);
1515 
1516       /* Find block columns in block row */
1517       cnt = 0;
1518       for (ii=istart; ii<iend+1; ii++) {
1519         for (jj=jstart; jj<jend+1; jj++) {
1520           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1521             cols[cnt++] = slot + ii + gnx*jj;
1522           }
1523         }
1524       }
1525       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1526     }
1527   }
1528   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1529   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1530   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1531 
1532   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1533 
1534   /*
1535     For each node in the grid: we get the neighbors in the local (on processor ordering
1536     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1537     PETSc ordering.
1538   */
1539   if (!da->prealloc_only) {
1540     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1541     for (i=xs; i<xs+nx; i++) {
1542       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1543       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1544       for (j=ys; j<ys+ny; j++) {
1545         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1546         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1547         slot = i - gxs + gnx*(j - gys);
1548         cnt  = 0;
1549         for (ii=istart; ii<iend+1; ii++) {
1550           for (jj=jstart; jj<jend+1; jj++) {
1551             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1552               cols[cnt++] = slot + ii + gnx*jj;
1553             }
1554           }
1555         }
1556         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1557       }
1558     }
1559     ierr = PetscFree(values);CHKERRQ(ierr);
1560     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1561     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1562     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1563   }
1564   ierr = PetscFree(cols);CHKERRQ(ierr);
1565   PetscFunctionReturn(0);
1566 }
1567 
1568 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1569 {
1570   PetscErrorCode         ierr;
1571   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1572   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1573   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1574   MPI_Comm               comm;
1575   PetscScalar            *values;
1576   DMBoundaryType         bx,by,bz;
1577   DMDAStencilType        st;
1578   ISLocalToGlobalMapping ltog;
1579 
1580   PetscFunctionBegin;
1581   /*
1582          nc - number of components per grid point
1583          col - number of colors needed in one direction for single component problem
1584 
1585   */
1586   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1587   col  = 2*s + 1;
1588 
1589   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1590   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1591   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1592 
1593   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1594 
1595   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1596 
1597   /* determine the matrix preallocation information */
1598   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1599   for (i=xs; i<xs+nx; i++) {
1600     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1601     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1602     for (j=ys; j<ys+ny; j++) {
1603       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1604       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1605       for (k=zs; k<zs+nz; k++) {
1606         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1607         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1608 
1609         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1610 
1611         /* Find block columns in block row */
1612         cnt = 0;
1613         for (ii=istart; ii<iend+1; ii++) {
1614           for (jj=jstart; jj<jend+1; jj++) {
1615             for (kk=kstart; kk<kend+1; kk++) {
1616               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1617                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1618               }
1619             }
1620           }
1621         }
1622         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1623       }
1624     }
1625   }
1626   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1627   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1628   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1629 
1630   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1631 
1632   /*
1633     For each node in the grid: we get the neighbors in the local (on processor ordering
1634     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1635     PETSc ordering.
1636   */
1637   if (!da->prealloc_only) {
1638     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1639     for (i=xs; i<xs+nx; i++) {
1640       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1641       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1642       for (j=ys; j<ys+ny; j++) {
1643         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1644         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1645         for (k=zs; k<zs+nz; k++) {
1646           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1647           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1648 
1649           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1650 
1651           cnt = 0;
1652           for (ii=istart; ii<iend+1; ii++) {
1653             for (jj=jstart; jj<jend+1; jj++) {
1654               for (kk=kstart; kk<kend+1; kk++) {
1655                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1656                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1657                 }
1658               }
1659             }
1660           }
1661           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1662         }
1663       }
1664     }
1665     ierr = PetscFree(values);CHKERRQ(ierr);
1666     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1667     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1668     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1669   }
1670   ierr = PetscFree(cols);CHKERRQ(ierr);
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 /*
1675   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1676   identify in the local ordering with periodic domain.
1677 */
1678 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
1679 {
1680   PetscErrorCode ierr;
1681   PetscInt       i,n;
1682 
1683   PetscFunctionBegin;
1684   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
1685   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
1686   for (i=0,n=0; i<*cnt; i++) {
1687     if (col[i] >= *row) col[n++] = col[i];
1688   }
1689   *cnt = n;
1690   PetscFunctionReturn(0);
1691 }
1692 
1693 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
1694 {
1695   PetscErrorCode         ierr;
1696   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1697   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1698   PetscInt               istart,iend,jstart,jend,ii,jj;
1699   MPI_Comm               comm;
1700   PetscScalar            *values;
1701   DMBoundaryType         bx,by;
1702   DMDAStencilType        st;
1703   ISLocalToGlobalMapping ltog;
1704 
1705   PetscFunctionBegin;
1706   /*
1707      nc - number of components per grid point
1708      col - number of colors needed in one direction for single component problem
1709   */
1710   ierr = DMDAGetInfo(da,&dim,&m,&n,0,0,0,0,&nc,&s,&bx,&by,0,&st);CHKERRQ(ierr);
1711   col  = 2*s + 1;
1712 
1713   ierr = DMDAGetCorners(da,&xs,&ys,0,&nx,&ny,0);CHKERRQ(ierr);
1714   ierr = DMDAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);CHKERRQ(ierr);
1715   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1716 
1717   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1718 
1719   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1720 
1721   /* determine the matrix preallocation information */
1722   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1723   for (i=xs; i<xs+nx; i++) {
1724     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1725     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1726     for (j=ys; j<ys+ny; j++) {
1727       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1728       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1729       slot   = i - gxs + gnx*(j - gys);
1730 
1731       /* Find block columns in block row */
1732       cnt = 0;
1733       for (ii=istart; ii<iend+1; ii++) {
1734         for (jj=jstart; jj<jend+1; jj++) {
1735           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1736             cols[cnt++] = slot + ii + gnx*jj;
1737           }
1738         }
1739       }
1740       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1741       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1742     }
1743   }
1744   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1745   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1746   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1747 
1748   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1749 
1750   /*
1751     For each node in the grid: we get the neighbors in the local (on processor ordering
1752     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1753     PETSc ordering.
1754   */
1755   if (!da->prealloc_only) {
1756     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1757     for (i=xs; i<xs+nx; i++) {
1758       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1759       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1760       for (j=ys; j<ys+ny; j++) {
1761         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1762         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1763         slot   = i - gxs + gnx*(j - gys);
1764 
1765         /* Find block columns in block row */
1766         cnt = 0;
1767         for (ii=istart; ii<iend+1; ii++) {
1768           for (jj=jstart; jj<jend+1; jj++) {
1769             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
1770               cols[cnt++] = slot + ii + gnx*jj;
1771             }
1772           }
1773         }
1774         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1775         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1776       }
1777     }
1778     ierr = PetscFree(values);CHKERRQ(ierr);
1779     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1780     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1781     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1782   }
1783   ierr = PetscFree(cols);CHKERRQ(ierr);
1784   PetscFunctionReturn(0);
1785 }
1786 
1787 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
1788 {
1789   PetscErrorCode         ierr;
1790   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1791   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1792   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1793   MPI_Comm               comm;
1794   PetscScalar            *values;
1795   DMBoundaryType         bx,by,bz;
1796   DMDAStencilType        st;
1797   ISLocalToGlobalMapping ltog;
1798 
1799   PetscFunctionBegin;
1800   /*
1801      nc - number of components per grid point
1802      col - number of colors needed in one direction for single component problem
1803   */
1804   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1805   col  = 2*s + 1;
1806 
1807   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1808   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1809   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1810 
1811   /* create the matrix */
1812   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1813 
1814   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1815 
1816   /* determine the matrix preallocation information */
1817   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1818   for (i=xs; i<xs+nx; i++) {
1819     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1820     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1821     for (j=ys; j<ys+ny; j++) {
1822       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1823       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1824       for (k=zs; k<zs+nz; k++) {
1825         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1826         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1827 
1828         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1829 
1830         /* Find block columns in block row */
1831         cnt = 0;
1832         for (ii=istart; ii<iend+1; ii++) {
1833           for (jj=jstart; jj<jend+1; jj++) {
1834             for (kk=kstart; kk<kend+1; kk++) {
1835               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1836                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1837               }
1838             }
1839           }
1840         }
1841         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1842         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
1843       }
1844     }
1845   }
1846   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1847   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1848   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1849 
1850   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1851 
1852   /*
1853     For each node in the grid: we get the neighbors in the local (on processor ordering
1854     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1855     PETSc ordering.
1856   */
1857   if (!da->prealloc_only) {
1858     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1859     for (i=xs; i<xs+nx; i++) {
1860       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1861       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1862       for (j=ys; j<ys+ny; j++) {
1863         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1864         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1865         for (k=zs; k<zs+nz; k++) {
1866           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1867           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1868 
1869           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1870 
1871           cnt = 0;
1872           for (ii=istart; ii<iend+1; ii++) {
1873             for (jj=jstart; jj<jend+1; jj++) {
1874               for (kk=kstart; kk<kend+1; kk++) {
1875                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
1876                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1877                 }
1878               }
1879             }
1880           }
1881           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
1882           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1883         }
1884       }
1885     }
1886     ierr = PetscFree(values);CHKERRQ(ierr);
1887     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1888     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1889     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1890   }
1891   ierr = PetscFree(cols);CHKERRQ(ierr);
1892   PetscFunctionReturn(0);
1893 }
1894 
1895 /* ---------------------------------------------------------------------------------*/
1896 
1897 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
1898 {
1899   PetscErrorCode         ierr;
1900   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1901   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
1902   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1903   DM_DA                  *dd = (DM_DA*)da->data;
1904   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
1905   MPI_Comm               comm;
1906   PetscScalar            *values;
1907   DMBoundaryType         bx,by,bz;
1908   ISLocalToGlobalMapping ltog;
1909   DMDAStencilType        st;
1910   PetscBool              removedups = PETSC_FALSE;
1911 
1912   PetscFunctionBegin;
1913   /*
1914          nc - number of components per grid point
1915          col - number of colors needed in one direction for single component problem
1916 
1917   */
1918   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1919   col  = 2*s + 1;
1920   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
1921                  by 2*stencil_width + 1\n");
1922   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
1923                  by 2*stencil_width + 1\n");
1924   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
1925                  by 2*stencil_width + 1\n");
1926 
1927   /*
1928        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1929        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1930   */
1931   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1932   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1933   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1934 
1935   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1936   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1937   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1938 
1939   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
1940   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1941 
1942   /* determine the matrix preallocation information */
1943   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1944 
1945   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1946   for (i=xs; i<xs+nx; i++) {
1947     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1948     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1949     for (j=ys; j<ys+ny; j++) {
1950       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1951       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1952       for (k=zs; k<zs+nz; k++) {
1953         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1954         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1955 
1956         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1957 
1958         for (l=0; l<nc; l++) {
1959           cnt = 0;
1960           for (ii=istart; ii<iend+1; ii++) {
1961             for (jj=jstart; jj<jend+1; jj++) {
1962               for (kk=kstart; kk<kend+1; kk++) {
1963                 if (ii || jj || kk) {
1964                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1965                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1966                   }
1967                 } else {
1968                   if (dfill) {
1969                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1970                   } else {
1971                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1972                   }
1973                 }
1974               }
1975             }
1976           }
1977           row  = l + nc*(slot);
1978           maxcnt = PetscMax(maxcnt,cnt);
1979           if (removedups) {
1980             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1981           } else {
1982             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1983           }
1984         }
1985       }
1986     }
1987   }
1988   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1989   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1990   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1991   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1992 
1993   /*
1994     For each node in the grid: we get the neighbors in the local (on processor ordering
1995     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1996     PETSc ordering.
1997   */
1998   if (!da->prealloc_only) {
1999     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
2000     for (i=xs; i<xs+nx; i++) {
2001       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2002       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2003       for (j=ys; j<ys+ny; j++) {
2004         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2005         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2006         for (k=zs; k<zs+nz; k++) {
2007           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2008           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2009 
2010           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2011 
2012           for (l=0; l<nc; l++) {
2013             cnt = 0;
2014             for (ii=istart; ii<iend+1; ii++) {
2015               for (jj=jstart; jj<jend+1; jj++) {
2016                 for (kk=kstart; kk<kend+1; kk++) {
2017                   if (ii || jj || kk) {
2018                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2019                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2020                     }
2021                   } else {
2022                     if (dfill) {
2023                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2024                     } else {
2025                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2026                     }
2027                   }
2028                 }
2029               }
2030             }
2031             row  = l + nc*(slot);
2032             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2033           }
2034         }
2035       }
2036     }
2037     ierr = PetscFree(values);CHKERRQ(ierr);
2038     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2039     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2040     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2041   }
2042   ierr = PetscFree(cols);CHKERRQ(ierr);
2043   PetscFunctionReturn(0);
2044 }
2045