xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision c034947461d79e6d5831ee73e3d596a8a772b129)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/baij/seq/baij.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatFDColoringApply_SeqBAIJ"
7 PetscErrorCode  MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
8 {
9   PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
10   PetscErrorCode ierr;
11   PetscInt       bs=J->rmap->bs,bs2=bs*bs,i,j,k,start,end,l,row,col,N;
12   PetscScalar    dx,*y,*xx,*w3_array;
13   PetscScalar    *vscale_array;
14   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
15   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
16   void           *fctx   = coloring->fctx;
17   PetscBool      flg     = PETSC_FALSE;
18   Mat_SeqBAIJ    *csp=(Mat_SeqBAIJ*)J->data;
19   PetscScalar    *ca = csp->a;
20   PetscInt       brow,bcol,bspidx,spidx,nz,nz_i;
21 
22   PetscFunctionBegin;
23   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
24   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
25   if (flg) {
26     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
27   } else {
28     PetscBool assembled;
29     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
30     if (assembled) {
31       ierr = MatZeroEntries(J);CHKERRQ(ierr);
32     }
33   }
34 
35   if (!coloring->vscale) {
36     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
37   }
38 
39   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
40     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
41   }
42   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
43 
44   /* Set w1 = F(x1) */
45   if (!coloring->fset) {
46     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
47     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
48     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
49   } else {
50     coloring->fset = PETSC_FALSE;
51   }
52 
53   if (!coloring->w3) {
54     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
55     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
56   }
57   w3 = coloring->w3;
58 
59   /* Compute all the local scale factors: vscale = 1./dx = 1./(epsilon*xx) */
60   ierr = VecGetLocalSize(x1,&N);CHKERRQ(ierr);
61   ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
62   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
63 
64   /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
65   for (col=0; col<N; col++) {
66     if (coloring->htype[0] == 'w') {
67       dx = 1.0 + unorm;
68     } else {
69       dx = xx[col];
70     }
71     if (dx == (PetscScalar)0.0) dx = 1.0;
72     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
73     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
74     dx               *= epsilon;
75     vscale_array[col] = (PetscScalar)dx;
76   }
77 
78   nz = 0;
79   for (k=0; k<coloring->ncolors; k++) { /*  Loop over each color */
80     coloring->currentcolor = k;
81     nz_i = nz;
82     for (i=0; i<bs; i++) {
83       //printf(" color= %d, i= %d, nz_i= %d ----------------\n",k,i,nz_i);
84       nz = nz_i;
85       ierr = VecCopy(x1,w3);CHKERRQ(ierr);
86       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
87 
88       /*
89         Loop over each column associated with color
90         adding the perturbation to the vector w3.
91       */
92       for (l=0; l<coloring->ncolumns[k]; l++) {
93         col = i + bs*coloring->columns[k][l];
94         w3_array[col] += vscale_array[col];
95       }
96       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
97 
98       /*
99         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
100         w2 = F(x1 + dx) - F(x1)
101       */
102       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
103       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
104       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
105       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
106 
107       /*
108         Loop over rows of vector, putting results into Jacobian matrix
109       */
110       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
111       for (l=0; l<coloring->nrows[k]; l++) {
112         brow   = coloring->rowcolden2sp3[nz++];
113         bcol   = coloring->rowcolden2sp3[nz++];
114         bspidx = coloring->rowcolden2sp3[nz++];
115 
116         row = bs*brow;
117         col = bs*bcol+i;
118         spidx = bs2*bspidx+i*bs;
119 
120         for (j=0; j<bs; j++) {
121           ca[spidx+j] = y[row+j]/vscale_array[col];
122           //printf("(%d %d %g) nz= %d\n",row+j,col,ca[spidx],nz-3);
123         }
124       }
125       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
126     }
127   } /* endof for each color */
128   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
129   ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
130 
131   coloring->currentcolor = -1;
132   PetscFunctionReturn(0);
133 }
134 
135 /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */
136 /* #define JACOBIANCOLOROPT */
137 #if defined(JACOBIANCOLOROPT)
138 #include <petsctime.h>
139 #endif
140 #undef __FUNCT__
141 #define __FUNCT__ "MatFDColoringApply_SeqAIJ"
142 PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
143 {
144   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
145   PetscErrorCode ierr;
146   PetscInt       k,l,row,col,N,bs=J->rmap->bs,bs2=bs*bs;
147   PetscScalar    dx,*y,*xx,*w3_array;
148   PetscScalar    *vscale_array;
149   PetscReal      epsilon=coloring->error_rel,umin = coloring->umin,unorm;
150   Vec            w1=coloring->w1,w2=coloring->w2,w3;
151   void           *fctx=coloring->fctx;
152   PetscBool      flg=PETSC_FALSE,isBAIJ=PETSC_FALSE;
153   PetscScalar    *ca;
154   const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
155   PetscInt       **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx;
156 #if defined(JACOBIANCOLOROPT)
157   PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11;
158 #endif
159 
160   PetscFunctionBegin;
161 #if defined(JACOBIANCOLOROPT)
162   ierr = PetscTime(&t0);CHKERRQ(ierr);
163 #endif
164   ierr = PetscObjectTypeCompare((PetscObject)J,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
165   if (isBAIJ) {
166     Mat_SeqBAIJ     *csp=(Mat_SeqBAIJ*)J->data;
167     ca = csp->a;
168   } else {
169     Mat_SeqAIJ     *csp=(Mat_SeqAIJ*)J->data;
170     ca = csp->a;
171     bs = 1; bs2 = 1;
172   }
173 
174   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
175   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
176   if (flg) {
177     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
178   } else {
179     PetscBool assembled;
180     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
181     if (assembled) {
182       ierr = MatZeroEntries(J);CHKERRQ(ierr);
183     }
184   }
185 
186   if (!coloring->vscale) {
187     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
188   }
189 
190   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
191     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
192   }
193 #if defined(JACOBIANCOLOROPT)
194     ierr = PetscTime(&t1);CHKERRQ(ierr);
195     t_init += t1 - t0;
196 #endif
197 
198   /* Set w1 = F(x1) */
199 #if defined(JACOBIANCOLOROPT)
200     ierr = PetscTime(&t0);CHKERRQ(ierr);
201 #endif
202   if (!coloring->fset) {
203     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
204     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
205     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
206   } else {
207     coloring->fset = PETSC_FALSE;
208   }
209 #if defined(JACOBIANCOLOROPT)
210     ierr = PetscTime(&t1);CHKERRQ(ierr);
211     t_setvals += t1 - t0;
212 #endif
213 
214 #if defined(JACOBIANCOLOROPT)
215     ierr = PetscTime(&t0);CHKERRQ(ierr);
216 #endif
217   if (!coloring->w3) {
218     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
219     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
220   }
221   w3 = coloring->w3;
222 
223   /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */
224   ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
225   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
226   ierr = VecGetSize(x1,&N);CHKERRQ(ierr);
227   for (col=0; col<N; col++) {
228     if (coloring->htype[0] == 'w') {
229       dx = 1.0 + unorm;
230     } else {
231       dx = xx[col];
232     }
233     if (dx == (PetscScalar)0.0) dx = 1.0;
234     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
235     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
236     dx               *= epsilon;
237     vscale_array[col] = (PetscScalar)dx;
238   }
239 #if defined(JACOBIANCOLOROPT)
240     ierr = PetscTime(&t1);CHKERRQ(ierr);
241     t_init += t1 - t0;
242 #endif
243 
244   nz  = 0;
245   for (k=0; k<ncolors; k++) { /* loop over colors */
246 #if defined(JACOBIANCOLOROPT)
247     ierr = PetscTime(&t0);CHKERRQ(ierr);
248 #endif
249     coloring->currentcolor = k;
250 
251     /*
252       Loop over each column associated with color
253       adding the perturbation to the vector w3 = x1 + dx.
254     */
255     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
256     ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
257     ncolumns_k = ncolumns[k];
258     for (l=0; l<ncolumns_k; l++) { /* loop over columns */
259       col = columns[k][l];
260       w3_array[col] += vscale_array[col];
261     }
262     ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
263 #if defined(JACOBIANCOLOROPT)
264     ierr = PetscTime(&t1);CHKERRQ(ierr);
265     t_dx += t1 - t0;
266 #endif
267 
268     /*
269       Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
270                            w2 = F(x1 + dx) - F(x1)
271     */
272 #if defined(JACOBIANCOLOROPT)
273     ierr = PetscTime(&t0);CHKERRQ(ierr);
274 #endif
275     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
276     ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
277     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
278 #if defined(JACOBIANCOLOROPT)
279     ierr = PetscTime(&t1);CHKERRQ(ierr);
280     t_func += t1 - t0;
281 #endif
282 
283 #if defined(JACOBIANCOLOROPT)
284     ierr = PetscTime(&t0);CHKERRQ(ierr);
285 #endif
286     ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
287 
288     /*
289       Loop over rows of vector, putting w2/dx into Jacobian matrix
290     */
291     ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
292     nrows_k = nrows[k];
293     for (l=0; l<nrows_k; l++) { /* loop over rows */
294 #if defined(JACOBIANCOLOROPT)
295     ierr = PetscTime(&t00);CHKERRQ(ierr);
296 #endif
297       row   = coloring->rowcolden2sp3[nz++];
298       col   = coloring->rowcolden2sp3[nz++];
299       spidx = coloring->rowcolden2sp3[nz++];
300 #if defined(JACOBIANCOLOROPT)
301       ierr = PetscTime(&t11);CHKERRQ(ierr);
302       t_kl += t11 - t00;
303 #endif
304       //printf(" row col val
305       ca[spidx*bs2] = y[row*bs]/vscale_array[col*bs];
306       //printf(" (%d, %d, %g)\n", row*bs,col*bs,ca[spidx*bs2]);
307     }
308     ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
309 
310 #if defined(JACOBIANCOLOROPT)
311     ierr = PetscTime(&t1);CHKERRQ(ierr);
312     t_setvals += t1 - t0;
313 #endif
314   } /* endof for each color */
315 #if defined(JACOBIANCOLOROPT)
316   printf("     FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx);
317   printf("     FDColorApply time: kl %g\n",t_kl);
318 #endif
319   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
320   ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
321 
322   coloring->currentcolor = -1;
323   PetscFunctionReturn(0);
324 }
325 
326 #undef __FUNCT__
327 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp"
328 /*
329     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
330 */
331 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c)
332 {
333   PetscErrorCode ierr;
334   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
335   const PetscInt *is,*rows,*ci,*cj;
336   PetscInt       nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz;
337   IS             *isa;
338   PetscBool      isBAIJ;
339   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
340 
341   PetscFunctionBegin;
342   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
343 
344   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
345      SeqBAIJ calls this routine, thus check it */
346   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
347   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
348   if (!isBAIJ) {
349     bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */
350   }
351   N         = mat->cmap->N/bs;
352   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
353   c->N      = mat->cmap->N/bs;
354   c->m      = mat->rmap->N/bs;
355   c->rstart = 0;
356 
357   c->ncolors = nis;
358   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
359   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
360   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
361   ierr       = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr);
362 
363   if (isBAIJ) {
364     ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
365   } else {
366     ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
367   }
368 
369   ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr);
370   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
371 
372   nz = 0;
373   for (i=0; i<nis; i++) { /* loop over colors */
374     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
375     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
376 
377     c->ncolumns[i] = n;
378     if (n) {
379       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
380       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
381     } else {
382       c->columns[i] = 0;
383     }
384 
385     /* fast, crude version requires O(N*N) work */
386     nrows = 0;
387     for (j=0; j<n; j++) {  /* loop over columns */
388       col  = is[j];
389       rows = cj + ci[col];
390       m    = ci[col+1] - ci[col];
391       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
392         rowhit[*rows]     = col + 1;
393         spidxhit[*rows++] = spidx[ci[col] + k];
394         nrows++;
395       }
396     }
397     c->nrows[i] = nrows; /* total num of rows for this color */
398 
399     nrows = 0;
400     for (j=0; j<N; j++) { /* loop over rows */
401       if (rowhit[j]) {
402         c->rowcolden2sp3[nz++]   = j;             /* row index */
403         c->rowcolden2sp3[nz++] = rowhit[j] - 1;   /* column index */
404         c->rowcolden2sp3[nz++] = spidxhit[j];     /* den2sp index */
405         rowhit[j] = 0.0;                          /* zero rowhit for reuse */
406       }
407     }
408     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
409   }
410   if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz);
411 
412   if (isBAIJ) {
413     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
414   } else {
415     ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
416   }
417   ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr);
418   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
419 
420   c->ctype                  = IS_COLORING_GHOSTED;
421   if (isBAIJ) {
422     mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ;
423   } else {
424     mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
425   }
426   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
427   PetscFunctionReturn(0);
428 }
429 
430 #undef __FUNCT__
431 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
432 /*
433     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
434     This is why it has the ugly code with the MatGetBlockSize()
435 */
436 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
437 {
438   PetscErrorCode ierr;
439   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
440   const PetscInt *is,*rows,*ci,*cj;
441   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
442   IS             *isa;
443   PetscBool      done,flg = PETSC_FALSE;
444   PetscBool      flg1;
445   PetscBool      new_impl=PETSC_FALSE;
446 
447   PetscFunctionBegin;
448   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
449   if (new_impl){
450     ierr =  MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr);
451     PetscFunctionReturn(0);
452   }
453   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
454 
455   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
456   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
457      SeqBAIJ calls this routine, thus check it */
458   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
459   if (flg1) {
460     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
461   }
462 
463   N         = mat->cmap->N/bs;
464   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
465   c->N      = mat->cmap->N/bs;
466   c->m      = mat->rmap->N/bs;
467   c->rstart = 0;
468 
469   c->ncolors = nis;
470   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
471   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
472   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
473   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
474   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
475 
476   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
477   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
478 
479   /*
480      Temporary option to allow for debugging/testing
481   */
482   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
483 
484   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
485   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
486 
487   for (i=0; i<nis; i++) {
488     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
489     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
490 
491     c->ncolumns[i] = n;
492     if (n) {
493       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
494       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
495     } else {
496       c->columns[i] = 0;
497     }
498 
499     if (!flg) { /* ------------------------------------------------------------------------------*/
500       /* fast, crude version requires O(N*N) work */
501       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
502       /* loop over columns*/
503       for (j=0; j<n; j++) {
504         col  = is[j];
505         rows = cj + ci[col];
506         m    = ci[col+1] - ci[col];
507         /* loop over columns marking them in rowhit */
508         for (k=0; k<m; k++) {
509           rowhit[*rows++] = col + 1;
510         }
511       }
512       /* count the number of hits */
513       nrows = 0;
514       for (j=0; j<N; j++) {
515         if (rowhit[j]) nrows++;
516       }
517       c->nrows[i] = nrows;
518       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
519       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
520       nrows       = 0;
521       for (j=0; j<N; j++) {
522         if (rowhit[j]) {
523           c->rows[i][nrows]          = j;
524           c->columnsforrow[i][nrows] = rowhit[j] - 1;
525           nrows++;
526         }
527       }
528     } else {  /*-------------------------------------------------------------------------------*/
529       /* slow version, using rowhit as a linked list */
530       PetscInt currentcol,fm,mfm;
531       rowhit[N] = N;
532       nrows     = 0;
533       /* loop over columns */
534       for (j=0; j<n; j++) {
535         col  = is[j];
536         rows = cj + ci[col];
537         m    = ci[col+1] - ci[col];
538         /* loop over columns marking them in rowhit */
539         fm = N;    /* fm points to first entry in linked list */
540         for (k=0; k<m; k++) {
541           currentcol = *rows++;
542           /* is it already in the list? */
543           do {
544             mfm = fm;
545             fm  = rowhit[fm];
546           } while (fm < currentcol);
547           /* not in list so add it */
548           if (fm != currentcol) {
549             nrows++;
550             columnsforrow[currentcol] = col;
551             /* next three lines insert new entry into linked list */
552             rowhit[mfm]        = currentcol;
553             rowhit[currentcol] = fm;
554             fm                 = currentcol;
555             /* fm points to present position in list since we know the columns are sorted */
556           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
557         }
558       }
559       c->nrows[i] = nrows;
560       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
561       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
562       /* now store the linked list of rows into c->rows[i] */
563       nrows = 0;
564       fm    = rowhit[N];
565       do {
566         c->rows[i][nrows]            = fm;
567         c->columnsforrow[i][nrows++] = columnsforrow[fm];
568         fm                           = rowhit[fm];
569       } while (fm < N);
570     } /* ---------------------------------------------------------------------------------------*/
571     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
572   }
573   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
574 
575   ierr = PetscFree(rowhit);CHKERRQ(ierr);
576   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
577 
578   /* Optimize by adding the vscale, and scaleforrow[][] fields */
579   /*
580        see the version for MPIAIJ
581   */
582   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
583   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
584   for (k=0; k<c->ncolors; k++) {
585     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
586     for (l=0; l<c->nrows[k]; l++) {
587       col = c->columnsforrow[k][l];
588 
589       c->vscaleforrow[k][l] = col;
590     }
591   }
592   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
593   PetscFunctionReturn(0);
594 }
595 
596