xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision b7799381d9b8e477defde34ba576b62ec2b0e4ad)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/baij/seq/baij.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatFDColoringApply_SeqBAIJ"
7 PetscErrorCode  MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
8 {
9   PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
10   PetscErrorCode ierr;
11   PetscInt       bs=J->rmap->bs,bs2=bs*bs,i,j,k,l,row,col,N=J->cmap->n,spidx,nz;
12   PetscScalar    dx,*dy_i,*xx,*w3_array,*dy=coloring->dy;
13   PetscScalar    *vscale_array;
14   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
15   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
16   void           *fctx   = coloring->fctx;
17   PetscBool      flg     = PETSC_FALSE;
18   Mat_SeqBAIJ    *csp=(Mat_SeqBAIJ*)J->data;
19   PetscScalar    *ca = csp->a,*ca_l;
20 
21   PetscFunctionBegin;
22   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
23   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
24   if (flg) {
25     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
26   } else {
27     PetscBool assembled;
28     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
29     if (assembled) {
30       ierr = MatZeroEntries(J);CHKERRQ(ierr);
31     }
32   }
33   if (!coloring->vscale) {
34     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
35   }
36 
37   /* Set w1 = F(x1) */
38   if (!coloring->fset) {
39     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
40     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
41     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
42   } else {
43     coloring->fset = PETSC_FALSE;
44   }
45 
46   if (!coloring->w3) {
47     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
48     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
49   }
50   w3 = coloring->w3;
51 
52   /* Compute local scale factors: vscale = dx */
53   if (coloring->htype[0] == 'w') {
54     /* tacky test; need to make systematic if we add other approaches to computing h*/
55     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
56     dx = (1.0 + unorm)*epsilon;
57     ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr);
58   } else {
59     ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
60     ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
61     for (col=0; col<N; col++) {
62       dx = xx[col];
63       if (dx == 0.0) dx = 1.0;
64       if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
65       else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
66       dx               *= epsilon;
67       vscale_array[col] = dx;
68     }
69     ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
70     ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
71   }
72 
73   nz = 0;
74   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
75   for (k=0; k<coloring->ncolors; k++) { /*  Loop over each color */
76     coloring->currentcolor = k;
77 
78     /* Compute w3 = x1 + dx */
79     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
80     dy_i = dy;
81     for (i=0; i<bs; i++) {              /* Loop over a block of columns */
82       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
83       for (l=0; l<coloring->ncolumns[k]; l++) {
84         col            = i + bs*coloring->columns[k][l];
85         w3_array[col] += vscale_array[col];
86         if (i) {
87           w3_array[col-1] -= vscale_array[col-1]; /* resume original w3[col-1] */
88         }
89       }
90       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
91 
92       /* Evaluate function w2 = F(w3) - F(x1) */
93       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
94       ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */
95       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
96       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
97       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
98       ierr = VecResetArray(w2);CHKERRQ(ierr);
99       dy_i += N; /* points to dy+i*N */
100     }
101 
102     /* Loop over row blocks, putting dy/dx into Jacobian matrix */
103     for (l=0; l<coloring->nrows[k]; l++) {
104       row   = bs*coloring->rowcolden2sp3[nz++];
105       col   = bs*coloring->rowcolden2sp3[nz++];
106       ca_l  = ca + bs2*coloring->rowcolden2sp3[nz++];
107       spidx = 0;
108       dy_i  = dy;
109       for (i=0; i<bs; i++) {   /* column of the block */
110         for (j=0; j<bs; j++) { /* row of the block */
111           ca_l[spidx++] = dy_i[row+j]/vscale_array[col+i];
112         }
113         dy_i += N; /* points to dy+i*N */
114       }
115     }
116   } /* endof for each color */
117   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
118 
119   coloring->currentcolor = -1;
120   PetscFunctionReturn(0);
121 }
122 
123 /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */
124 /* #define JACOBIANCOLOROPT */
125 #if defined(JACOBIANCOLOROPT)
126 #include <petsctime.h>
127 #endif
128 #undef __FUNCT__
129 #define __FUNCT__ "MatFDColoringApply_SeqAIJ"
130 PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
131 {
132   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
133   PetscErrorCode ierr;
134   PetscInt       k,l,row,col,N,bs=J->rmap->bs,bs2=bs*bs;
135   PetscScalar    dx,*y,*xx,*w3_array;
136   PetscScalar    *vscale_array;
137   PetscReal      epsilon=coloring->error_rel,umin = coloring->umin,unorm;
138   Vec            w1=coloring->w1,w2=coloring->w2,w3;
139   void           *fctx=coloring->fctx;
140   PetscBool      flg=PETSC_FALSE,isBAIJ=PETSC_FALSE;
141   PetscScalar    *ca;
142   const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
143   PetscInt       **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx;
144 #if defined(JACOBIANCOLOROPT)
145   PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11;
146 #endif
147 
148   PetscFunctionBegin;
149 #if defined(JACOBIANCOLOROPT)
150   ierr = PetscTime(&t0);CHKERRQ(ierr);
151 #endif
152   ierr = PetscObjectTypeCompare((PetscObject)J,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
153   if (isBAIJ) {
154     Mat_SeqBAIJ     *csp=(Mat_SeqBAIJ*)J->data;
155     ca = csp->a;
156   } else {
157     Mat_SeqAIJ     *csp=(Mat_SeqAIJ*)J->data;
158     ca = csp->a;
159     bs = 1; bs2 = 1;
160   }
161 
162   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
163   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
164   if (flg) {
165     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
166   } else {
167     PetscBool assembled;
168     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
169     if (assembled) {
170       ierr = MatZeroEntries(J);CHKERRQ(ierr);
171     }
172   }
173 
174   if (!coloring->vscale) {
175     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
176   }
177 
178   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
179     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
180   }
181 #if defined(JACOBIANCOLOROPT)
182     ierr = PetscTime(&t1);CHKERRQ(ierr);
183     t_init += t1 - t0;
184 #endif
185 
186   /* Set w1 = F(x1) */
187 #if defined(JACOBIANCOLOROPT)
188     ierr = PetscTime(&t0);CHKERRQ(ierr);
189 #endif
190   if (!coloring->fset) {
191     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
192     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
193     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
194   } else {
195     coloring->fset = PETSC_FALSE;
196   }
197 #if defined(JACOBIANCOLOROPT)
198     ierr = PetscTime(&t1);CHKERRQ(ierr);
199     t_setvals += t1 - t0;
200 #endif
201 
202 #if defined(JACOBIANCOLOROPT)
203     ierr = PetscTime(&t0);CHKERRQ(ierr);
204 #endif
205   if (!coloring->w3) {
206     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
207     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
208   }
209   w3 = coloring->w3;
210 
211   /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */
212   ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
213   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
214   ierr = VecGetSize(x1,&N);CHKERRQ(ierr);
215   for (col=0; col<N; col++) {
216     if (coloring->htype[0] == 'w') {
217       dx = 1.0 + unorm;
218     } else {
219       dx = xx[col];
220     }
221     if (dx == (PetscScalar)0.0) dx = 1.0;
222     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
223     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
224     dx               *= epsilon;
225     vscale_array[col] = (PetscScalar)dx;
226   }
227 #if defined(JACOBIANCOLOROPT)
228     ierr = PetscTime(&t1);CHKERRQ(ierr);
229     t_init += t1 - t0;
230 #endif
231 
232   nz  = 0;
233   for (k=0; k<ncolors; k++) { /* loop over colors */
234 #if defined(JACOBIANCOLOROPT)
235     ierr = PetscTime(&t0);CHKERRQ(ierr);
236 #endif
237     coloring->currentcolor = k;
238 
239     /*
240       Loop over each column associated with color
241       adding the perturbation to the vector w3 = x1 + dx.
242     */
243     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
244     ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
245     ncolumns_k = ncolumns[k];
246     for (l=0; l<ncolumns_k; l++) { /* loop over columns */
247       col = columns[k][l];
248       w3_array[col] += vscale_array[col];
249     }
250     ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
251 #if defined(JACOBIANCOLOROPT)
252     ierr = PetscTime(&t1);CHKERRQ(ierr);
253     t_dx += t1 - t0;
254 #endif
255 
256     /*
257       Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
258                            w2 = F(x1 + dx) - F(x1)
259     */
260 #if defined(JACOBIANCOLOROPT)
261     ierr = PetscTime(&t0);CHKERRQ(ierr);
262 #endif
263     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
264     ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
265     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
266 #if defined(JACOBIANCOLOROPT)
267     ierr = PetscTime(&t1);CHKERRQ(ierr);
268     t_func += t1 - t0;
269 #endif
270 
271 #if defined(JACOBIANCOLOROPT)
272     ierr = PetscTime(&t0);CHKERRQ(ierr);
273 #endif
274     ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
275 
276     /*
277       Loop over rows of vector, putting w2/dx into Jacobian matrix
278     */
279     ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
280     nrows_k = nrows[k];
281     for (l=0; l<nrows_k; l++) { /* loop over rows */
282 #if defined(JACOBIANCOLOROPT)
283     ierr = PetscTime(&t00);CHKERRQ(ierr);
284 #endif
285       row   = coloring->rowcolden2sp3[nz++];
286       col   = coloring->rowcolden2sp3[nz++];
287       spidx = coloring->rowcolden2sp3[nz++];
288 #if defined(JACOBIANCOLOROPT)
289       ierr = PetscTime(&t11);CHKERRQ(ierr);
290       t_kl += t11 - t00;
291 #endif
292       //printf(" row col val
293       ca[spidx*bs2] = y[row*bs]/vscale_array[col*bs];
294       //printf(" (%d, %d, %g)\n", row*bs,col*bs,ca[spidx*bs2]);
295     }
296     ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
297 
298 #if defined(JACOBIANCOLOROPT)
299     ierr = PetscTime(&t1);CHKERRQ(ierr);
300     t_setvals += t1 - t0;
301 #endif
302   } /* endof for each color */
303 #if defined(JACOBIANCOLOROPT)
304   printf("     FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx);
305   printf("     FDColorApply time: kl %g\n",t_kl);
306 #endif
307   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
308   ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
309 
310   coloring->currentcolor = -1;
311   PetscFunctionReturn(0);
312 }
313 
314 #undef __FUNCT__
315 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp"
316 /*
317     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
318 */
319 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c)
320 {
321   PetscErrorCode ierr;
322   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
323   const PetscInt *is,*rows,*ci,*cj;
324   PetscInt       nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz;
325   IS             *isa;
326   PetscBool      isBAIJ;
327   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
328 
329   PetscFunctionBegin;
330   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
331 
332   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
333      SeqBAIJ calls this routine, thus check it */
334   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
335   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
336   if (!isBAIJ) {
337     bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */
338   }
339   N         = mat->cmap->N/bs;
340   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
341   c->N      = mat->cmap->N/bs;
342   c->m      = mat->rmap->N/bs;
343   c->rstart = 0;
344 
345   c->ncolors = nis;
346   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
347   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
348   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
349   ierr       = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr);
350 
351   if (isBAIJ) {
352     ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
353   } else {
354     ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
355   }
356 
357   ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr);
358   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
359 
360   nz = 0;
361   for (i=0; i<nis; i++) { /* loop over colors */
362     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
363     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
364 
365     c->ncolumns[i] = n;
366     if (n) {
367       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
368       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
369     } else {
370       c->columns[i] = 0;
371     }
372 
373     /* fast, crude version requires O(N*N) work */
374     nrows = 0;
375     for (j=0; j<n; j++) {  /* loop over columns */
376       col  = is[j];
377       rows = cj + ci[col];
378       m    = ci[col+1] - ci[col];
379       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
380         rowhit[*rows]     = col + 1;
381         spidxhit[*rows++] = spidx[ci[col] + k];
382         nrows++;
383       }
384     }
385     c->nrows[i] = nrows; /* total num of rows for this color */
386 
387     nrows = 0;
388     for (j=0; j<N; j++) { /* loop over rows */
389       if (rowhit[j]) {
390         c->rowcolden2sp3[nz++]   = j;             /* row index */
391         c->rowcolden2sp3[nz++] = rowhit[j] - 1;   /* column index */
392         c->rowcolden2sp3[nz++] = spidxhit[j];     /* den2sp index */
393         rowhit[j] = 0.0;                          /* zero rowhit for reuse */
394       }
395     }
396     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
397   }
398   if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz);
399 
400   if (isBAIJ) {
401     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
402   } else {
403     ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
404   }
405   ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr);
406   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
407 
408   c->ctype                  = IS_COLORING_GHOSTED;
409   if (isBAIJ) {
410     mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ;
411     ierr = PetscMalloc(bs*mat->cmap->N*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr);
412   } else {
413     mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
414   }
415   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 #undef __FUNCT__
420 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
421 /*
422     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
423     This is why it has the ugly code with the MatGetBlockSize()
424 */
425 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
426 {
427   PetscErrorCode ierr;
428   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
429   const PetscInt *is,*rows,*ci,*cj;
430   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
431   IS             *isa;
432   PetscBool      done,flg = PETSC_FALSE;
433   PetscBool      flg1;
434   PetscBool      new_impl=PETSC_FALSE;
435 
436   PetscFunctionBegin;
437   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
438   if (new_impl){
439     ierr =  MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr);
440     PetscFunctionReturn(0);
441   }
442   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
443 
444   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
445   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
446      SeqBAIJ calls this routine, thus check it */
447   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
448   if (flg1) {
449     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
450   }
451 
452   N         = mat->cmap->N/bs;
453   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
454   c->N      = mat->cmap->N/bs;
455   c->m      = mat->rmap->N/bs;
456   c->rstart = 0;
457 
458   c->ncolors = nis;
459   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
460   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
461   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
462   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
463   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
464 
465   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
466   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
467 
468   /*
469      Temporary option to allow for debugging/testing
470   */
471   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
472 
473   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
474   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
475 
476   for (i=0; i<nis; i++) {
477     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
478     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
479 
480     c->ncolumns[i] = n;
481     if (n) {
482       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
483       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
484     } else {
485       c->columns[i] = 0;
486     }
487 
488     if (!flg) { /* ------------------------------------------------------------------------------*/
489       /* fast, crude version requires O(N*N) work */
490       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
491       /* loop over columns*/
492       for (j=0; j<n; j++) {
493         col  = is[j];
494         rows = cj + ci[col];
495         m    = ci[col+1] - ci[col];
496         /* loop over columns marking them in rowhit */
497         for (k=0; k<m; k++) {
498           rowhit[*rows++] = col + 1;
499         }
500       }
501       /* count the number of hits */
502       nrows = 0;
503       for (j=0; j<N; j++) {
504         if (rowhit[j]) nrows++;
505       }
506       c->nrows[i] = nrows;
507       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
508       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
509       nrows       = 0;
510       for (j=0; j<N; j++) {
511         if (rowhit[j]) {
512           c->rows[i][nrows]          = j;
513           c->columnsforrow[i][nrows] = rowhit[j] - 1;
514           nrows++;
515         }
516       }
517     } else {  /*-------------------------------------------------------------------------------*/
518       /* slow version, using rowhit as a linked list */
519       PetscInt currentcol,fm,mfm;
520       rowhit[N] = N;
521       nrows     = 0;
522       /* loop over columns */
523       for (j=0; j<n; j++) {
524         col  = is[j];
525         rows = cj + ci[col];
526         m    = ci[col+1] - ci[col];
527         /* loop over columns marking them in rowhit */
528         fm = N;    /* fm points to first entry in linked list */
529         for (k=0; k<m; k++) {
530           currentcol = *rows++;
531           /* is it already in the list? */
532           do {
533             mfm = fm;
534             fm  = rowhit[fm];
535           } while (fm < currentcol);
536           /* not in list so add it */
537           if (fm != currentcol) {
538             nrows++;
539             columnsforrow[currentcol] = col;
540             /* next three lines insert new entry into linked list */
541             rowhit[mfm]        = currentcol;
542             rowhit[currentcol] = fm;
543             fm                 = currentcol;
544             /* fm points to present position in list since we know the columns are sorted */
545           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
546         }
547       }
548       c->nrows[i] = nrows;
549       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
550       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
551       /* now store the linked list of rows into c->rows[i] */
552       nrows = 0;
553       fm    = rowhit[N];
554       do {
555         c->rows[i][nrows]            = fm;
556         c->columnsforrow[i][nrows++] = columnsforrow[fm];
557         fm                           = rowhit[fm];
558       } while (fm < N);
559     } /* ---------------------------------------------------------------------------------------*/
560     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
561   }
562   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
563 
564   ierr = PetscFree(rowhit);CHKERRQ(ierr);
565   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
566 
567   /* Optimize by adding the vscale, and scaleforrow[][] fields */
568   /*
569        see the version for MPIAIJ
570   */
571   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
572   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
573   for (k=0; k<c->ncolors; k++) {
574     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
575     for (l=0; l<c->nrows[k]; l++) {
576       col = c->columnsforrow[k][l];
577 
578       c->vscaleforrow[k][l] = col;
579     }
580   }
581   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
582   PetscFunctionReturn(0);
583 }
584 
585