xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision 87d342027ce5cce7f65b04bf813b2774ad4afbf4)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/baij/seq/baij.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatFDColoringApply_SeqBAIJ"
7 PetscErrorCode  MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
8 {
9   PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
10   PetscErrorCode ierr;
11   PetscInt       bs=J->rmap->bs,bs2=bs*bs,i,j,k,l,row,col,N;
12   PetscScalar    dx,*y,*xx,*w3_array;
13   PetscScalar    *vscale_array;
14   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
15   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
16   void           *fctx   = coloring->fctx;
17   PetscBool      flg     = PETSC_FALSE;
18   Mat_SeqBAIJ    *csp=(Mat_SeqBAIJ*)J->data;
19   PetscScalar    *ca = csp->a,*ca_l;
20   PetscInt       brow,bcol,bspidx,nz,nz_i;
21 
22   PetscFunctionBegin;
23   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
24   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
25   if (flg) {
26     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
27   } else {
28     PetscBool assembled;
29     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
30     if (assembled) {
31       ierr = MatZeroEntries(J);CHKERRQ(ierr);
32     }
33   }
34 
35   if (!coloring->vscale) {
36     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
37   }
38 
39   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
40     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
41   }
42 
43   /* Set w1 = F(x1) */
44   if (!coloring->fset) {
45     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
46     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
47     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
48   } else {
49     coloring->fset = PETSC_FALSE;
50   }
51 
52   if (!coloring->w3) {
53     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
54     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
55   }
56   w3 = coloring->w3;
57 
58   /* Compute all the local scale factors: vscale = 1./dx = 1./(epsilon*xx) */
59   ierr = VecGetLocalSize(x1,&N);CHKERRQ(ierr);
60   ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
61   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
62 
63   /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
64   for (col=0; col<N; col++) {
65     if (coloring->htype[0] == 'w') {
66       dx = 1.0 + unorm;
67     } else {
68       dx = xx[col];
69     }
70     if (dx == (PetscScalar)0.0) dx = 1.0;
71     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
72     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
73     dx               *= epsilon;
74     vscale_array[col] = (PetscScalar)dx;
75   }
76 
77   /* Create bs vectors w2s and w3s */
78   Vec w2s[bs];
79   for (i=0; i<bs; i++) {
80     ierr = VecDuplicate(w2,&w2s[i]);CHKERRQ(ierr);
81   }
82 
83   nz = 0;
84   for (k=0; k<coloring->ncolors; k++) { /*  Loop over each color */
85     coloring->currentcolor = k;
86 
87     /*----------- new -------------*/
88     /* Compute w3 = x1 + dx */
89     for (i=0; i<bs; i++) {
90       ierr = VecCopy(x1,w3);CHKERRQ(ierr);
91       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
92       for (l=0; l<coloring->ncolumns[k]; l++) {
93         col = i + bs*coloring->columns[k][l];
94         w3_array[col] += vscale_array[col];
95       }
96       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
97 
98       /* Evaluate function w2s = F(w3) - F(x1) */
99       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
100       ierr = (*f)(sctx,w3,w2s[i],fctx);CHKERRQ(ierr);
101       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
102       ierr = VecAXPY(w2s[i],-1.0,w1);CHKERRQ(ierr);
103     }
104     /*----------- endof new -------------*/
105 
106     nz_i = nz;
107     for (i=0; i<bs; i++) {
108       nz = nz_i;
109       /*
110         Loop over rows of vector, putting results into Jacobian matrix
111       */
112       ierr = VecGetArray(w2s[i],&y);CHKERRQ(ierr);
113       for (l=0; l<coloring->nrows[k]; l++) {
114         brow   = coloring->rowcolden2sp3[nz++];
115         bcol   = coloring->rowcolden2sp3[nz++];
116         bspidx = coloring->rowcolden2sp3[nz++];
117         row   = bs*brow;
118         col   = bs*bcol + i;
119 
120         ca_l  = ca + bs2*bspidx + i*bs;
121         for (j=0; j<bs; j++) {
122           ca_l[j] = y[row+j]/vscale_array[col];
123         }
124       }
125       ierr = VecRestoreArray(w2s[i],&y);CHKERRQ(ierr);
126     } /* endof for (i=0; i<bs; i++) */
127   } /* endof for each color */
128   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
129   ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
130   for (i=0; i<bs; i++) {
131     ierr = VecDestroy(&w2s[i]);CHKERRQ(ierr);
132   }
133   coloring->currentcolor = -1;
134   PetscFunctionReturn(0);
135 }
136 
137 /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */
138 /* #define JACOBIANCOLOROPT */
139 #if defined(JACOBIANCOLOROPT)
140 #include <petsctime.h>
141 #endif
142 #undef __FUNCT__
143 #define __FUNCT__ "MatFDColoringApply_SeqAIJ"
144 PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
145 {
146   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
147   PetscErrorCode ierr;
148   PetscInt       k,l,row,col,N,bs=J->rmap->bs,bs2=bs*bs;
149   PetscScalar    dx,*y,*xx,*w3_array;
150   PetscScalar    *vscale_array;
151   PetscReal      epsilon=coloring->error_rel,umin = coloring->umin,unorm;
152   Vec            w1=coloring->w1,w2=coloring->w2,w3;
153   void           *fctx=coloring->fctx;
154   PetscBool      flg=PETSC_FALSE,isBAIJ=PETSC_FALSE;
155   PetscScalar    *ca;
156   const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
157   PetscInt       **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx;
158 #if defined(JACOBIANCOLOROPT)
159   PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11;
160 #endif
161 
162   PetscFunctionBegin;
163 #if defined(JACOBIANCOLOROPT)
164   ierr = PetscTime(&t0);CHKERRQ(ierr);
165 #endif
166   ierr = PetscObjectTypeCompare((PetscObject)J,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
167   if (isBAIJ) {
168     Mat_SeqBAIJ     *csp=(Mat_SeqBAIJ*)J->data;
169     ca = csp->a;
170   } else {
171     Mat_SeqAIJ     *csp=(Mat_SeqAIJ*)J->data;
172     ca = csp->a;
173     bs = 1; bs2 = 1;
174   }
175 
176   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
177   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
178   if (flg) {
179     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
180   } else {
181     PetscBool assembled;
182     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
183     if (assembled) {
184       ierr = MatZeroEntries(J);CHKERRQ(ierr);
185     }
186   }
187 
188   if (!coloring->vscale) {
189     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
190   }
191 
192   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
193     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
194   }
195 #if defined(JACOBIANCOLOROPT)
196     ierr = PetscTime(&t1);CHKERRQ(ierr);
197     t_init += t1 - t0;
198 #endif
199 
200   /* Set w1 = F(x1) */
201 #if defined(JACOBIANCOLOROPT)
202     ierr = PetscTime(&t0);CHKERRQ(ierr);
203 #endif
204   if (!coloring->fset) {
205     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
206     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
207     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
208   } else {
209     coloring->fset = PETSC_FALSE;
210   }
211 #if defined(JACOBIANCOLOROPT)
212     ierr = PetscTime(&t1);CHKERRQ(ierr);
213     t_setvals += t1 - t0;
214 #endif
215 
216 #if defined(JACOBIANCOLOROPT)
217     ierr = PetscTime(&t0);CHKERRQ(ierr);
218 #endif
219   if (!coloring->w3) {
220     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
221     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
222   }
223   w3 = coloring->w3;
224 
225   /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */
226   ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
227   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
228   ierr = VecGetSize(x1,&N);CHKERRQ(ierr);
229   for (col=0; col<N; col++) {
230     if (coloring->htype[0] == 'w') {
231       dx = 1.0 + unorm;
232     } else {
233       dx = xx[col];
234     }
235     if (dx == (PetscScalar)0.0) dx = 1.0;
236     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
237     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
238     dx               *= epsilon;
239     vscale_array[col] = (PetscScalar)dx;
240   }
241 #if defined(JACOBIANCOLOROPT)
242     ierr = PetscTime(&t1);CHKERRQ(ierr);
243     t_init += t1 - t0;
244 #endif
245 
246   nz  = 0;
247   for (k=0; k<ncolors; k++) { /* loop over colors */
248 #if defined(JACOBIANCOLOROPT)
249     ierr = PetscTime(&t0);CHKERRQ(ierr);
250 #endif
251     coloring->currentcolor = k;
252 
253     /*
254       Loop over each column associated with color
255       adding the perturbation to the vector w3 = x1 + dx.
256     */
257     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
258     ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
259     ncolumns_k = ncolumns[k];
260     for (l=0; l<ncolumns_k; l++) { /* loop over columns */
261       col = columns[k][l];
262       w3_array[col] += vscale_array[col];
263     }
264     ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
265 #if defined(JACOBIANCOLOROPT)
266     ierr = PetscTime(&t1);CHKERRQ(ierr);
267     t_dx += t1 - t0;
268 #endif
269 
270     /*
271       Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
272                            w2 = F(x1 + dx) - F(x1)
273     */
274 #if defined(JACOBIANCOLOROPT)
275     ierr = PetscTime(&t0);CHKERRQ(ierr);
276 #endif
277     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
278     ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
279     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
280 #if defined(JACOBIANCOLOROPT)
281     ierr = PetscTime(&t1);CHKERRQ(ierr);
282     t_func += t1 - t0;
283 #endif
284 
285 #if defined(JACOBIANCOLOROPT)
286     ierr = PetscTime(&t0);CHKERRQ(ierr);
287 #endif
288     ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
289 
290     /*
291       Loop over rows of vector, putting w2/dx into Jacobian matrix
292     */
293     ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
294     nrows_k = nrows[k];
295     for (l=0; l<nrows_k; l++) { /* loop over rows */
296 #if defined(JACOBIANCOLOROPT)
297     ierr = PetscTime(&t00);CHKERRQ(ierr);
298 #endif
299       row   = coloring->rowcolden2sp3[nz++];
300       col   = coloring->rowcolden2sp3[nz++];
301       spidx = coloring->rowcolden2sp3[nz++];
302 #if defined(JACOBIANCOLOROPT)
303       ierr = PetscTime(&t11);CHKERRQ(ierr);
304       t_kl += t11 - t00;
305 #endif
306       //printf(" row col val
307       ca[spidx*bs2] = y[row*bs]/vscale_array[col*bs];
308       //printf(" (%d, %d, %g)\n", row*bs,col*bs,ca[spidx*bs2]);
309     }
310     ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
311 
312 #if defined(JACOBIANCOLOROPT)
313     ierr = PetscTime(&t1);CHKERRQ(ierr);
314     t_setvals += t1 - t0;
315 #endif
316   } /* endof for each color */
317 #if defined(JACOBIANCOLOROPT)
318   printf("     FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx);
319   printf("     FDColorApply time: kl %g\n",t_kl);
320 #endif
321   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
322   ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
323 
324   coloring->currentcolor = -1;
325   PetscFunctionReturn(0);
326 }
327 
328 #undef __FUNCT__
329 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp"
330 /*
331     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
332 */
333 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c)
334 {
335   PetscErrorCode ierr;
336   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
337   const PetscInt *is,*rows,*ci,*cj;
338   PetscInt       nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz;
339   IS             *isa;
340   PetscBool      isBAIJ;
341   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
342 
343   PetscFunctionBegin;
344   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
345 
346   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
347      SeqBAIJ calls this routine, thus check it */
348   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
349   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
350   if (!isBAIJ) {
351     bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */
352   }
353   N         = mat->cmap->N/bs;
354   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
355   c->N      = mat->cmap->N/bs;
356   c->m      = mat->rmap->N/bs;
357   c->rstart = 0;
358 
359   c->ncolors = nis;
360   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
361   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
362   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
363   ierr       = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr);
364 
365   if (isBAIJ) {
366     ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
367   } else {
368     ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
369   }
370 
371   ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr);
372   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
373 
374   nz = 0;
375   for (i=0; i<nis; i++) { /* loop over colors */
376     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
377     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
378 
379     c->ncolumns[i] = n;
380     if (n) {
381       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
382       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
383     } else {
384       c->columns[i] = 0;
385     }
386 
387     /* fast, crude version requires O(N*N) work */
388     nrows = 0;
389     for (j=0; j<n; j++) {  /* loop over columns */
390       col  = is[j];
391       rows = cj + ci[col];
392       m    = ci[col+1] - ci[col];
393       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
394         rowhit[*rows]     = col + 1;
395         spidxhit[*rows++] = spidx[ci[col] + k];
396         nrows++;
397       }
398     }
399     c->nrows[i] = nrows; /* total num of rows for this color */
400 
401     nrows = 0;
402     for (j=0; j<N; j++) { /* loop over rows */
403       if (rowhit[j]) {
404         c->rowcolden2sp3[nz++]   = j;             /* row index */
405         c->rowcolden2sp3[nz++] = rowhit[j] - 1;   /* column index */
406         c->rowcolden2sp3[nz++] = spidxhit[j];     /* den2sp index */
407         rowhit[j] = 0.0;                          /* zero rowhit for reuse */
408       }
409     }
410     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
411   }
412   if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz);
413 
414   if (isBAIJ) {
415     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
416   } else {
417     ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
418   }
419   ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr);
420   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
421 
422   c->ctype                  = IS_COLORING_GHOSTED;
423   if (isBAIJ) {
424     mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ;
425   } else {
426     mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
427   }
428   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 #undef __FUNCT__
433 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
434 /*
435     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
436     This is why it has the ugly code with the MatGetBlockSize()
437 */
438 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
439 {
440   PetscErrorCode ierr;
441   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
442   const PetscInt *is,*rows,*ci,*cj;
443   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
444   IS             *isa;
445   PetscBool      done,flg = PETSC_FALSE;
446   PetscBool      flg1;
447   PetscBool      new_impl=PETSC_FALSE;
448 
449   PetscFunctionBegin;
450   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
451   if (new_impl){
452     ierr =  MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr);
453     PetscFunctionReturn(0);
454   }
455   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
456 
457   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
458   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
459      SeqBAIJ calls this routine, thus check it */
460   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
461   if (flg1) {
462     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
463   }
464 
465   N         = mat->cmap->N/bs;
466   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
467   c->N      = mat->cmap->N/bs;
468   c->m      = mat->rmap->N/bs;
469   c->rstart = 0;
470 
471   c->ncolors = nis;
472   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
473   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
474   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
475   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
476   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
477 
478   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
479   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
480 
481   /*
482      Temporary option to allow for debugging/testing
483   */
484   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
485 
486   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
487   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
488 
489   for (i=0; i<nis; i++) {
490     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
491     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
492 
493     c->ncolumns[i] = n;
494     if (n) {
495       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
496       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
497     } else {
498       c->columns[i] = 0;
499     }
500 
501     if (!flg) { /* ------------------------------------------------------------------------------*/
502       /* fast, crude version requires O(N*N) work */
503       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
504       /* loop over columns*/
505       for (j=0; j<n; j++) {
506         col  = is[j];
507         rows = cj + ci[col];
508         m    = ci[col+1] - ci[col];
509         /* loop over columns marking them in rowhit */
510         for (k=0; k<m; k++) {
511           rowhit[*rows++] = col + 1;
512         }
513       }
514       /* count the number of hits */
515       nrows = 0;
516       for (j=0; j<N; j++) {
517         if (rowhit[j]) nrows++;
518       }
519       c->nrows[i] = nrows;
520       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
521       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
522       nrows       = 0;
523       for (j=0; j<N; j++) {
524         if (rowhit[j]) {
525           c->rows[i][nrows]          = j;
526           c->columnsforrow[i][nrows] = rowhit[j] - 1;
527           nrows++;
528         }
529       }
530     } else {  /*-------------------------------------------------------------------------------*/
531       /* slow version, using rowhit as a linked list */
532       PetscInt currentcol,fm,mfm;
533       rowhit[N] = N;
534       nrows     = 0;
535       /* loop over columns */
536       for (j=0; j<n; j++) {
537         col  = is[j];
538         rows = cj + ci[col];
539         m    = ci[col+1] - ci[col];
540         /* loop over columns marking them in rowhit */
541         fm = N;    /* fm points to first entry in linked list */
542         for (k=0; k<m; k++) {
543           currentcol = *rows++;
544           /* is it already in the list? */
545           do {
546             mfm = fm;
547             fm  = rowhit[fm];
548           } while (fm < currentcol);
549           /* not in list so add it */
550           if (fm != currentcol) {
551             nrows++;
552             columnsforrow[currentcol] = col;
553             /* next three lines insert new entry into linked list */
554             rowhit[mfm]        = currentcol;
555             rowhit[currentcol] = fm;
556             fm                 = currentcol;
557             /* fm points to present position in list since we know the columns are sorted */
558           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
559         }
560       }
561       c->nrows[i] = nrows;
562       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
563       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
564       /* now store the linked list of rows into c->rows[i] */
565       nrows = 0;
566       fm    = rowhit[N];
567       do {
568         c->rows[i][nrows]            = fm;
569         c->columnsforrow[i][nrows++] = columnsforrow[fm];
570         fm                           = rowhit[fm];
571       } while (fm < N);
572     } /* ---------------------------------------------------------------------------------------*/
573     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
574   }
575   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
576 
577   ierr = PetscFree(rowhit);CHKERRQ(ierr);
578   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
579 
580   /* Optimize by adding the vscale, and scaleforrow[][] fields */
581   /*
582        see the version for MPIAIJ
583   */
584   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
585   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
586   for (k=0; k<c->ncolors; k++) {
587     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
588     for (l=0; l<c->nrows[k]; l++) {
589       col = c->columnsforrow[k][l];
590 
591       c->vscaleforrow[k][l] = col;
592     }
593   }
594   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
595   PetscFunctionReturn(0);
596 }
597 
598