xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision 642b4fb16a28ae397e251fff3acee39b29a57859)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/baij/seq/baij.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatFDColoringApply_SeqBAIJ"
7 PetscErrorCode  MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
8 {
9   PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
10   PetscErrorCode ierr;
11   PetscInt       bs=J->rmap->bs,bs2=bs*bs,i,j,k,l,row,col,N;
12   PetscScalar    dx,*y,*xx,*w3_array;
13   PetscScalar    *vscale_array;
14   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
15   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
16   void           *fctx   = coloring->fctx;
17   PetscBool      flg     = PETSC_FALSE;
18   Mat_SeqBAIJ    *csp=(Mat_SeqBAIJ*)J->data;
19   PetscScalar    *ca = csp->a,*ca_l;
20   PetscInt       brow,bcol,bspidx,spidx,nz,nz_i;
21 
22   PetscFunctionBegin;
23   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
24   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
25   if (flg) {
26     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
27   } else {
28     PetscBool assembled;
29     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
30     if (assembled) {
31       ierr = MatZeroEntries(J);CHKERRQ(ierr);
32     }
33   }
34 
35   if (!coloring->vscale) {
36     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
37   }
38 
39   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
40     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
41   }
42 
43   /* Set w1 = F(x1) */
44   if (!coloring->fset) {
45     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
46     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
47     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
48   } else {
49     coloring->fset = PETSC_FALSE;
50   }
51 
52   if (!coloring->w3) {
53     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
54     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
55   }
56   w3 = coloring->w3;
57 
58   /* Compute all the local scale factors: vscale = 1./dx = 1./(epsilon*xx) */
59   ierr = VecGetLocalSize(x1,&N);CHKERRQ(ierr);
60   ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
61   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
62 
63   /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
64   for (col=0; col<N; col++) {
65     if (coloring->htype[0] == 'w') {
66       dx = 1.0 + unorm;
67     } else {
68       dx = xx[col];
69     }
70     if (dx == (PetscScalar)0.0) dx = 1.0;
71     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
72     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
73     dx               *= epsilon;
74     vscale_array[col] = (PetscScalar)dx;
75   }
76 
77   nz = 0;
78   for (k=0; k<coloring->ncolors; k++) { /*  Loop over each color */
79     coloring->currentcolor = k;
80     nz_i = nz;
81     for (i=0; i<bs; i++) {
82       nz = nz_i;
83       ierr = VecCopy(x1,w3);CHKERRQ(ierr);
84       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
85 
86       /*
87         Loop over each column associated with color
88         adding the perturbation to the vector w3.
89       */
90       for (l=0; l<coloring->ncolumns[k]; l++) {
91         col = i + bs*coloring->columns[k][l];
92         w3_array[col] += vscale_array[col];
93       }
94       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
95 
96       /*
97         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
98         w2 = F(x1 + dx) - F(x1)
99       */
100       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
101       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
102       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
103       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
104 
105       /*
106         Loop over rows of vector, putting results into Jacobian matrix
107       */
108       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
109       for (l=0; l<coloring->nrows[k]; l++) {
110         brow   = coloring->rowcolden2sp3[nz++];
111         bcol   = coloring->rowcolden2sp3[nz++];
112         bspidx = coloring->rowcolden2sp3[nz++];
113         row   = bs*brow;
114         col   = bs*bcol + i;
115 
116         ca_l  = ca + bs2*bspidx + i*bs;
117         for (j=0; j<bs; j++) {
118           ca_l[j] = y[row+j]/vscale_array[col];
119         }
120       }
121       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
122     } /* endof for (i=0; i<bs; i++) */
123   } /* endof for each color */
124   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
125   ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
126 
127   coloring->currentcolor = -1;
128   PetscFunctionReturn(0);
129 }
130 
131 /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */
132 /* #define JACOBIANCOLOROPT */
133 #if defined(JACOBIANCOLOROPT)
134 #include <petsctime.h>
135 #endif
136 #undef __FUNCT__
137 #define __FUNCT__ "MatFDColoringApply_SeqAIJ"
138 PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
139 {
140   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
141   PetscErrorCode ierr;
142   PetscInt       k,l,row,col,N,bs=J->rmap->bs,bs2=bs*bs;
143   PetscScalar    dx,*y,*xx,*w3_array;
144   PetscScalar    *vscale_array;
145   PetscReal      epsilon=coloring->error_rel,umin = coloring->umin,unorm;
146   Vec            w1=coloring->w1,w2=coloring->w2,w3;
147   void           *fctx=coloring->fctx;
148   PetscBool      flg=PETSC_FALSE,isBAIJ=PETSC_FALSE;
149   PetscScalar    *ca;
150   const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
151   PetscInt       **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx;
152 #if defined(JACOBIANCOLOROPT)
153   PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11;
154 #endif
155 
156   PetscFunctionBegin;
157 #if defined(JACOBIANCOLOROPT)
158   ierr = PetscTime(&t0);CHKERRQ(ierr);
159 #endif
160   ierr = PetscObjectTypeCompare((PetscObject)J,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
161   if (isBAIJ) {
162     Mat_SeqBAIJ     *csp=(Mat_SeqBAIJ*)J->data;
163     ca = csp->a;
164   } else {
165     Mat_SeqAIJ     *csp=(Mat_SeqAIJ*)J->data;
166     ca = csp->a;
167     bs = 1; bs2 = 1;
168   }
169 
170   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
171   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
172   if (flg) {
173     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
174   } else {
175     PetscBool assembled;
176     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
177     if (assembled) {
178       ierr = MatZeroEntries(J);CHKERRQ(ierr);
179     }
180   }
181 
182   if (!coloring->vscale) {
183     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
184   }
185 
186   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
187     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
188   }
189 #if defined(JACOBIANCOLOROPT)
190     ierr = PetscTime(&t1);CHKERRQ(ierr);
191     t_init += t1 - t0;
192 #endif
193 
194   /* Set w1 = F(x1) */
195 #if defined(JACOBIANCOLOROPT)
196     ierr = PetscTime(&t0);CHKERRQ(ierr);
197 #endif
198   if (!coloring->fset) {
199     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
200     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
201     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
202   } else {
203     coloring->fset = PETSC_FALSE;
204   }
205 #if defined(JACOBIANCOLOROPT)
206     ierr = PetscTime(&t1);CHKERRQ(ierr);
207     t_setvals += t1 - t0;
208 #endif
209 
210 #if defined(JACOBIANCOLOROPT)
211     ierr = PetscTime(&t0);CHKERRQ(ierr);
212 #endif
213   if (!coloring->w3) {
214     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
215     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
216   }
217   w3 = coloring->w3;
218 
219   /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */
220   ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
221   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
222   ierr = VecGetSize(x1,&N);CHKERRQ(ierr);
223   for (col=0; col<N; col++) {
224     if (coloring->htype[0] == 'w') {
225       dx = 1.0 + unorm;
226     } else {
227       dx = xx[col];
228     }
229     if (dx == (PetscScalar)0.0) dx = 1.0;
230     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
231     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
232     dx               *= epsilon;
233     vscale_array[col] = (PetscScalar)dx;
234   }
235 #if defined(JACOBIANCOLOROPT)
236     ierr = PetscTime(&t1);CHKERRQ(ierr);
237     t_init += t1 - t0;
238 #endif
239 
240   nz  = 0;
241   for (k=0; k<ncolors; k++) { /* loop over colors */
242 #if defined(JACOBIANCOLOROPT)
243     ierr = PetscTime(&t0);CHKERRQ(ierr);
244 #endif
245     coloring->currentcolor = k;
246 
247     /*
248       Loop over each column associated with color
249       adding the perturbation to the vector w3 = x1 + dx.
250     */
251     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
252     ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
253     ncolumns_k = ncolumns[k];
254     for (l=0; l<ncolumns_k; l++) { /* loop over columns */
255       col = columns[k][l];
256       w3_array[col] += vscale_array[col];
257     }
258     ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
259 #if defined(JACOBIANCOLOROPT)
260     ierr = PetscTime(&t1);CHKERRQ(ierr);
261     t_dx += t1 - t0;
262 #endif
263 
264     /*
265       Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
266                            w2 = F(x1 + dx) - F(x1)
267     */
268 #if defined(JACOBIANCOLOROPT)
269     ierr = PetscTime(&t0);CHKERRQ(ierr);
270 #endif
271     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
272     ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
273     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
274 #if defined(JACOBIANCOLOROPT)
275     ierr = PetscTime(&t1);CHKERRQ(ierr);
276     t_func += t1 - t0;
277 #endif
278 
279 #if defined(JACOBIANCOLOROPT)
280     ierr = PetscTime(&t0);CHKERRQ(ierr);
281 #endif
282     ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
283 
284     /*
285       Loop over rows of vector, putting w2/dx into Jacobian matrix
286     */
287     ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
288     nrows_k = nrows[k];
289     for (l=0; l<nrows_k; l++) { /* loop over rows */
290 #if defined(JACOBIANCOLOROPT)
291     ierr = PetscTime(&t00);CHKERRQ(ierr);
292 #endif
293       row   = coloring->rowcolden2sp3[nz++];
294       col   = coloring->rowcolden2sp3[nz++];
295       spidx = coloring->rowcolden2sp3[nz++];
296 #if defined(JACOBIANCOLOROPT)
297       ierr = PetscTime(&t11);CHKERRQ(ierr);
298       t_kl += t11 - t00;
299 #endif
300       //printf(" row col val
301       ca[spidx*bs2] = y[row*bs]/vscale_array[col*bs];
302       //printf(" (%d, %d, %g)\n", row*bs,col*bs,ca[spidx*bs2]);
303     }
304     ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
305 
306 #if defined(JACOBIANCOLOROPT)
307     ierr = PetscTime(&t1);CHKERRQ(ierr);
308     t_setvals += t1 - t0;
309 #endif
310   } /* endof for each color */
311 #if defined(JACOBIANCOLOROPT)
312   printf("     FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx);
313   printf("     FDColorApply time: kl %g\n",t_kl);
314 #endif
315   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
316   ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
317 
318   coloring->currentcolor = -1;
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp"
324 /*
325     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
326 */
327 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c)
328 {
329   PetscErrorCode ierr;
330   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
331   const PetscInt *is,*rows,*ci,*cj;
332   PetscInt       nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz;
333   IS             *isa;
334   PetscBool      isBAIJ;
335   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
336 
337   PetscFunctionBegin;
338   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
339 
340   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
341      SeqBAIJ calls this routine, thus check it */
342   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
343   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
344   if (!isBAIJ) {
345     bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */
346   }
347   N         = mat->cmap->N/bs;
348   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
349   c->N      = mat->cmap->N/bs;
350   c->m      = mat->rmap->N/bs;
351   c->rstart = 0;
352 
353   c->ncolors = nis;
354   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
355   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
356   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
357   ierr       = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr);
358 
359   if (isBAIJ) {
360     ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
361   } else {
362     ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
363   }
364 
365   ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr);
366   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
367 
368   nz = 0;
369   for (i=0; i<nis; i++) { /* loop over colors */
370     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
371     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
372 
373     c->ncolumns[i] = n;
374     if (n) {
375       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
376       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
377     } else {
378       c->columns[i] = 0;
379     }
380 
381     /* fast, crude version requires O(N*N) work */
382     nrows = 0;
383     for (j=0; j<n; j++) {  /* loop over columns */
384       col  = is[j];
385       rows = cj + ci[col];
386       m    = ci[col+1] - ci[col];
387       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
388         rowhit[*rows]     = col + 1;
389         spidxhit[*rows++] = spidx[ci[col] + k];
390         nrows++;
391       }
392     }
393     c->nrows[i] = nrows; /* total num of rows for this color */
394 
395     nrows = 0;
396     for (j=0; j<N; j++) { /* loop over rows */
397       if (rowhit[j]) {
398         c->rowcolden2sp3[nz++]   = j;             /* row index */
399         c->rowcolden2sp3[nz++] = rowhit[j] - 1;   /* column index */
400         c->rowcolden2sp3[nz++] = spidxhit[j];     /* den2sp index */
401         rowhit[j] = 0.0;                          /* zero rowhit for reuse */
402       }
403     }
404     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
405   }
406   if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz);
407 
408   if (isBAIJ) {
409     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
410   } else {
411     ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
412   }
413   ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr);
414   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
415 
416   c->ctype                  = IS_COLORING_GHOSTED;
417   if (isBAIJ) {
418     mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ;
419   } else {
420     mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
421   }
422   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
423   PetscFunctionReturn(0);
424 }
425 
426 #undef __FUNCT__
427 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
428 /*
429     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
430     This is why it has the ugly code with the MatGetBlockSize()
431 */
432 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
433 {
434   PetscErrorCode ierr;
435   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
436   const PetscInt *is,*rows,*ci,*cj;
437   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
438   IS             *isa;
439   PetscBool      done,flg = PETSC_FALSE;
440   PetscBool      flg1;
441   PetscBool      new_impl=PETSC_FALSE;
442 
443   PetscFunctionBegin;
444   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
445   if (new_impl){
446     ierr =  MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr);
447     PetscFunctionReturn(0);
448   }
449   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
450 
451   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
452   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
453      SeqBAIJ calls this routine, thus check it */
454   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
455   if (flg1) {
456     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
457   }
458 
459   N         = mat->cmap->N/bs;
460   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
461   c->N      = mat->cmap->N/bs;
462   c->m      = mat->rmap->N/bs;
463   c->rstart = 0;
464 
465   c->ncolors = nis;
466   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
467   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
468   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
469   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
470   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
471 
472   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
473   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
474 
475   /*
476      Temporary option to allow for debugging/testing
477   */
478   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
479 
480   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
481   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
482 
483   for (i=0; i<nis; i++) {
484     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
485     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
486 
487     c->ncolumns[i] = n;
488     if (n) {
489       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
490       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
491     } else {
492       c->columns[i] = 0;
493     }
494 
495     if (!flg) { /* ------------------------------------------------------------------------------*/
496       /* fast, crude version requires O(N*N) work */
497       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
498       /* loop over columns*/
499       for (j=0; j<n; j++) {
500         col  = is[j];
501         rows = cj + ci[col];
502         m    = ci[col+1] - ci[col];
503         /* loop over columns marking them in rowhit */
504         for (k=0; k<m; k++) {
505           rowhit[*rows++] = col + 1;
506         }
507       }
508       /* count the number of hits */
509       nrows = 0;
510       for (j=0; j<N; j++) {
511         if (rowhit[j]) nrows++;
512       }
513       c->nrows[i] = nrows;
514       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
515       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
516       nrows       = 0;
517       for (j=0; j<N; j++) {
518         if (rowhit[j]) {
519           c->rows[i][nrows]          = j;
520           c->columnsforrow[i][nrows] = rowhit[j] - 1;
521           nrows++;
522         }
523       }
524     } else {  /*-------------------------------------------------------------------------------*/
525       /* slow version, using rowhit as a linked list */
526       PetscInt currentcol,fm,mfm;
527       rowhit[N] = N;
528       nrows     = 0;
529       /* loop over columns */
530       for (j=0; j<n; j++) {
531         col  = is[j];
532         rows = cj + ci[col];
533         m    = ci[col+1] - ci[col];
534         /* loop over columns marking them in rowhit */
535         fm = N;    /* fm points to first entry in linked list */
536         for (k=0; k<m; k++) {
537           currentcol = *rows++;
538           /* is it already in the list? */
539           do {
540             mfm = fm;
541             fm  = rowhit[fm];
542           } while (fm < currentcol);
543           /* not in list so add it */
544           if (fm != currentcol) {
545             nrows++;
546             columnsforrow[currentcol] = col;
547             /* next three lines insert new entry into linked list */
548             rowhit[mfm]        = currentcol;
549             rowhit[currentcol] = fm;
550             fm                 = currentcol;
551             /* fm points to present position in list since we know the columns are sorted */
552           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
553         }
554       }
555       c->nrows[i] = nrows;
556       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
557       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
558       /* now store the linked list of rows into c->rows[i] */
559       nrows = 0;
560       fm    = rowhit[N];
561       do {
562         c->rows[i][nrows]            = fm;
563         c->columnsforrow[i][nrows++] = columnsforrow[fm];
564         fm                           = rowhit[fm];
565       } while (fm < N);
566     } /* ---------------------------------------------------------------------------------------*/
567     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
568   }
569   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
570 
571   ierr = PetscFree(rowhit);CHKERRQ(ierr);
572   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
573 
574   /* Optimize by adding the vscale, and scaleforrow[][] fields */
575   /*
576        see the version for MPIAIJ
577   */
578   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
579   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
580   for (k=0; k<c->ncolors; k++) {
581     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
582     for (l=0; l<c->nrows[k]; l++) {
583       col = c->columnsforrow[k][l];
584 
585       c->vscaleforrow[k][l] = col;
586     }
587   }
588   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
589   PetscFunctionReturn(0);
590 }
591 
592