xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision ba7469d95453f8c5091c51ae3b026bdf3e7c55db)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/baij/seq/baij.h>
4 
5 /* #define STOREVALS */
6 #undef __FUNCT__
7 #define __FUNCT__ "MatFDColoringApply_SeqBAIJ"
8 PetscErrorCode  MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
9 {
10   PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
11   PetscErrorCode ierr;
12   PetscInt       bs=J->rmap->bs,bs2=bs*bs,i,j,k,l,row,col,N;
13   PetscScalar    dx,*y,*xx,*w3_array;
14   PetscScalar    *vscale_array;
15   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
16   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
17   void           *fctx   = coloring->fctx;
18   PetscBool      flg     = PETSC_FALSE;
19   Mat_SeqBAIJ    *csp=(Mat_SeqBAIJ*)J->data;
20   PetscScalar    *ca = csp->a,*ca_l;
21   PetscInt       brow,bcol,bspidx,spidx,nz,nz_i;
22 #if defined(STOREVALS)
23   PetscScalar   *vals,*vals_l;
24 #endif
25 
26 
27   PetscFunctionBegin;
28   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
29   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
30   if (flg) {
31     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
32   } else {
33     PetscBool assembled;
34     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
35     if (assembled) {
36       ierr = MatZeroEntries(J);CHKERRQ(ierr);
37     }
38   }
39 
40   if (!coloring->vscale) {
41     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
42   }
43 
44   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
45     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
46   }
47 
48   /* Set w1 = F(x1) */
49   if (!coloring->fset) {
50     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
51     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
52     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
53   } else {
54     coloring->fset = PETSC_FALSE;
55   }
56 
57   if (!coloring->w3) {
58     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
59     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
60   }
61   w3 = coloring->w3;
62 
63   /* Compute all the local scale factors: vscale = 1./dx = 1./(epsilon*xx) */
64   ierr = VecGetLocalSize(x1,&N);CHKERRQ(ierr);
65   ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
66   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
67 
68   /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
69   for (col=0; col<N; col++) {
70     if (coloring->htype[0] == 'w') {
71       dx = 1.0 + unorm;
72     } else {
73       dx = xx[col];
74     }
75     if (dx == (PetscScalar)0.0) dx = 1.0;
76     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
77     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
78     dx               *= epsilon;
79     vscale_array[col] = (PetscScalar)dx;
80   }
81 
82 #if defined(STOREVALS)
83   ierr = PetscMalloc(N*bs*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
84 #endif
85 
86   nz = 0;
87   for (k=0; k<coloring->ncolors; k++) { /*  Loop over each color */
88     coloring->currentcolor = k;
89     nz_i = nz;
90     for (i=0; i<bs; i++) {
91       nz = nz_i;
92       ierr = VecCopy(x1,w3);CHKERRQ(ierr);
93       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
94 
95       /*
96         Loop over each column associated with color
97         adding the perturbation to the vector w3.
98       */
99       for (l=0; l<coloring->ncolumns[k]; l++) {
100         col = i + bs*coloring->columns[k][l];
101         w3_array[col] += vscale_array[col];
102       }
103       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
104 
105       /*
106         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
107         w2 = F(x1 + dx) - F(x1)
108       */
109       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
110       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
111       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
112       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
113 
114       /*
115         Loop over rows of vector, putting results into Jacobian matrix
116       */
117       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
118       for (l=0; l<coloring->nrows[k]; l++) {
119         brow   = coloring->rowcolden2sp3[nz++];
120         bcol   = coloring->rowcolden2sp3[nz++];
121         bspidx = coloring->rowcolden2sp3[nz++];
122 #if defined(STOREVALS)
123         vals_l = vals + l*bs2;
124 #endif
125         row   = bs*brow;
126         col   = bs*bcol + i;
127 #if !defined(STOREVALS)
128         ca_l  = ca + bs2*bspidx + i*bs;
129 #endif
130         for (j=0; j<bs; j++) {
131 #if defined(STOREVALS)
132           vals_l[i*bs+j] = y[row+j]/vscale_array[col];
133 #else
134           ca_l[j] = y[row+j]/vscale_array[col];
135 #endif
136         }
137       }
138       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
139     } /* endof for (i=0; i<bs; i++) */
140 
141 #if defined(STOREVALS)
142     /* insert vals to J */
143     nz = nz_i;
144     for (l=0; l<coloring->nrows[k]; l++) {
145       nz += 2;
146       bspidx = coloring->rowcolden2sp3[nz++];
147       vals_l  = vals + l*bs2;
148       ca_l    = ca + bs2*bspidx;
149       for (i=0; i<bs2; i++) {
150         ca_l[i] = vals_l[i];
151       }
152     }
153 #endif
154   } /* endof for each color */
155   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
156   ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
157 #if defined(STOREVALS)
158   ierr = PetscFree(vals);CHKERRQ(ierr);
159 #endif
160 
161   coloring->currentcolor = -1;
162   PetscFunctionReturn(0);
163 }
164 
165 /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */
166 /* #define JACOBIANCOLOROPT */
167 #if defined(JACOBIANCOLOROPT)
168 #include <petsctime.h>
169 #endif
170 #undef __FUNCT__
171 #define __FUNCT__ "MatFDColoringApply_SeqAIJ"
172 PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
173 {
174   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
175   PetscErrorCode ierr;
176   PetscInt       k,l,row,col,N,bs=J->rmap->bs,bs2=bs*bs;
177   PetscScalar    dx,*y,*xx,*w3_array;
178   PetscScalar    *vscale_array;
179   PetscReal      epsilon=coloring->error_rel,umin = coloring->umin,unorm;
180   Vec            w1=coloring->w1,w2=coloring->w2,w3;
181   void           *fctx=coloring->fctx;
182   PetscBool      flg=PETSC_FALSE,isBAIJ=PETSC_FALSE;
183   PetscScalar    *ca;
184   const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
185   PetscInt       **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx;
186 #if defined(JACOBIANCOLOROPT)
187   PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11;
188 #endif
189 
190   PetscFunctionBegin;
191 #if defined(JACOBIANCOLOROPT)
192   ierr = PetscTime(&t0);CHKERRQ(ierr);
193 #endif
194   ierr = PetscObjectTypeCompare((PetscObject)J,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
195   if (isBAIJ) {
196     Mat_SeqBAIJ     *csp=(Mat_SeqBAIJ*)J->data;
197     ca = csp->a;
198   } else {
199     Mat_SeqAIJ     *csp=(Mat_SeqAIJ*)J->data;
200     ca = csp->a;
201     bs = 1; bs2 = 1;
202   }
203 
204   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
205   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
206   if (flg) {
207     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
208   } else {
209     PetscBool assembled;
210     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
211     if (assembled) {
212       ierr = MatZeroEntries(J);CHKERRQ(ierr);
213     }
214   }
215 
216   if (!coloring->vscale) {
217     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
218   }
219 
220   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
221     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
222   }
223 #if defined(JACOBIANCOLOROPT)
224     ierr = PetscTime(&t1);CHKERRQ(ierr);
225     t_init += t1 - t0;
226 #endif
227 
228   /* Set w1 = F(x1) */
229 #if defined(JACOBIANCOLOROPT)
230     ierr = PetscTime(&t0);CHKERRQ(ierr);
231 #endif
232   if (!coloring->fset) {
233     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
234     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
235     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
236   } else {
237     coloring->fset = PETSC_FALSE;
238   }
239 #if defined(JACOBIANCOLOROPT)
240     ierr = PetscTime(&t1);CHKERRQ(ierr);
241     t_setvals += t1 - t0;
242 #endif
243 
244 #if defined(JACOBIANCOLOROPT)
245     ierr = PetscTime(&t0);CHKERRQ(ierr);
246 #endif
247   if (!coloring->w3) {
248     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
249     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
250   }
251   w3 = coloring->w3;
252 
253   /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */
254   ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
255   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
256   ierr = VecGetSize(x1,&N);CHKERRQ(ierr);
257   for (col=0; col<N; col++) {
258     if (coloring->htype[0] == 'w') {
259       dx = 1.0 + unorm;
260     } else {
261       dx = xx[col];
262     }
263     if (dx == (PetscScalar)0.0) dx = 1.0;
264     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
265     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
266     dx               *= epsilon;
267     vscale_array[col] = (PetscScalar)dx;
268   }
269 #if defined(JACOBIANCOLOROPT)
270     ierr = PetscTime(&t1);CHKERRQ(ierr);
271     t_init += t1 - t0;
272 #endif
273 
274   nz  = 0;
275   for (k=0; k<ncolors; k++) { /* loop over colors */
276 #if defined(JACOBIANCOLOROPT)
277     ierr = PetscTime(&t0);CHKERRQ(ierr);
278 #endif
279     coloring->currentcolor = k;
280 
281     /*
282       Loop over each column associated with color
283       adding the perturbation to the vector w3 = x1 + dx.
284     */
285     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
286     ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
287     ncolumns_k = ncolumns[k];
288     for (l=0; l<ncolumns_k; l++) { /* loop over columns */
289       col = columns[k][l];
290       w3_array[col] += vscale_array[col];
291     }
292     ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
293 #if defined(JACOBIANCOLOROPT)
294     ierr = PetscTime(&t1);CHKERRQ(ierr);
295     t_dx += t1 - t0;
296 #endif
297 
298     /*
299       Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
300                            w2 = F(x1 + dx) - F(x1)
301     */
302 #if defined(JACOBIANCOLOROPT)
303     ierr = PetscTime(&t0);CHKERRQ(ierr);
304 #endif
305     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
306     ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
307     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
308 #if defined(JACOBIANCOLOROPT)
309     ierr = PetscTime(&t1);CHKERRQ(ierr);
310     t_func += t1 - t0;
311 #endif
312 
313 #if defined(JACOBIANCOLOROPT)
314     ierr = PetscTime(&t0);CHKERRQ(ierr);
315 #endif
316     ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
317 
318     /*
319       Loop over rows of vector, putting w2/dx into Jacobian matrix
320     */
321     ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
322     nrows_k = nrows[k];
323     for (l=0; l<nrows_k; l++) { /* loop over rows */
324 #if defined(JACOBIANCOLOROPT)
325     ierr = PetscTime(&t00);CHKERRQ(ierr);
326 #endif
327       row   = coloring->rowcolden2sp3[nz++];
328       col   = coloring->rowcolden2sp3[nz++];
329       spidx = coloring->rowcolden2sp3[nz++];
330 #if defined(JACOBIANCOLOROPT)
331       ierr = PetscTime(&t11);CHKERRQ(ierr);
332       t_kl += t11 - t00;
333 #endif
334       //printf(" row col val
335       ca[spidx*bs2] = y[row*bs]/vscale_array[col*bs];
336       //printf(" (%d, %d, %g)\n", row*bs,col*bs,ca[spidx*bs2]);
337     }
338     ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
339 
340 #if defined(JACOBIANCOLOROPT)
341     ierr = PetscTime(&t1);CHKERRQ(ierr);
342     t_setvals += t1 - t0;
343 #endif
344   } /* endof for each color */
345 #if defined(JACOBIANCOLOROPT)
346   printf("     FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx);
347   printf("     FDColorApply time: kl %g\n",t_kl);
348 #endif
349   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
350   ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
351 
352   coloring->currentcolor = -1;
353   PetscFunctionReturn(0);
354 }
355 
356 #undef __FUNCT__
357 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp"
358 /*
359     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
360 */
361 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c)
362 {
363   PetscErrorCode ierr;
364   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
365   const PetscInt *is,*rows,*ci,*cj;
366   PetscInt       nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz;
367   IS             *isa;
368   PetscBool      isBAIJ;
369   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
370 
371   PetscFunctionBegin;
372   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
373 
374   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
375      SeqBAIJ calls this routine, thus check it */
376   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
377   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
378   if (!isBAIJ) {
379     bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */
380   }
381   N         = mat->cmap->N/bs;
382   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
383   c->N      = mat->cmap->N/bs;
384   c->m      = mat->rmap->N/bs;
385   c->rstart = 0;
386 
387   c->ncolors = nis;
388   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
389   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
390   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
391   ierr       = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr);
392 
393   if (isBAIJ) {
394     ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
395   } else {
396     ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
397   }
398 
399   ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr);
400   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
401 
402   nz = 0;
403   for (i=0; i<nis; i++) { /* loop over colors */
404     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
405     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
406 
407     c->ncolumns[i] = n;
408     if (n) {
409       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
410       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
411     } else {
412       c->columns[i] = 0;
413     }
414 
415     /* fast, crude version requires O(N*N) work */
416     nrows = 0;
417     for (j=0; j<n; j++) {  /* loop over columns */
418       col  = is[j];
419       rows = cj + ci[col];
420       m    = ci[col+1] - ci[col];
421       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
422         rowhit[*rows]     = col + 1;
423         spidxhit[*rows++] = spidx[ci[col] + k];
424         nrows++;
425       }
426     }
427     c->nrows[i] = nrows; /* total num of rows for this color */
428 
429     nrows = 0;
430     for (j=0; j<N; j++) { /* loop over rows */
431       if (rowhit[j]) {
432         c->rowcolden2sp3[nz++]   = j;             /* row index */
433         c->rowcolden2sp3[nz++] = rowhit[j] - 1;   /* column index */
434         c->rowcolden2sp3[nz++] = spidxhit[j];     /* den2sp index */
435         rowhit[j] = 0.0;                          /* zero rowhit for reuse */
436       }
437     }
438     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
439   }
440   if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz);
441 
442   if (isBAIJ) {
443     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
444   } else {
445     ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
446   }
447   ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr);
448   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
449 
450   c->ctype                  = IS_COLORING_GHOSTED;
451   if (isBAIJ) {
452     mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ;
453   } else {
454     mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
455   }
456   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
462 /*
463     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
464     This is why it has the ugly code with the MatGetBlockSize()
465 */
466 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
467 {
468   PetscErrorCode ierr;
469   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
470   const PetscInt *is,*rows,*ci,*cj;
471   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
472   IS             *isa;
473   PetscBool      done,flg = PETSC_FALSE;
474   PetscBool      flg1;
475   PetscBool      new_impl=PETSC_FALSE;
476 
477   PetscFunctionBegin;
478   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
479   if (new_impl){
480     ierr =  MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr);
481     PetscFunctionReturn(0);
482   }
483   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
484 
485   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
486   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
487      SeqBAIJ calls this routine, thus check it */
488   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
489   if (flg1) {
490     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
491   }
492 
493   N         = mat->cmap->N/bs;
494   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
495   c->N      = mat->cmap->N/bs;
496   c->m      = mat->rmap->N/bs;
497   c->rstart = 0;
498 
499   c->ncolors = nis;
500   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
501   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
502   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
503   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
504   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
505 
506   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
507   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
508 
509   /*
510      Temporary option to allow for debugging/testing
511   */
512   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
513 
514   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
515   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
516 
517   for (i=0; i<nis; i++) {
518     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
519     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
520 
521     c->ncolumns[i] = n;
522     if (n) {
523       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
524       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
525     } else {
526       c->columns[i] = 0;
527     }
528 
529     if (!flg) { /* ------------------------------------------------------------------------------*/
530       /* fast, crude version requires O(N*N) work */
531       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
532       /* loop over columns*/
533       for (j=0; j<n; j++) {
534         col  = is[j];
535         rows = cj + ci[col];
536         m    = ci[col+1] - ci[col];
537         /* loop over columns marking them in rowhit */
538         for (k=0; k<m; k++) {
539           rowhit[*rows++] = col + 1;
540         }
541       }
542       /* count the number of hits */
543       nrows = 0;
544       for (j=0; j<N; j++) {
545         if (rowhit[j]) nrows++;
546       }
547       c->nrows[i] = nrows;
548       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
549       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
550       nrows       = 0;
551       for (j=0; j<N; j++) {
552         if (rowhit[j]) {
553           c->rows[i][nrows]          = j;
554           c->columnsforrow[i][nrows] = rowhit[j] - 1;
555           nrows++;
556         }
557       }
558     } else {  /*-------------------------------------------------------------------------------*/
559       /* slow version, using rowhit as a linked list */
560       PetscInt currentcol,fm,mfm;
561       rowhit[N] = N;
562       nrows     = 0;
563       /* loop over columns */
564       for (j=0; j<n; j++) {
565         col  = is[j];
566         rows = cj + ci[col];
567         m    = ci[col+1] - ci[col];
568         /* loop over columns marking them in rowhit */
569         fm = N;    /* fm points to first entry in linked list */
570         for (k=0; k<m; k++) {
571           currentcol = *rows++;
572           /* is it already in the list? */
573           do {
574             mfm = fm;
575             fm  = rowhit[fm];
576           } while (fm < currentcol);
577           /* not in list so add it */
578           if (fm != currentcol) {
579             nrows++;
580             columnsforrow[currentcol] = col;
581             /* next three lines insert new entry into linked list */
582             rowhit[mfm]        = currentcol;
583             rowhit[currentcol] = fm;
584             fm                 = currentcol;
585             /* fm points to present position in list since we know the columns are sorted */
586           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
587         }
588       }
589       c->nrows[i] = nrows;
590       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
591       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
592       /* now store the linked list of rows into c->rows[i] */
593       nrows = 0;
594       fm    = rowhit[N];
595       do {
596         c->rows[i][nrows]            = fm;
597         c->columnsforrow[i][nrows++] = columnsforrow[fm];
598         fm                           = rowhit[fm];
599       } while (fm < N);
600     } /* ---------------------------------------------------------------------------------------*/
601     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
602   }
603   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
604 
605   ierr = PetscFree(rowhit);CHKERRQ(ierr);
606   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
607 
608   /* Optimize by adding the vscale, and scaleforrow[][] fields */
609   /*
610        see the version for MPIAIJ
611   */
612   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
613   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
614   for (k=0; k<c->ncolors; k++) {
615     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
616     for (l=0; l<c->nrows[k]; l++) {
617       col = c->columnsforrow[k][l];
618 
619       c->vscaleforrow[k][l] = col;
620     }
621   }
622   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626