xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision fcd7ac738093a9eb22150a42b9767d03cd72eb40)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/baij/seq/baij.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatFDColoringApply_SeqBAIJ"
7 PetscErrorCode  MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
8 {
9   PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
10   PetscErrorCode ierr;
11   PetscInt       bs=J->rmap->bs,bs2=bs*bs,i,j,k,l,row,col,N=J->cmap->n,spidx,nz;
12   PetscScalar    dx,*dy_i,*xx,*w3_array,*dy=coloring->dy;
13   PetscScalar    *vscale_array;
14   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
15   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
16   void           *fctx   = coloring->fctx;
17   PetscBool      flg     = PETSC_FALSE;
18   Mat_SeqBAIJ    *csp=(Mat_SeqBAIJ*)J->data;
19   PetscScalar    *ca = csp->a,*ca_l;
20 
21   PetscFunctionBegin;
22   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
23   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
24   if (flg) {
25     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
26   } else {
27     PetscBool assembled;
28     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
29     if (assembled) {
30       ierr = MatZeroEntries(J);CHKERRQ(ierr);
31     }
32   }
33   if (!coloring->vscale) {
34     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
35   }
36 
37   /* Set w1 = F(x1) */
38   if (!coloring->fset) {
39     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
40     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
41     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
42   } else {
43     coloring->fset = PETSC_FALSE;
44   }
45 
46   if (!coloring->w3) {
47     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
48     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
49   }
50   w3 = coloring->w3;
51 
52   /* Compute local scale factors: vscale = dx */
53   if (coloring->htype[0] == 'w') {
54     /* tacky test; need to make systematic if we add other approaches to computing h*/
55     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
56     dx = (1.0 + unorm)*epsilon;
57     ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr);
58   } else {
59     ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
60     ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
61     for (col=0; col<N; col++) {
62       dx = xx[col];
63       if (dx == 0.0) dx = 1.0;
64       if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
65       else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
66       dx               *= epsilon;
67       vscale_array[col] = dx;
68     }
69     ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
70     ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
71   }
72 
73   nz = 0;
74   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
75   for (k=0; k<coloring->ncolors; k++) { /*  Loop over each color */
76     coloring->currentcolor = k;
77 
78     /* Compute w3 = x1 + dx */
79     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
80     dy_i = dy;
81     for (i=0; i<bs; i++) {              /* Loop over a block of columns */
82       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
83       for (l=0; l<coloring->ncolumns[k]; l++) {
84         col            = i + bs*coloring->columns[k][l];
85         w3_array[col] += vscale_array[col];
86         if (i) {
87           w3_array[col-1] -= vscale_array[col-1]; /* resume original w3[col-1] */
88         }
89       }
90       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
91 
92       /* Evaluate function w2 = F(w3) - F(x1) */
93       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
94       ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */
95       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
96       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
97       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
98       ierr = VecResetArray(w2);CHKERRQ(ierr);
99       dy_i += N; /* points to dy+i*N */
100     }
101 
102     /* Loop over row blocks, putting dy/dx into Jacobian matrix */
103     for (l=0; l<coloring->nrows[k]; l++) {
104       row   = bs*coloring->rowcolden2sp3[nz++];
105       col   = bs*coloring->rowcolden2sp3[nz++];
106       ca_l  = ca + bs2*coloring->rowcolden2sp3[nz++];
107       spidx = 0;
108       dy_i  = dy;
109       for (i=0; i<bs; i++) {   /* column of the block */
110         for (j=0; j<bs; j++) { /* row of the block */
111           ca_l[spidx++] = dy_i[row+j]/vscale_array[col+i];
112         }
113         dy_i += N; /* points to dy+i*N */
114       }
115     }
116   } /* endof for each color */
117   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
118 
119   coloring->currentcolor = -1;
120   PetscFunctionReturn(0);
121 }
122 
123 /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */
124 #undef __FUNCT__
125 #define __FUNCT__ "MatFDColoringApply_SeqAIJ"
126 PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
127 {
128   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
129   PetscErrorCode ierr;
130   PetscInt       k,l,row,col,N;
131   PetscScalar    dx,*y,*xx,*w3_array;
132   PetscScalar    *vscale_array;
133   PetscReal      epsilon=coloring->error_rel,umin = coloring->umin,unorm;
134   Vec            w1=coloring->w1,w2=coloring->w2,w3;
135   void           *fctx=coloring->fctx;
136   PetscBool      flg=PETSC_FALSE;
137   Mat_SeqAIJ     *csp=(Mat_SeqAIJ*)J->data;
138   PetscScalar    *ca=csp->a;
139   const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
140   PetscInt       **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx;
141 
142   PetscFunctionBegin;
143   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
144   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
145   if (flg) {
146     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
147   } else {
148     PetscBool assembled;
149     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
150     if (assembled) {
151       ierr = MatZeroEntries(J);CHKERRQ(ierr);
152     }
153   }
154   if (!coloring->vscale) {
155     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
156   }
157 
158   /* Set w1 = F(x1) */
159   if (!coloring->fset) {
160     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
161     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
162     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
163   } else {
164     coloring->fset = PETSC_FALSE;
165   }
166 
167   if (!coloring->w3) {
168     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
169     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
170   }
171   w3 = coloring->w3;
172 
173   /* Compute scale factors: vscale = dx */
174   if (coloring->htype[0] == 'w') {
175     /* tacky test; need to make systematic if we add other approaches to computing h*/
176     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
177     dx = (1.0 + unorm)*epsilon;
178     ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr);
179   } else {
180     ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
181     ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
182     ierr = VecGetSize(x1,&N);CHKERRQ(ierr);
183     for (col=0; col<N; col++) {
184       dx = xx[col];
185       if (dx == 0.0) dx = 1.0;
186       if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
187       else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
188       dx               *= epsilon;
189       vscale_array[col] = dx;
190     }
191     ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
192     ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
193   }
194 
195   nz  = 0;
196   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
197   for (k=0; k<ncolors; k++) { /* loop over colors */
198     coloring->currentcolor = k;
199 
200     /*
201       Loop over each column associated with color
202       adding the perturbation to the vector w3 = x1 + dx.
203     */
204     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
205     ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
206     ncolumns_k = ncolumns[k];
207     for (l=0; l<ncolumns_k; l++) { /* loop over columns */
208       col = columns[k][l];
209       w3_array[col] += vscale_array[col];
210     }
211     ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
212 
213     /*
214       Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
215                            w2 = F(x1 + dx) - F(x1)
216     */
217     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
218     ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
219     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
220     ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
221 
222     /*
223       Loop over rows of vector, putting w2/dx into Jacobian matrix
224     */
225     ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
226     nrows_k = nrows[k];
227     for (l=0; l<nrows_k; l++) { /* loop over rows */
228       row   = coloring->rowcolden2sp3[nz++];
229       col   = coloring->rowcolden2sp3[nz++];
230       spidx = coloring->rowcolden2sp3[nz++];
231       ca[spidx] = y[row]/vscale_array[col];
232     }
233     ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
234   } /* endof for each color */
235   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
236 
237   coloring->currentcolor = -1;
238   PetscFunctionReturn(0);
239 }
240 
241 #undef __FUNCT__
242 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp"
243 /*
244     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
245 */
246 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c)
247 {
248   PetscErrorCode ierr;
249   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
250   const PetscInt *is,*rows,*ci,*cj;
251   PetscInt       nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz;
252   IS             *isa;
253   PetscBool      isBAIJ;
254   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
255 
256   PetscFunctionBegin;
257   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
258 
259   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
260      SeqBAIJ calls this routine, thus check it */
261   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
262   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
263   if (!isBAIJ) {
264     bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */
265   }
266   N         = mat->cmap->N/bs;
267   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
268   c->N      = mat->cmap->N/bs;
269   c->m      = mat->rmap->N/bs;
270   c->rstart = 0;
271 
272   c->ncolors = nis;
273   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
274   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
275   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
276   ierr       = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr);
277 
278   if (isBAIJ) {
279     ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
280   } else {
281     ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
282   }
283 
284   ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr);
285   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
286 
287   nz = 0;
288   for (i=0; i<nis; i++) { /* loop over colors */
289     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
290     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
291 
292     c->ncolumns[i] = n;
293     if (n) {
294       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
295       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
296     } else {
297       c->columns[i] = 0;
298     }
299 
300     /* fast, crude version requires O(N*N) work */
301     nrows = 0;
302     for (j=0; j<n; j++) {  /* loop over columns */
303       col  = is[j];
304       rows = cj + ci[col];
305       m    = ci[col+1] - ci[col];
306       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
307         rowhit[*rows]     = col + 1;
308         spidxhit[*rows++] = spidx[ci[col] + k];
309         nrows++;
310       }
311     }
312     c->nrows[i] = nrows; /* total num of rows for this color */
313 
314     nrows = 0;
315     for (j=0; j<N; j++) { /* loop over rows */
316       if (rowhit[j]) {
317         c->rowcolden2sp3[nz++]   = j;             /* row index */
318         c->rowcolden2sp3[nz++] = rowhit[j] - 1;   /* column index */
319         c->rowcolden2sp3[nz++] = spidxhit[j];     /* den2sp index */
320         rowhit[j] = 0.0;                          /* zero rowhit for reuse */
321       }
322     }
323     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
324   }
325   if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz);
326 
327   if (isBAIJ) {
328     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
329   } else {
330     ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
331   }
332   ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr);
333   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
334 
335   c->ctype                  = IS_COLORING_GHOSTED;
336   if (isBAIJ) {
337     mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ;
338     ierr = PetscMalloc(bs*mat->cmap->N*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr);
339   } else {
340     mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
341   }
342   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 #undef __FUNCT__
347 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
348 /*
349     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
350     This is why it has the ugly code with the MatGetBlockSize()
351 */
352 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
353 {
354   PetscErrorCode ierr;
355   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
356   const PetscInt *is,*rows,*ci,*cj;
357   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
358   IS             *isa;
359   PetscBool      done,flg = PETSC_FALSE;
360   PetscBool      flg1;
361   PetscBool      new_impl=PETSC_FALSE;
362 
363   PetscFunctionBegin;
364   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
365   if (new_impl){
366     ierr =  MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr);
367     PetscFunctionReturn(0);
368   }
369   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
370 
371   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
372   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
373      SeqBAIJ calls this routine, thus check it */
374   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
375   if (flg1) {
376     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
377   }
378 
379   N         = mat->cmap->N/bs;
380   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
381   c->N      = mat->cmap->N/bs;
382   c->m      = mat->rmap->N/bs;
383   c->rstart = 0;
384 
385   c->ncolors = nis;
386   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
387   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
388   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
389   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
390   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
391 
392   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
393   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
394 
395   /*
396      Temporary option to allow for debugging/testing
397   */
398   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
399 
400   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
401   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
402 
403   for (i=0; i<nis; i++) {
404     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
405     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
406 
407     c->ncolumns[i] = n;
408     if (n) {
409       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
410       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
411     } else {
412       c->columns[i] = 0;
413     }
414 
415     if (!flg) { /* ------------------------------------------------------------------------------*/
416       /* fast, crude version requires O(N*N) work */
417       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
418       /* loop over columns*/
419       for (j=0; j<n; j++) {
420         col  = is[j];
421         rows = cj + ci[col];
422         m    = ci[col+1] - ci[col];
423         /* loop over columns marking them in rowhit */
424         for (k=0; k<m; k++) {
425           rowhit[*rows++] = col + 1;
426         }
427       }
428       /* count the number of hits */
429       nrows = 0;
430       for (j=0; j<N; j++) {
431         if (rowhit[j]) nrows++;
432       }
433       c->nrows[i] = nrows;
434       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
435       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
436       nrows       = 0;
437       for (j=0; j<N; j++) {
438         if (rowhit[j]) {
439           c->rows[i][nrows]          = j;
440           c->columnsforrow[i][nrows] = rowhit[j] - 1;
441           nrows++;
442         }
443       }
444     } else {  /*-------------------------------------------------------------------------------*/
445       /* slow version, using rowhit as a linked list */
446       PetscInt currentcol,fm,mfm;
447       rowhit[N] = N;
448       nrows     = 0;
449       /* loop over columns */
450       for (j=0; j<n; j++) {
451         col  = is[j];
452         rows = cj + ci[col];
453         m    = ci[col+1] - ci[col];
454         /* loop over columns marking them in rowhit */
455         fm = N;    /* fm points to first entry in linked list */
456         for (k=0; k<m; k++) {
457           currentcol = *rows++;
458           /* is it already in the list? */
459           do {
460             mfm = fm;
461             fm  = rowhit[fm];
462           } while (fm < currentcol);
463           /* not in list so add it */
464           if (fm != currentcol) {
465             nrows++;
466             columnsforrow[currentcol] = col;
467             /* next three lines insert new entry into linked list */
468             rowhit[mfm]        = currentcol;
469             rowhit[currentcol] = fm;
470             fm                 = currentcol;
471             /* fm points to present position in list since we know the columns are sorted */
472           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
473         }
474       }
475       c->nrows[i] = nrows;
476       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
477       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
478       /* now store the linked list of rows into c->rows[i] */
479       nrows = 0;
480       fm    = rowhit[N];
481       do {
482         c->rows[i][nrows]            = fm;
483         c->columnsforrow[i][nrows++] = columnsforrow[fm];
484         fm                           = rowhit[fm];
485       } while (fm < N);
486     } /* ---------------------------------------------------------------------------------------*/
487     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
488   }
489   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
490 
491   ierr = PetscFree(rowhit);CHKERRQ(ierr);
492   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
493 
494   /* Optimize by adding the vscale, and scaleforrow[][] fields */
495   /*
496        see the version for MPIAIJ
497   */
498   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
499   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
500   for (k=0; k<c->ncolors; k++) {
501     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
502     for (l=0; l<c->nrows[k]; l++) {
503       col = c->columnsforrow[k][l];
504 
505       c->vscaleforrow[k][l] = col;
506     }
507   }
508   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512