xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision 4b2e90ca22e5d236f72fef601ed2fa3dbdcf4eae)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/baij/seq/baij.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatFDColoringApply_SeqBAIJ"
7 PetscErrorCode  MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
8 {
9   PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
10   PetscErrorCode ierr;
11   PetscInt       bs=J->rmap->bs,i,j,k,l,row,col,N=J->cmap->n,spidx,nz;
12   PetscScalar    dx,*dy_i,*xx,*w3_array,*dy=coloring->dy;
13   PetscScalar    *vscale_array;
14   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
15   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
16   void           *fctx   = coloring->fctx;
17   PetscBool      flg     = PETSC_FALSE;
18   PetscScalar    *valaddr;
19   MatEntry       *Jentry=coloring->matentry;
20 
21   PetscFunctionBegin;
22   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
23   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
24   if (flg) {
25     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
26   } else {
27     PetscBool assembled;
28     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
29     if (assembled) {
30       ierr = MatZeroEntries(J);CHKERRQ(ierr);
31     }
32   }
33   if (!coloring->vscale) {
34     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
35   }
36 
37   /* Set w1 = F(x1) */
38   if (!coloring->fset) {
39     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
40     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
41     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
42   } else {
43     coloring->fset = PETSC_FALSE;
44   }
45 
46   if (!coloring->w3) {
47     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
48     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
49   }
50   w3 = coloring->w3;
51 
52   /* Compute local scale factors: vscale = dx */
53   if (coloring->htype[0] == 'w') {
54     /* tacky test; need to make systematic if we add other approaches to computing h*/
55     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
56     dx = (1.0 + unorm)*epsilon;
57     ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr);
58   } else {
59     ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
60     ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
61     for (col=0; col<N; col++) {
62       dx = xx[col];
63       if (dx == 0.0) dx = 1.0;
64       if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
65       else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
66       dx               *= epsilon;
67       vscale_array[col] = dx;
68     }
69     ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
70     ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
71   }
72 
73   nz = 0;
74   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
75   for (k=0; k<coloring->ncolors; k++) { /*  Loop over each color */
76     coloring->currentcolor = k;
77 
78     /* Compute w3 = x1 + dx */
79     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
80     dy_i = dy;
81     for (i=0; i<bs; i++) {              /* Loop over a block of columns */
82       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
83       for (l=0; l<coloring->ncolumns[k]; l++) {
84         col            = i + bs*coloring->columns[k][l];
85         w3_array[col] += vscale_array[col];
86         if (i) {
87           w3_array[col-1] -= vscale_array[col-1]; /* resume original w3[col-1] */
88         }
89       }
90       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
91 
92       /* Evaluate function w2 = F(w3) - F(x1) */
93       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
94       ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */
95       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
96       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
97       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
98       ierr = VecResetArray(w2);CHKERRQ(ierr);
99       dy_i += N; /* points to dy+i*N */
100     }
101 
102     /* Loop over row blocks, putting dy/dx into Jacobian matrix */
103     for (l=0; l<coloring->nrows[k]; l++) {
104       row     = bs*Jentry[nz].row;
105       col     = bs*Jentry[nz].col;
106       valaddr = Jentry[nz].valaddr;
107       nz++;
108       spidx = 0;
109       dy_i  = dy;
110       for (i=0; i<bs; i++) {   /* column of the block */
111         for (j=0; j<bs; j++) { /* row of the block */
112           valaddr[spidx++] = dy_i[row+j]/vscale_array[col+i];
113         }
114         dy_i += N; /* points to dy+i*N */
115       }
116     }
117   } /* endof for each color */
118   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
119 
120   coloring->currentcolor = -1;
121   PetscFunctionReturn(0);
122 }
123 
124 /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFDColoringApply_SeqAIJ"
127 PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
128 {
129   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
130   PetscErrorCode ierr;
131   PetscInt       k,l,row,col,N;
132   PetscScalar    dx,*y,*xx,*w3_array;
133   PetscScalar    *vscale_array;
134   PetscReal      epsilon=coloring->error_rel,umin = coloring->umin,unorm;
135   Vec            w1=coloring->w1,w2=coloring->w2,w3;
136   void           *fctx=coloring->fctx;
137   PetscBool      flg=PETSC_FALSE;
138   const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
139   PetscInt       **columns=coloring->columns,ncolumns_k,nrows_k,nz;
140   MatEntry       *Jentry=coloring->matentry;
141 
142   PetscFunctionBegin;
143   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
144   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
145   if (flg) {
146     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
147   } else {
148     PetscBool assembled;
149     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
150     if (assembled) {
151       ierr = MatZeroEntries(J);CHKERRQ(ierr);
152     }
153   }
154   if (!coloring->vscale) {
155     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
156   }
157 
158   /* Set w1 = F(x1) */
159   if (!coloring->fset) {
160     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
161     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
162     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
163   } else {
164     coloring->fset = PETSC_FALSE;
165   }
166 
167   if (!coloring->w3) {
168     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
169     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
170   }
171   w3 = coloring->w3;
172 
173   /* Compute scale factors: vscale = dx */
174   if (coloring->htype[0] == 'w') {
175     /* tacky test; need to make systematic if we add other approaches to computing h*/
176     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
177     dx = (1.0 + unorm)*epsilon;
178     ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr);
179   } else {
180     ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
181     ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
182     ierr = VecGetSize(x1,&N);CHKERRQ(ierr);
183     for (col=0; col<N; col++) {
184       dx = xx[col];
185       if (dx == 0.0) dx = 1.0;
186       if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
187       else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
188       dx               *= epsilon;
189       vscale_array[col] = dx;
190     }
191     ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
192     ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
193   }
194 
195   nz  = 0;
196   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
197   for (k=0; k<ncolors; k++) { /* loop over colors */
198     coloring->currentcolor = k;
199 
200     /*
201       Loop over each column associated with color
202       adding the perturbation to the vector w3 = x1 + dx.
203     */
204     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
205     ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
206     ncolumns_k = ncolumns[k];
207     for (l=0; l<ncolumns_k; l++) { /* loop over columns */
208       col = columns[k][l];
209       w3_array[col] += vscale_array[col];
210     }
211     ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
212 
213     /*
214       Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
215                            w2 = F(x1 + dx) - F(x1)
216     */
217     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
218     ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
219     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
220     ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
221 
222     /*
223       Loop over rows of vector, putting w2/dx into Jacobian matrix
224     */
225     ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
226     nrows_k = nrows[k];
227     nrows_k = nrows[k];
228     for (l=0; l<nrows_k; l++) { /* loop over rows */
229       row                     = Jentry[nz].row;
230       col                     = Jentry[nz].col;
231       *(Jentry[nz++].valaddr) = y[row]/vscale_array[col];
232     }
233     ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
234   } /* endof for each color */
235   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
236 
237   coloring->currentcolor = -1;
238   PetscFunctionReturn(0);
239 }
240 
241 #undef __FUNCT__
242 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_new"
243 /*
244     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
245 */
246 PetscErrorCode MatFDColoringCreate_SeqAIJ_new(Mat mat,ISColoring iscoloring,MatFDColoring c)
247 {
248   PetscErrorCode ierr;
249   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
250   const PetscInt *is,*row,*ci,*cj;
251   PetscInt       nis=iscoloring->n,*rowhit,bs,bs2,*spidx,nz;
252   IS             *isa;
253   PetscBool      isBAIJ;
254   Mat_SeqAIJ     *spA = (Mat_SeqAIJ*)mat->data;
255   PetscScalar    *A_val=spA->a;
256   PetscScalar    **valaddrhit;
257   MatEntry       *Jentry;
258 
259   PetscFunctionBegin;
260   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
261 
262   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
263      SeqBAIJ calls this routine, thus check it */
264   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
265   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
266   if (!isBAIJ) {
267     bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */
268   }
269   N         = mat->cmap->N/bs;
270   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
271   c->N      = mat->cmap->N/bs;
272   c->m      = mat->rmap->N/bs;
273   c->rstart = 0;
274 
275   c->ncolors = nis;
276   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
277   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
278   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
279   ierr       = PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt));CHKERRQ(ierr);
280 
281   ierr       = PetscMalloc(spA->nz*sizeof(MatEntry),&Jentry);CHKERRQ(ierr);
282   ierr       = PetscLogObjectMemory((PetscObject)c,spA->nz*sizeof(MatEntry));CHKERRQ(ierr);
283   c->matentry = Jentry;
284 
285   if (isBAIJ) {
286     ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
287   } else {
288     ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
289   }
290 
291   ierr = PetscMalloc2(c->m,PetscInt,&rowhit,c->m,PetscScalar*,&valaddrhit);CHKERRQ(ierr);
292   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
293 
294   nz = 0;
295   for (i=0; i<nis; i++) { /* loop over colors */
296     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
297     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
298 
299     c->ncolumns[i] = n;
300     if (n) {
301       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
302       ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr);
303       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
304     } else {
305       c->columns[i] = 0;
306     }
307 
308     /* fast, crude version requires O(N*N) work */
309     bs2   = bs*bs;
310     nrows = 0;
311     for (j=0; j<n; j++) {  /* loop over columns */
312       col    = is[j];
313       row    = cj + ci[col];
314       m      = ci[col+1] - ci[col];
315       nrows += m;
316       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
317         rowhit[*row]       = col + 1;
318         valaddrhit[*row++] = &A_val[bs2*spidx[ci[col] + k]];
319       }
320     }
321     c->nrows[i] = nrows; /* total num of rows for this color */
322 
323     for (j=0; j<N; j++) { /* loop over rows */
324       if (rowhit[j]) {
325         Jentry[nz].row     = j;              /* local row index */
326         Jentry[nz].col     = rowhit[j] - 1;  /* local column index */
327         Jentry[nz].valaddr = valaddrhit[j];  /* address of mat value for this entry */
328         nz++;
329         rowhit[j] = 0.0;                     /* zero rowhit for reuse */
330       }
331     }
332     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
333   }
334   if (nz != spA->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz,spA->nz);
335 
336   if (isBAIJ) {
337     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
338   } else {
339     ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
340   }
341   ierr = PetscFree2(rowhit,valaddrhit);CHKERRQ(ierr);
342   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
343 
344   c->ctype                  = IS_COLORING_GHOSTED;
345   if (isBAIJ) {
346     mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ;
347     ierr = PetscMalloc(bs*mat->cmap->N*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr);
348   } else {
349     mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
350   }
351   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
352   PetscFunctionReturn(0);
353 }
354 
355 #undef __FUNCT__
356 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
357 /*
358     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
359     This is why it has the ugly code with the MatGetBlockSize()
360 */
361 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
362 {
363   PetscErrorCode ierr;
364   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
365   const PetscInt *is,*rows,*ci,*cj;
366   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
367   IS             *isa;
368   PetscBool      done,flg = PETSC_FALSE;
369   PetscBool      flg1;
370   PetscBool      new_impl=PETSC_FALSE;
371 
372   PetscFunctionBegin;
373   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
374   if (new_impl){
375     ierr =  MatFDColoringCreate_SeqAIJ_new(mat,iscoloring,c);CHKERRQ(ierr);
376     PetscFunctionReturn(0);
377   }
378   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
379 
380   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
381   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
382      SeqBAIJ calls this routine, thus check it */
383   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
384   if (flg1) {
385     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
386   }
387 
388   N         = mat->cmap->N/bs;
389   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
390   c->N      = mat->cmap->N/bs;
391   c->m      = mat->rmap->N/bs;
392   c->rstart = 0;
393 
394   c->ncolors = nis;
395   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
396   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
397   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
398   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
399   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
400 
401   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
402   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
403 
404   /*
405      Temporary option to allow for debugging/testing
406   */
407   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
408 
409   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
410   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
411 
412   for (i=0; i<nis; i++) {
413     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
414     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
415 
416     c->ncolumns[i] = n;
417     if (n) {
418       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
419       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
420     } else {
421       c->columns[i] = 0;
422     }
423 
424     if (!flg) { /* ------------------------------------------------------------------------------*/
425       /* fast, crude version requires O(N*N) work */
426       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
427       /* loop over columns*/
428       for (j=0; j<n; j++) {
429         col  = is[j];
430         rows = cj + ci[col];
431         m    = ci[col+1] - ci[col];
432         /* loop over columns marking them in rowhit */
433         for (k=0; k<m; k++) {
434           rowhit[*rows++] = col + 1;
435         }
436       }
437       /* count the number of hits */
438       nrows = 0;
439       for (j=0; j<N; j++) {
440         if (rowhit[j]) nrows++;
441       }
442       c->nrows[i] = nrows;
443       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
444       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
445       nrows       = 0;
446       for (j=0; j<N; j++) {
447         if (rowhit[j]) {
448           c->rows[i][nrows]          = j;
449           c->columnsforrow[i][nrows] = rowhit[j] - 1;
450           nrows++;
451         }
452       }
453     } else {  /*-------------------------------------------------------------------------------*/
454       /* slow version, using rowhit as a linked list */
455       PetscInt currentcol,fm,mfm;
456       rowhit[N] = N;
457       nrows     = 0;
458       /* loop over columns */
459       for (j=0; j<n; j++) {
460         col  = is[j];
461         rows = cj + ci[col];
462         m    = ci[col+1] - ci[col];
463         /* loop over columns marking them in rowhit */
464         fm = N;    /* fm points to first entry in linked list */
465         for (k=0; k<m; k++) {
466           currentcol = *rows++;
467           /* is it already in the list? */
468           do {
469             mfm = fm;
470             fm  = rowhit[fm];
471           } while (fm < currentcol);
472           /* not in list so add it */
473           if (fm != currentcol) {
474             nrows++;
475             columnsforrow[currentcol] = col;
476             /* next three lines insert new entry into linked list */
477             rowhit[mfm]        = currentcol;
478             rowhit[currentcol] = fm;
479             fm                 = currentcol;
480             /* fm points to present position in list since we know the columns are sorted */
481           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
482         }
483       }
484       c->nrows[i] = nrows;
485       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
486       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
487       /* now store the linked list of rows into c->rows[i] */
488       nrows = 0;
489       fm    = rowhit[N];
490       do {
491         c->rows[i][nrows]            = fm;
492         c->columnsforrow[i][nrows++] = columnsforrow[fm];
493         fm                           = rowhit[fm];
494       } while (fm < N);
495     } /* ---------------------------------------------------------------------------------------*/
496     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
497   }
498   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
499 
500   ierr = PetscFree(rowhit);CHKERRQ(ierr);
501   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
502 
503   /* Optimize by adding the vscale, and scaleforrow[][] fields */
504   /*
505        see the version for MPIAIJ
506   */
507   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
508   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
509   for (k=0; k<c->ncolors; k++) {
510     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
511     for (l=0; l<c->nrows[k]; l++) {
512       col = c->columnsforrow[k][l];
513 
514       c->vscaleforrow[k][l] = col;
515     }
516   }
517   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
518   PetscFunctionReturn(0);
519 }
520 
521