xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision 9a09c71274829a1744640f51900178f70a1c727f)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/baij/seq/baij.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatFDColoringApply_SeqBAIJ"
7 PetscErrorCode  MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
8 {
9   PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
10   PetscErrorCode ierr;
11   PetscInt       bs=J->rmap->bs,i,j,k,l,row,col,N=J->cmap->n,spidx,nz;
12   PetscScalar    dx,*dy_i,*xx,*w3_array,*dy=coloring->dy;
13   PetscScalar    *vscale_array;
14   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
15   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
16   void           *fctx   = coloring->fctx;
17   PetscBool      flg     = PETSC_FALSE;
18   PetscScalar    *valaddr;
19   MatEntry       *Jentry=coloring->matentry;
20 
21   PetscFunctionBegin;
22   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
23   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
24   if (flg) {
25     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
26   } else {
27     PetscBool assembled;
28     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
29     if (assembled) {
30       ierr = MatZeroEntries(J);CHKERRQ(ierr);
31     }
32   }
33   if (!coloring->vscale) {
34     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
35   }
36 
37   /* Set w1 = F(x1) */
38   if (!coloring->fset) {
39     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
40     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
41     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
42   } else {
43     coloring->fset = PETSC_FALSE;
44   }
45 
46   if (!coloring->w3) {
47     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
48     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
49   }
50   w3 = coloring->w3;
51 
52   /* Compute local scale factors: vscale = dx */
53   if (coloring->htype[0] == 'w') {
54     /* tacky test; need to make systematic if we add other approaches to computing h*/
55     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
56     dx = (1.0 + unorm)*epsilon;
57     ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr);
58   } else {
59     ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
60     ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
61     for (col=0; col<N; col++) {
62       dx = xx[col];
63       if (dx == 0.0) dx = 1.0;
64       if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
65       else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
66       dx               *= epsilon;
67       vscale_array[col] = dx;
68     }
69     ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
70     ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
71   }
72 
73   nz = 0;
74   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
75   for (k=0; k<coloring->ncolors; k++) { /*  Loop over each color */
76     coloring->currentcolor = k;
77 
78     /* Compute w3 = x1 + dx */
79     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
80     dy_i = dy;
81     for (i=0; i<bs; i++) {              /* Loop over a block of columns */
82       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
83       for (l=0; l<coloring->ncolumns[k]; l++) {
84         col            = i + bs*coloring->columns[k][l];
85         w3_array[col] += vscale_array[col];
86         if (i) {
87           w3_array[col-1] -= vscale_array[col-1]; /* resume original w3[col-1] */
88         }
89       }
90       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
91 
92       /* Evaluate function w2 = F(w3) - F(x1) */
93       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
94       ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */
95       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
96       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
97       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
98       ierr = VecResetArray(w2);CHKERRQ(ierr);
99       dy_i += N; /* points to dy+i*N */
100     }
101 
102     /* Loop over row blocks, putting dy/dx into Jacobian matrix */
103     for (l=0; l<coloring->nrows[k]; l++) {
104       row     = bs*Jentry[nz].row;
105       col     = bs*Jentry[nz].col;
106       valaddr = Jentry[nz].valaddr;
107       nz++;
108       spidx = 0;
109       dy_i  = dy;
110       for (i=0; i<bs; i++) {   /* column of the block */
111         for (j=0; j<bs; j++) { /* row of the block */
112           valaddr[spidx++] = dy_i[row+j]/vscale_array[col+i];
113         }
114         dy_i += N; /* points to dy+i*N */
115       }
116     }
117   } /* endof for each color */
118   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
119 
120   coloring->currentcolor = -1;
121   PetscFunctionReturn(0);
122 }
123 
124 /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFDColoringApply_SeqAIJ"
127 PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
128 {
129   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
130   PetscErrorCode ierr;
131   PetscInt       k,l,row,col,N;
132   PetscScalar    dx,*y,*xx,*w3_array;
133   PetscScalar    *vscale_array;
134   PetscReal      epsilon=coloring->error_rel,umin = coloring->umin,unorm;
135   Vec            w1=coloring->w1,w2=coloring->w2,w3;
136   void           *fctx=coloring->fctx;
137   PetscBool      flg=PETSC_FALSE;
138   const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
139   PetscInt       **columns=coloring->columns,ncolumns_k,nrows_k,nz;
140   MatEntry       *Jentry=coloring->matentry;
141 
142   PetscFunctionBegin;
143   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
144   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
145   if (flg) {
146     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
147   } else {
148     PetscBool assembled;
149     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
150     if (assembled) {
151       ierr = MatZeroEntries(J);CHKERRQ(ierr);
152     }
153   }
154   if (!coloring->vscale) {
155     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
156   }
157 
158   /* Set w1 = F(x1) */
159   if (!coloring->fset) {
160     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
161     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
162     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
163   } else {
164     coloring->fset = PETSC_FALSE;
165   }
166 
167   if (!coloring->w3) {
168     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
169     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
170   }
171   w3 = coloring->w3;
172 
173   /* Compute scale factors: vscale = dx */
174   if (coloring->htype[0] == 'w') {
175     /* tacky test; need to make systematic if we add other approaches to computing h*/
176     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
177     dx = (1.0 + unorm)*epsilon;
178     ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr);
179   } else {
180     ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
181     ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
182     ierr = VecGetSize(x1,&N);CHKERRQ(ierr);
183     for (col=0; col<N; col++) {
184       dx = xx[col];
185       if (dx == 0.0) dx = 1.0;
186       if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
187       else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
188       dx               *= epsilon;
189       vscale_array[col] = dx;
190     }
191     ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
192     ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
193   }
194 
195   nz  = 0;
196   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
197   for (k=0; k<ncolors; k++) { /* loop over colors */
198     coloring->currentcolor = k;
199 
200     /*
201       Loop over each column associated with color
202       adding the perturbation to the vector w3 = x1 + dx.
203     */
204     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
205     ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
206     ncolumns_k = ncolumns[k];
207     for (l=0; l<ncolumns_k; l++) { /* loop over columns */
208       col = columns[k][l];
209       w3_array[col] += vscale_array[col];
210     }
211     ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
212 
213     /*
214       Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
215                            w2 = F(x1 + dx) - F(x1)
216     */
217     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
218     ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
219     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
220     ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
221 
222     /*
223       Loop over rows of vector, putting w2/dx into Jacobian matrix
224     */
225     ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
226     nrows_k = nrows[k];
227     nrows_k = nrows[k];
228     for (l=0; l<nrows_k; l++) { /* loop over rows */
229       row                     = Jentry[nz].row;
230       col                     = Jentry[nz].col;
231       *(Jentry[nz++].valaddr) = y[row]/vscale_array[col];
232     }
233     ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
234   } /* endof for each color */
235   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
236 
237   coloring->currentcolor = -1;
238   PetscFunctionReturn(0);
239 }
240 
241 #undef __FUNCT__
242 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp"
243 /*
244     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
245 */
246 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c)
247 {
248   PetscErrorCode ierr;
249   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
250   const PetscInt *is,*rows,*ci,*cj;
251   PetscInt       nis=iscoloring->n,*rowhit,*columnsforrow,bs,bs2,*spidx,*spidxhit,nz;
252   IS             *isa;
253   PetscBool      isBAIJ;
254   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
255   PetscScalar    *mat_val=csp->a;
256   PetscScalar    **valaddrhit;
257   MatEntry       *Jentry;
258 
259   PetscFunctionBegin;
260   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
261 
262   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
263      SeqBAIJ calls this routine, thus check it */
264   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
265   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
266   if (!isBAIJ) {
267     bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */
268   }
269   N         = mat->cmap->N/bs;
270   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
271   c->N      = mat->cmap->N/bs;
272   c->m      = mat->rmap->N/bs;
273   c->rstart = 0;
274 
275   c->ncolors = nis;
276   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
277   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
278   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
279 
280   ierr       = PetscMalloc(csp->nz*sizeof(MatEntry),&Jentry);CHKERRQ(ierr);
281   ierr       = PetscLogObjectMemory((PetscObject)c,csp->nz*sizeof(MatEntry));CHKERRQ(ierr);
282   c->matentry = Jentry;
283 
284   if (isBAIJ) {
285     ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
286   } else {
287     ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
288   }
289 
290   ierr = PetscMalloc4(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit,c->m,PetscScalar*,&valaddrhit);CHKERRQ(ierr);
291   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
292 
293   nz = 0;
294   for (i=0; i<nis; i++) { /* loop over colors */
295     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
296     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
297 
298     c->ncolumns[i] = n;
299     if (n) {
300       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
301       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
302     } else {
303       c->columns[i] = 0;
304     }
305 
306     /* fast, crude version requires O(N*N) work */
307     bs2 = bs*bs;
308     nrows = 0;
309     for (j=0; j<n; j++) {  /* loop over columns */
310       col  = is[j];
311       rows = cj + ci[col];
312       m    = ci[col+1] - ci[col];
313       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
314         rowhit[*rows]     = col + 1;
315         spidxhit[*rows]   = spidx[ci[col] + k];
316         valaddrhit[*rows] = &mat_val[bs2*spidx[ci[col] + k]];
317         rows++;
318         nrows++;
319       }
320     }
321     c->nrows[i] = nrows; /* total num of rows for this color */
322 
323     nrows = 0;
324     for (j=0; j<N; j++) { /* loop over rows */
325       if (rowhit[j]) {
326         Jentry[nz].row     = j;              /* local row index */
327         Jentry[nz].col     = rowhit[j] - 1;  /* local column index */
328         Jentry[nz].valaddr = valaddrhit[j];  /* address of mat value for this entry */
329         nz++;
330         rowhit[j] = 0.0;                          /* zero rowhit for reuse */
331       }
332     }
333     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
334   }
335   if (nz != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz);
336 
337   if (isBAIJ) {
338     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
339   } else {
340     ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
341   }
342   ierr = PetscFree4(rowhit,columnsforrow,spidxhit,valaddrhit);CHKERRQ(ierr);
343   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
344 
345   c->ctype                  = IS_COLORING_GHOSTED;
346   if (isBAIJ) {
347     mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ;
348     ierr = PetscMalloc(bs*mat->cmap->N*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr);
349   } else {
350     mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
351   }
352   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
353   PetscFunctionReturn(0);
354 }
355 
356 #undef __FUNCT__
357 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
358 /*
359     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
360     This is why it has the ugly code with the MatGetBlockSize()
361 */
362 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
363 {
364   PetscErrorCode ierr;
365   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
366   const PetscInt *is,*rows,*ci,*cj;
367   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
368   IS             *isa;
369   PetscBool      done,flg = PETSC_FALSE;
370   PetscBool      flg1;
371   PetscBool      new_impl=PETSC_FALSE;
372 
373   PetscFunctionBegin;
374   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
375   if (new_impl){
376     ierr =  MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr);
377     PetscFunctionReturn(0);
378   }
379   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
380 
381   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
382   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
383      SeqBAIJ calls this routine, thus check it */
384   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
385   if (flg1) {
386     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
387   }
388 
389   N         = mat->cmap->N/bs;
390   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
391   c->N      = mat->cmap->N/bs;
392   c->m      = mat->rmap->N/bs;
393   c->rstart = 0;
394 
395   c->ncolors = nis;
396   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
397   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
398   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
399   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
400   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
401 
402   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
403   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
404 
405   /*
406      Temporary option to allow for debugging/testing
407   */
408   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
409 
410   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
411   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
412 
413   for (i=0; i<nis; i++) {
414     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
415     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
416 
417     c->ncolumns[i] = n;
418     if (n) {
419       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
420       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
421     } else {
422       c->columns[i] = 0;
423     }
424 
425     if (!flg) { /* ------------------------------------------------------------------------------*/
426       /* fast, crude version requires O(N*N) work */
427       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
428       /* loop over columns*/
429       for (j=0; j<n; j++) {
430         col  = is[j];
431         rows = cj + ci[col];
432         m    = ci[col+1] - ci[col];
433         /* loop over columns marking them in rowhit */
434         for (k=0; k<m; k++) {
435           rowhit[*rows++] = col + 1;
436         }
437       }
438       /* count the number of hits */
439       nrows = 0;
440       for (j=0; j<N; j++) {
441         if (rowhit[j]) nrows++;
442       }
443       c->nrows[i] = nrows;
444       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
445       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
446       nrows       = 0;
447       for (j=0; j<N; j++) {
448         if (rowhit[j]) {
449           c->rows[i][nrows]          = j;
450           c->columnsforrow[i][nrows] = rowhit[j] - 1;
451           nrows++;
452         }
453       }
454     } else {  /*-------------------------------------------------------------------------------*/
455       /* slow version, using rowhit as a linked list */
456       PetscInt currentcol,fm,mfm;
457       rowhit[N] = N;
458       nrows     = 0;
459       /* loop over columns */
460       for (j=0; j<n; j++) {
461         col  = is[j];
462         rows = cj + ci[col];
463         m    = ci[col+1] - ci[col];
464         /* loop over columns marking them in rowhit */
465         fm = N;    /* fm points to first entry in linked list */
466         for (k=0; k<m; k++) {
467           currentcol = *rows++;
468           /* is it already in the list? */
469           do {
470             mfm = fm;
471             fm  = rowhit[fm];
472           } while (fm < currentcol);
473           /* not in list so add it */
474           if (fm != currentcol) {
475             nrows++;
476             columnsforrow[currentcol] = col;
477             /* next three lines insert new entry into linked list */
478             rowhit[mfm]        = currentcol;
479             rowhit[currentcol] = fm;
480             fm                 = currentcol;
481             /* fm points to present position in list since we know the columns are sorted */
482           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
483         }
484       }
485       c->nrows[i] = nrows;
486       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
487       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
488       /* now store the linked list of rows into c->rows[i] */
489       nrows = 0;
490       fm    = rowhit[N];
491       do {
492         c->rows[i][nrows]            = fm;
493         c->columnsforrow[i][nrows++] = columnsforrow[fm];
494         fm                           = rowhit[fm];
495       } while (fm < N);
496     } /* ---------------------------------------------------------------------------------------*/
497     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
498   }
499   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
500 
501   ierr = PetscFree(rowhit);CHKERRQ(ierr);
502   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
503 
504   /* Optimize by adding the vscale, and scaleforrow[][] fields */
505   /*
506        see the version for MPIAIJ
507   */
508   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
509   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
510   for (k=0; k<c->ncolors; k++) {
511     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
512     for (l=0; l<c->nrows[k]; l++) {
513       col = c->columnsforrow[k][l];
514 
515       c->vscaleforrow[k][l] = col;
516     }
517   }
518   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522