xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision d6752224a9aa55b1a88994429cf3102ae135c005)
170c3da92SBarry Smith 
2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
3525d23c0SHong Zhang #include <../src/mat/impls/baij/seq/baij.h>
4525d23c0SHong Zhang 
5*d6752224SHong Zhang /* #define STOREVALS */
6b582cc96SHong Zhang #undef __FUNCT__
7b582cc96SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqBAIJ"
8b582cc96SHong Zhang PetscErrorCode  MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
9b582cc96SHong Zhang {
10b582cc96SHong Zhang   PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
11b582cc96SHong Zhang   PetscErrorCode ierr;
12*d6752224SHong Zhang   PetscInt       bs=J->rmap->bs,bs2=bs*bs,i,j,k,l,row,col,N;
13b582cc96SHong Zhang   PetscScalar    dx,*y,*xx,*w3_array;
14b582cc96SHong Zhang   PetscScalar    *vscale_array;
15b582cc96SHong Zhang   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
16b582cc96SHong Zhang   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
17b582cc96SHong Zhang   void           *fctx   = coloring->fctx;
18b582cc96SHong Zhang   PetscBool      flg     = PETSC_FALSE;
19b582cc96SHong Zhang   Mat_SeqBAIJ    *csp=(Mat_SeqBAIJ*)J->data;
20*d6752224SHong Zhang   PetscScalar    *ca = csp->a,*ca_l;
21b582cc96SHong Zhang   PetscInt       brow,bcol,bspidx,spidx,nz,nz_i;
22*d6752224SHong Zhang #if defined(STOREVALS)
23*d6752224SHong Zhang   PetscScalar   *vals,*vals_l;
24*d6752224SHong Zhang #endif
25*d6752224SHong Zhang 
26b582cc96SHong Zhang 
27b582cc96SHong Zhang   PetscFunctionBegin;
28b582cc96SHong Zhang   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
29b582cc96SHong Zhang   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
30b582cc96SHong Zhang   if (flg) {
31b582cc96SHong Zhang     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
32b582cc96SHong Zhang   } else {
33b582cc96SHong Zhang     PetscBool assembled;
34b582cc96SHong Zhang     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
35b582cc96SHong Zhang     if (assembled) {
36b582cc96SHong Zhang       ierr = MatZeroEntries(J);CHKERRQ(ierr);
37b582cc96SHong Zhang     }
38b582cc96SHong Zhang   }
39b582cc96SHong Zhang 
40b582cc96SHong Zhang   if (!coloring->vscale) {
41b582cc96SHong Zhang     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
42b582cc96SHong Zhang   }
43b582cc96SHong Zhang 
44b582cc96SHong Zhang   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
45b582cc96SHong Zhang     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
46b582cc96SHong Zhang   }
47b582cc96SHong Zhang 
48b582cc96SHong Zhang   /* Set w1 = F(x1) */
49b582cc96SHong Zhang   if (!coloring->fset) {
50b582cc96SHong Zhang     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
51b582cc96SHong Zhang     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
52b582cc96SHong Zhang     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
53b582cc96SHong Zhang   } else {
54b582cc96SHong Zhang     coloring->fset = PETSC_FALSE;
55b582cc96SHong Zhang   }
56b582cc96SHong Zhang 
57b582cc96SHong Zhang   if (!coloring->w3) {
58b582cc96SHong Zhang     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
59b582cc96SHong Zhang     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
60b582cc96SHong Zhang   }
61b582cc96SHong Zhang   w3 = coloring->w3;
62b582cc96SHong Zhang 
63b582cc96SHong Zhang   /* Compute all the local scale factors: vscale = 1./dx = 1./(epsilon*xx) */
64b582cc96SHong Zhang   ierr = VecGetLocalSize(x1,&N);CHKERRQ(ierr);
65b582cc96SHong Zhang   ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
66b582cc96SHong Zhang   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
67b582cc96SHong Zhang 
68b582cc96SHong Zhang   /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
69b582cc96SHong Zhang   for (col=0; col<N; col++) {
70b582cc96SHong Zhang     if (coloring->htype[0] == 'w') {
71b582cc96SHong Zhang       dx = 1.0 + unorm;
72b582cc96SHong Zhang     } else {
73b582cc96SHong Zhang       dx = xx[col];
74b582cc96SHong Zhang     }
75b582cc96SHong Zhang     if (dx == (PetscScalar)0.0) dx = 1.0;
76b582cc96SHong Zhang     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
77b582cc96SHong Zhang     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
78b582cc96SHong Zhang     dx               *= epsilon;
79b582cc96SHong Zhang     vscale_array[col] = (PetscScalar)dx;
80b582cc96SHong Zhang   }
81b582cc96SHong Zhang 
82*d6752224SHong Zhang #if defined(STOREVALS)
83*d6752224SHong Zhang   ierr = PetscMalloc(N*bs*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
84*d6752224SHong Zhang #endif
85*d6752224SHong Zhang 
86b582cc96SHong Zhang   nz = 0;
87b582cc96SHong Zhang   for (k=0; k<coloring->ncolors; k++) { /*  Loop over each color */
88b582cc96SHong Zhang     coloring->currentcolor = k;
89b582cc96SHong Zhang     nz_i = nz;
90b582cc96SHong Zhang     for (i=0; i<bs; i++) {
91b582cc96SHong Zhang       nz = nz_i;
92b582cc96SHong Zhang       ierr = VecCopy(x1,w3);CHKERRQ(ierr);
93b582cc96SHong Zhang       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
94b582cc96SHong Zhang 
95b582cc96SHong Zhang       /*
96b582cc96SHong Zhang         Loop over each column associated with color
97b582cc96SHong Zhang         adding the perturbation to the vector w3.
98b582cc96SHong Zhang       */
99b582cc96SHong Zhang       for (l=0; l<coloring->ncolumns[k]; l++) {
100b582cc96SHong Zhang         col = i + bs*coloring->columns[k][l];
101b582cc96SHong Zhang         w3_array[col] += vscale_array[col];
102b582cc96SHong Zhang       }
103b582cc96SHong Zhang       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
104b582cc96SHong Zhang 
105b582cc96SHong Zhang       /*
106b582cc96SHong Zhang         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
107b582cc96SHong Zhang         w2 = F(x1 + dx) - F(x1)
108b582cc96SHong Zhang       */
109b582cc96SHong Zhang       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
110b582cc96SHong Zhang       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
111b582cc96SHong Zhang       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
112b582cc96SHong Zhang       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
113b582cc96SHong Zhang 
114b582cc96SHong Zhang       /*
115b582cc96SHong Zhang         Loop over rows of vector, putting results into Jacobian matrix
116b582cc96SHong Zhang       */
117b582cc96SHong Zhang       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
118b582cc96SHong Zhang       for (l=0; l<coloring->nrows[k]; l++) {
119b582cc96SHong Zhang         brow   = coloring->rowcolden2sp3[nz++];
120b582cc96SHong Zhang         bcol   = coloring->rowcolden2sp3[nz++];
121b582cc96SHong Zhang         bspidx = coloring->rowcolden2sp3[nz++];
122*d6752224SHong Zhang #if defined(STOREVALS)
123*d6752224SHong Zhang         vals_l = vals + l*bs2;
124*d6752224SHong Zhang #endif
125b582cc96SHong Zhang         row   = bs*brow;
126b582cc96SHong Zhang         col   = bs*bcol + i;
127*d6752224SHong Zhang #if !defined(STOREVALS)
128*d6752224SHong Zhang         ca_l  = ca + bs2*bspidx + i*bs;
129*d6752224SHong Zhang #endif
130b582cc96SHong Zhang         for (j=0; j<bs; j++) {
131*d6752224SHong Zhang #if defined(STOREVALS)
132*d6752224SHong Zhang           vals_l[i*bs+j] = y[row+j]/vscale_array[col];
133*d6752224SHong Zhang #else
134*d6752224SHong Zhang           ca_l[j] = y[row+j]/vscale_array[col];
135*d6752224SHong Zhang #endif
136b582cc96SHong Zhang         }
137b582cc96SHong Zhang       }
138b582cc96SHong Zhang       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
139*d6752224SHong Zhang     } /* endof for (i=0; i<bs; i++) */
140*d6752224SHong Zhang 
141*d6752224SHong Zhang #if defined(STOREVALS)
142*d6752224SHong Zhang     /* insert vals to J */
143*d6752224SHong Zhang     nz = nz_i;
144*d6752224SHong Zhang     for (l=0; l<coloring->nrows[k]; l++) {
145*d6752224SHong Zhang       nz += 2;
146*d6752224SHong Zhang       bspidx = coloring->rowcolden2sp3[nz++];
147*d6752224SHong Zhang       vals_l  = vals + l*bs2;
148*d6752224SHong Zhang       ca_l    = ca + bs2*bspidx;
149*d6752224SHong Zhang       for (i=0; i<bs2; i++) {
150*d6752224SHong Zhang         ca_l[i] = vals_l[i];
151b582cc96SHong Zhang       }
152*d6752224SHong Zhang     }
153*d6752224SHong Zhang #endif
154b582cc96SHong Zhang   } /* endof for each color */
155b582cc96SHong Zhang   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
156b582cc96SHong Zhang   ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
157*d6752224SHong Zhang #if defined(STOREVALS)
158*d6752224SHong Zhang   ierr = PetscFree(vals);CHKERRQ(ierr);
159*d6752224SHong Zhang #endif
160b582cc96SHong Zhang 
161b582cc96SHong Zhang   coloring->currentcolor = -1;
162b582cc96SHong Zhang   PetscFunctionReturn(0);
163b582cc96SHong Zhang }
164b582cc96SHong Zhang 
165525d23c0SHong Zhang /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */
166525d23c0SHong Zhang /* #define JACOBIANCOLOROPT */
167525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
168525d23c0SHong Zhang #include <petsctime.h>
169525d23c0SHong Zhang #endif
170525d23c0SHong Zhang #undef __FUNCT__
171525d23c0SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqAIJ"
172525d23c0SHong Zhang PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
173525d23c0SHong Zhang {
174525d23c0SHong Zhang   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
175525d23c0SHong Zhang   PetscErrorCode ierr;
176b582cc96SHong Zhang   PetscInt       k,l,row,col,N,bs=J->rmap->bs,bs2=bs*bs;
177525d23c0SHong Zhang   PetscScalar    dx,*y,*xx,*w3_array;
178525d23c0SHong Zhang   PetscScalar    *vscale_array;
179525d23c0SHong Zhang   PetscReal      epsilon=coloring->error_rel,umin = coloring->umin,unorm;
180525d23c0SHong Zhang   Vec            w1=coloring->w1,w2=coloring->w2,w3;
181525d23c0SHong Zhang   void           *fctx=coloring->fctx;
182b582cc96SHong Zhang   PetscBool      flg=PETSC_FALSE,isBAIJ=PETSC_FALSE;
183b582cc96SHong Zhang   PetscScalar    *ca;
184525d23c0SHong Zhang   const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
185525d23c0SHong Zhang   PetscInt       **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx;
186525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
187525d23c0SHong Zhang   PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11;
188525d23c0SHong Zhang #endif
189525d23c0SHong Zhang 
190525d23c0SHong Zhang   PetscFunctionBegin;
191525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
192525d23c0SHong Zhang   ierr = PetscTime(&t0);CHKERRQ(ierr);
193525d23c0SHong Zhang #endif
194b582cc96SHong Zhang   ierr = PetscObjectTypeCompare((PetscObject)J,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
195b582cc96SHong Zhang   if (isBAIJ) {
196b582cc96SHong Zhang     Mat_SeqBAIJ     *csp=(Mat_SeqBAIJ*)J->data;
197b582cc96SHong Zhang     ca = csp->a;
198b582cc96SHong Zhang   } else {
199b582cc96SHong Zhang     Mat_SeqAIJ     *csp=(Mat_SeqAIJ*)J->data;
200b582cc96SHong Zhang     ca = csp->a;
201b582cc96SHong Zhang     bs = 1; bs2 = 1;
202b582cc96SHong Zhang   }
203b582cc96SHong Zhang 
204525d23c0SHong Zhang   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
205525d23c0SHong Zhang   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
206525d23c0SHong Zhang   if (flg) {
207525d23c0SHong Zhang     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
208525d23c0SHong Zhang   } else {
209525d23c0SHong Zhang     PetscBool assembled;
210525d23c0SHong Zhang     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
211525d23c0SHong Zhang     if (assembled) {
212525d23c0SHong Zhang       ierr = MatZeroEntries(J);CHKERRQ(ierr);
213525d23c0SHong Zhang     }
214525d23c0SHong Zhang   }
215525d23c0SHong Zhang 
216525d23c0SHong Zhang   if (!coloring->vscale) {
217525d23c0SHong Zhang     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
218525d23c0SHong Zhang   }
219525d23c0SHong Zhang 
220525d23c0SHong Zhang   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
221525d23c0SHong Zhang     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
222525d23c0SHong Zhang   }
223525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
224525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
225525d23c0SHong Zhang     t_init += t1 - t0;
226525d23c0SHong Zhang #endif
227525d23c0SHong Zhang 
228525d23c0SHong Zhang   /* Set w1 = F(x1) */
229525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
230525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
231525d23c0SHong Zhang #endif
232525d23c0SHong Zhang   if (!coloring->fset) {
233525d23c0SHong Zhang     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
234525d23c0SHong Zhang     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
235525d23c0SHong Zhang     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
236525d23c0SHong Zhang   } else {
237525d23c0SHong Zhang     coloring->fset = PETSC_FALSE;
238525d23c0SHong Zhang   }
239525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
240525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
241525d23c0SHong Zhang     t_setvals += t1 - t0;
242525d23c0SHong Zhang #endif
243525d23c0SHong Zhang 
244525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
245525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
246525d23c0SHong Zhang #endif
247525d23c0SHong Zhang   if (!coloring->w3) {
248525d23c0SHong Zhang     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
249525d23c0SHong Zhang     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
250525d23c0SHong Zhang   }
251525d23c0SHong Zhang   w3 = coloring->w3;
252525d23c0SHong Zhang 
253525d23c0SHong Zhang   /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */
254525d23c0SHong Zhang   ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
255525d23c0SHong Zhang   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
256525d23c0SHong Zhang   ierr = VecGetSize(x1,&N);CHKERRQ(ierr);
257525d23c0SHong Zhang   for (col=0; col<N; col++) {
258525d23c0SHong Zhang     if (coloring->htype[0] == 'w') {
259525d23c0SHong Zhang       dx = 1.0 + unorm;
260525d23c0SHong Zhang     } else {
261525d23c0SHong Zhang       dx = xx[col];
262525d23c0SHong Zhang     }
263525d23c0SHong Zhang     if (dx == (PetscScalar)0.0) dx = 1.0;
264525d23c0SHong Zhang     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
265525d23c0SHong Zhang     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
266525d23c0SHong Zhang     dx               *= epsilon;
267525d23c0SHong Zhang     vscale_array[col] = (PetscScalar)dx;
268525d23c0SHong Zhang   }
269525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
270525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
271525d23c0SHong Zhang     t_init += t1 - t0;
272525d23c0SHong Zhang #endif
273525d23c0SHong Zhang 
274525d23c0SHong Zhang   nz  = 0;
275525d23c0SHong Zhang   for (k=0; k<ncolors; k++) { /* loop over colors */
276525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
277525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
278525d23c0SHong Zhang #endif
279525d23c0SHong Zhang     coloring->currentcolor = k;
280525d23c0SHong Zhang 
281525d23c0SHong Zhang     /*
282525d23c0SHong Zhang       Loop over each column associated with color
283525d23c0SHong Zhang       adding the perturbation to the vector w3 = x1 + dx.
284525d23c0SHong Zhang     */
285525d23c0SHong Zhang     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
286525d23c0SHong Zhang     ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
287525d23c0SHong Zhang     ncolumns_k = ncolumns[k];
288525d23c0SHong Zhang     for (l=0; l<ncolumns_k; l++) { /* loop over columns */
289525d23c0SHong Zhang       col = columns[k][l];
290525d23c0SHong Zhang       w3_array[col] += vscale_array[col];
291525d23c0SHong Zhang     }
292525d23c0SHong Zhang     ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
293525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
294525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
295525d23c0SHong Zhang     t_dx += t1 - t0;
296525d23c0SHong Zhang #endif
297525d23c0SHong Zhang 
298525d23c0SHong Zhang     /*
299525d23c0SHong Zhang       Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
300525d23c0SHong Zhang                            w2 = F(x1 + dx) - F(x1)
301525d23c0SHong Zhang     */
302525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
303525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
304525d23c0SHong Zhang #endif
305525d23c0SHong Zhang     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
306525d23c0SHong Zhang     ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
307525d23c0SHong Zhang     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
308525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
309525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
310525d23c0SHong Zhang     t_func += t1 - t0;
311525d23c0SHong Zhang #endif
312525d23c0SHong Zhang 
313525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
314525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
315525d23c0SHong Zhang #endif
316525d23c0SHong Zhang     ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
317525d23c0SHong Zhang 
318525d23c0SHong Zhang     /*
319525d23c0SHong Zhang       Loop over rows of vector, putting w2/dx into Jacobian matrix
320525d23c0SHong Zhang     */
321525d23c0SHong Zhang     ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
322525d23c0SHong Zhang     nrows_k = nrows[k];
323525d23c0SHong Zhang     for (l=0; l<nrows_k; l++) { /* loop over rows */
324525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
325525d23c0SHong Zhang     ierr = PetscTime(&t00);CHKERRQ(ierr);
326525d23c0SHong Zhang #endif
327525d23c0SHong Zhang       row   = coloring->rowcolden2sp3[nz++];
328525d23c0SHong Zhang       col   = coloring->rowcolden2sp3[nz++];
329525d23c0SHong Zhang       spidx = coloring->rowcolden2sp3[nz++];
330525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
331525d23c0SHong Zhang       ierr = PetscTime(&t11);CHKERRQ(ierr);
332525d23c0SHong Zhang       t_kl += t11 - t00;
333525d23c0SHong Zhang #endif
334b582cc96SHong Zhang       //printf(" row col val
335b582cc96SHong Zhang       ca[spidx*bs2] = y[row*bs]/vscale_array[col*bs];
336b582cc96SHong Zhang       //printf(" (%d, %d, %g)\n", row*bs,col*bs,ca[spidx*bs2]);
337525d23c0SHong Zhang     }
338525d23c0SHong Zhang     ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
339525d23c0SHong Zhang 
340525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
341525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
342525d23c0SHong Zhang     t_setvals += t1 - t0;
343525d23c0SHong Zhang #endif
344525d23c0SHong Zhang   } /* endof for each color */
345525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
346525d23c0SHong Zhang   printf("     FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx);
347525d23c0SHong Zhang   printf("     FDColorApply time: kl %g\n",t_kl);
348525d23c0SHong Zhang #endif
349525d23c0SHong Zhang   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
350525d23c0SHong Zhang   ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
351525d23c0SHong Zhang 
352525d23c0SHong Zhang   coloring->currentcolor = -1;
353525d23c0SHong Zhang   PetscFunctionReturn(0);
354525d23c0SHong Zhang }
355639f9d9dSBarry Smith 
35679c1e64dSHong Zhang #undef __FUNCT__
35779c1e64dSHong Zhang #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp"
35879c1e64dSHong Zhang /*
35979c1e64dSHong Zhang     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
36079c1e64dSHong Zhang */
36179c1e64dSHong Zhang PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c)
36279c1e64dSHong Zhang {
36379c1e64dSHong Zhang   PetscErrorCode ierr;
36479c1e64dSHong Zhang   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
36579c1e64dSHong Zhang   const PetscInt *is,*rows,*ci,*cj;
36652011a10SHong Zhang   PetscInt       nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz;
36779c1e64dSHong Zhang   IS             *isa;
368525d23c0SHong Zhang   PetscBool      isBAIJ;
36979c1e64dSHong Zhang   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
37079c1e64dSHong Zhang 
37179c1e64dSHong Zhang   PetscFunctionBegin;
37279c1e64dSHong Zhang   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
37352011a10SHong Zhang 
374476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
375476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
37679c1e64dSHong Zhang   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
377525d23c0SHong Zhang   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
378525d23c0SHong Zhang   if (!isBAIJ) {
37952011a10SHong Zhang     bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */
38079c1e64dSHong Zhang   }
38179c1e64dSHong Zhang   N         = mat->cmap->N/bs;
38279c1e64dSHong Zhang   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
38379c1e64dSHong Zhang   c->N      = mat->cmap->N/bs;
38479c1e64dSHong Zhang   c->m      = mat->rmap->N/bs;
38579c1e64dSHong Zhang   c->rstart = 0;
38679c1e64dSHong Zhang 
38779c1e64dSHong Zhang   c->ncolors = nis;
38879c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
38979c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
39079c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
39185740eacSHong Zhang   ierr       = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr);
39279c1e64dSHong Zhang 
393525d23c0SHong Zhang   if (isBAIJ) {
394525d23c0SHong Zhang     ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
395525d23c0SHong Zhang   } else {
3966378f32dSHong Zhang     ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
397525d23c0SHong Zhang   }
39879c1e64dSHong Zhang 
399476e0d0aSHong Zhang   ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr);
40079c1e64dSHong Zhang   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
40179c1e64dSHong Zhang 
40217a7dee1SHong Zhang   nz = 0;
40379c1e64dSHong Zhang   for (i=0; i<nis; i++) { /* loop over colors */
40479c1e64dSHong Zhang     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
40579c1e64dSHong Zhang     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
40679c1e64dSHong Zhang 
40779c1e64dSHong Zhang     c->ncolumns[i] = n;
40879c1e64dSHong Zhang     if (n) {
40979c1e64dSHong Zhang       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
41079c1e64dSHong Zhang       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
41179c1e64dSHong Zhang     } else {
41279c1e64dSHong Zhang       c->columns[i] = 0;
41379c1e64dSHong Zhang     }
41479c1e64dSHong Zhang 
41579c1e64dSHong Zhang     /* fast, crude version requires O(N*N) work */
416476e0d0aSHong Zhang     nrows = 0;
41779c1e64dSHong Zhang     for (j=0; j<n; j++) {  /* loop over columns */
41879c1e64dSHong Zhang       col  = is[j];
41979c1e64dSHong Zhang       rows = cj + ci[col];
42079c1e64dSHong Zhang       m    = ci[col+1] - ci[col];
42179c1e64dSHong Zhang       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
42279c1e64dSHong Zhang         rowhit[*rows]     = col + 1;
423476e0d0aSHong Zhang         spidxhit[*rows++] = spidx[ci[col] + k];
424476e0d0aSHong Zhang         nrows++;
42579c1e64dSHong Zhang       }
42679c1e64dSHong Zhang     }
427476e0d0aSHong Zhang     c->nrows[i] = nrows; /* total num of rows for this color */
42879c1e64dSHong Zhang 
42979c1e64dSHong Zhang     nrows = 0;
43079c1e64dSHong Zhang     for (j=0; j<N; j++) { /* loop over rows */
43179c1e64dSHong Zhang       if (rowhit[j]) {
43217a7dee1SHong Zhang         c->rowcolden2sp3[nz++]   = j;             /* row index */
43317a7dee1SHong Zhang         c->rowcolden2sp3[nz++] = rowhit[j] - 1;   /* column index */
43417a7dee1SHong Zhang         c->rowcolden2sp3[nz++] = spidxhit[j];     /* den2sp index */
43579c1e64dSHong Zhang         rowhit[j] = 0.0;                          /* zero rowhit for reuse */
43679c1e64dSHong Zhang       }
43779c1e64dSHong Zhang     }
43879c1e64dSHong Zhang     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
43979c1e64dSHong Zhang   }
44017a7dee1SHong Zhang   if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz);
44185740eacSHong Zhang 
442525d23c0SHong Zhang   if (isBAIJ) {
443525d23c0SHong Zhang     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
444525d23c0SHong Zhang   } else {
4456378f32dSHong Zhang     ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
446525d23c0SHong Zhang   }
447476e0d0aSHong Zhang   ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr);
44879c1e64dSHong Zhang   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
449476e0d0aSHong Zhang 
4504737c552SHong Zhang   c->ctype                  = IS_COLORING_GHOSTED;
451b582cc96SHong Zhang   if (isBAIJ) {
452b582cc96SHong Zhang     mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ;
453b582cc96SHong Zhang   } else {
45479c1e64dSHong Zhang     mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
455b582cc96SHong Zhang   }
45652011a10SHong Zhang   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
45779c1e64dSHong Zhang   PetscFunctionReturn(0);
45879c1e64dSHong Zhang }
45979c1e64dSHong Zhang 
4604a2ae208SSatish Balay #undef __FUNCT__
4614a2ae208SSatish Balay #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
4623acb8795SBarry Smith /*
4633acb8795SBarry Smith     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
4643acb8795SBarry Smith     This is why it has the ugly code with the MatGetBlockSize()
4653acb8795SBarry Smith */
466dfbe8321SBarry Smith PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
467639f9d9dSBarry Smith {
4686849ba73SBarry Smith   PetscErrorCode ierr;
4691a83f524SJed Brown   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
4701a83f524SJed Brown   const PetscInt *is,*rows,*ci,*cj;
4713acb8795SBarry Smith   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
472b9617806SBarry Smith   IS             *isa;
473ace3abfcSBarry Smith   PetscBool      done,flg = PETSC_FALSE;
474476e0d0aSHong Zhang   PetscBool      flg1;
47579c1e64dSHong Zhang   PetscBool      new_impl=PETSC_FALSE;
476639f9d9dSBarry Smith 
4773a40ed3dSBarry Smith   PetscFunctionBegin;
47879c1e64dSHong Zhang   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
47979c1e64dSHong Zhang   if (new_impl){
48079c1e64dSHong Zhang     ierr =  MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr);
48179c1e64dSHong Zhang     PetscFunctionReturn(0);
48279c1e64dSHong Zhang   }
483e7e72b3dSBarry Smith   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
484522c5e43SBarry Smith 
485b9617806SBarry Smith   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
486476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
487476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
488251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
489476e0d0aSHong Zhang   if (flg1) {
4903acb8795SBarry Smith     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
4913acb8795SBarry Smith   }
4923acb8795SBarry Smith 
4933acb8795SBarry Smith   N         = mat->cmap->N/bs;
4943acb8795SBarry Smith   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
4953acb8795SBarry Smith   c->N      = mat->cmap->N/bs;
4963acb8795SBarry Smith   c->m      = mat->rmap->N/bs;
497005c665bSBarry Smith   c->rstart = 0;
498005c665bSBarry Smith 
499639f9d9dSBarry Smith   c->ncolors = nis;
50038baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
50138baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
50238baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
50338baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
50438baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
50543a90d84SBarry Smith 
5063acb8795SBarry Smith   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
507ce94432eSBarry Smith   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
50870c3da92SBarry Smith 
50970c3da92SBarry Smith   /*
510639f9d9dSBarry Smith      Temporary option to allow for debugging/testing
51170c3da92SBarry Smith   */
5120298fd71SBarry Smith   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
51370c3da92SBarry Smith 
51438baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
51538baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
51670c3da92SBarry Smith 
517639f9d9dSBarry Smith   for (i=0; i<nis; i++) {
518b9b97703SBarry Smith     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
519639f9d9dSBarry Smith     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
5202205254eSKarl Rupp 
521639f9d9dSBarry Smith     c->ncolumns[i] = n;
522639f9d9dSBarry Smith     if (n) {
52338baddfdSBarry Smith       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
52438baddfdSBarry Smith       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
52570c3da92SBarry Smith     } else {
526639f9d9dSBarry Smith       c->columns[i] = 0;
52770c3da92SBarry Smith     }
52870c3da92SBarry Smith 
5293a7fca6bSBarry Smith     if (!flg) { /* ------------------------------------------------------------------------------*/
5303a7fca6bSBarry Smith       /* fast, crude version requires O(N*N) work */
53138baddfdSBarry Smith       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
532639f9d9dSBarry Smith       /* loop over columns*/
533639f9d9dSBarry Smith       for (j=0; j<n; j++) {
534639f9d9dSBarry Smith         col  = is[j];
535639f9d9dSBarry Smith         rows = cj + ci[col];
536639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
537639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
538639f9d9dSBarry Smith         for (k=0; k<m; k++) {
539639f9d9dSBarry Smith           rowhit[*rows++] = col + 1;
54070c3da92SBarry Smith         }
54170c3da92SBarry Smith       }
542639f9d9dSBarry Smith       /* count the number of hits */
543639f9d9dSBarry Smith       nrows = 0;
54470c3da92SBarry Smith       for (j=0; j<N; j++) {
545639f9d9dSBarry Smith         if (rowhit[j]) nrows++;
546639f9d9dSBarry Smith       }
547639f9d9dSBarry Smith       c->nrows[i] = nrows;
54838baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
54938baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
550639f9d9dSBarry Smith       nrows       = 0;
551639f9d9dSBarry Smith       for (j=0; j<N; j++) {
552639f9d9dSBarry Smith         if (rowhit[j]) {
553639f9d9dSBarry Smith           c->rows[i][nrows]          = j;
554639f9d9dSBarry Smith           c->columnsforrow[i][nrows] = rowhit[j] - 1;
555639f9d9dSBarry Smith           nrows++;
55670c3da92SBarry Smith         }
55770c3da92SBarry Smith       }
558639f9d9dSBarry Smith     } else {  /*-------------------------------------------------------------------------------*/
5593a7fca6bSBarry Smith       /* slow version, using rowhit as a linked list */
56038baddfdSBarry Smith       PetscInt currentcol,fm,mfm;
561639f9d9dSBarry Smith       rowhit[N] = N;
562639f9d9dSBarry Smith       nrows     = 0;
563639f9d9dSBarry Smith       /* loop over columns */
564639f9d9dSBarry Smith       for (j=0; j<n; j++) {
565639f9d9dSBarry Smith         col  = is[j];
566639f9d9dSBarry Smith         rows = cj + ci[col];
567639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
568639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
569639f9d9dSBarry Smith         fm = N;    /* fm points to first entry in linked list */
570639f9d9dSBarry Smith         for (k=0; k<m; k++) {
571639f9d9dSBarry Smith           currentcol = *rows++;
572639f9d9dSBarry Smith           /* is it already in the list? */
573639f9d9dSBarry Smith           do {
574639f9d9dSBarry Smith             mfm = fm;
575639f9d9dSBarry Smith             fm  = rowhit[fm];
576639f9d9dSBarry Smith           } while (fm < currentcol);
577639f9d9dSBarry Smith           /* not in list so add it */
578639f9d9dSBarry Smith           if (fm != currentcol) {
579639f9d9dSBarry Smith             nrows++;
580639f9d9dSBarry Smith             columnsforrow[currentcol] = col;
581639f9d9dSBarry Smith             /* next three lines insert new entry into linked list */
582639f9d9dSBarry Smith             rowhit[mfm]        = currentcol;
583639f9d9dSBarry Smith             rowhit[currentcol] = fm;
584639f9d9dSBarry Smith             fm                 = currentcol;
585639f9d9dSBarry Smith             /* fm points to present position in list since we know the columns are sorted */
58671cd77b2SBarry Smith           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
587639f9d9dSBarry Smith         }
588639f9d9dSBarry Smith       }
589639f9d9dSBarry Smith       c->nrows[i] = nrows;
59038baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
59138baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
592639f9d9dSBarry Smith       /* now store the linked list of rows into c->rows[i] */
593639f9d9dSBarry Smith       nrows = 0;
594639f9d9dSBarry Smith       fm    = rowhit[N];
595639f9d9dSBarry Smith       do {
596639f9d9dSBarry Smith         c->rows[i][nrows]            = fm;
597639f9d9dSBarry Smith         c->columnsforrow[i][nrows++] = columnsforrow[fm];
598639f9d9dSBarry Smith         fm                           = rowhit[fm];
599639f9d9dSBarry Smith       } while (fm < N);
600639f9d9dSBarry Smith     } /* ---------------------------------------------------------------------------------------*/
601639f9d9dSBarry Smith     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
602639f9d9dSBarry Smith   }
6033acb8795SBarry Smith   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
604639f9d9dSBarry Smith 
605606d414cSSatish Balay   ierr = PetscFree(rowhit);CHKERRQ(ierr);
606606d414cSSatish Balay   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
607639f9d9dSBarry Smith 
60830b34957SBarry Smith   /* Optimize by adding the vscale, and scaleforrow[][] fields */
60930b34957SBarry Smith   /*
61030b34957SBarry Smith        see the version for MPIAIJ
61130b34957SBarry Smith   */
612ce94432eSBarry Smith   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
61338baddfdSBarry Smith   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
61430b34957SBarry Smith   for (k=0; k<c->ncolors; k++) {
61538baddfdSBarry Smith     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
61630b34957SBarry Smith     for (l=0; l<c->nrows[k]; l++) {
61730b34957SBarry Smith       col = c->columnsforrow[k][l];
6182205254eSKarl Rupp 
61930b34957SBarry Smith       c->vscaleforrow[k][l] = col;
62030b34957SBarry Smith     }
62130b34957SBarry Smith   }
622b9617806SBarry Smith   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
6233a40ed3dSBarry Smith   PetscFunctionReturn(0);
62470c3da92SBarry Smith }
62579c1e64dSHong Zhang 
626