xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision b7799381d9b8e477defde34ba576b62ec2b0e4ad)
170c3da92SBarry Smith 
2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
3525d23c0SHong Zhang #include <../src/mat/impls/baij/seq/baij.h>
4525d23c0SHong Zhang 
5b582cc96SHong Zhang #undef __FUNCT__
6b582cc96SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqBAIJ"
7b582cc96SHong Zhang PetscErrorCode  MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
8b582cc96SHong Zhang {
9b582cc96SHong Zhang   PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
10b582cc96SHong Zhang   PetscErrorCode ierr;
11*b7799381SHong Zhang   PetscInt       bs=J->rmap->bs,bs2=bs*bs,i,j,k,l,row,col,N=J->cmap->n,spidx,nz;
12*b7799381SHong Zhang   PetscScalar    dx,*dy_i,*xx,*w3_array,*dy=coloring->dy;
13b582cc96SHong Zhang   PetscScalar    *vscale_array;
14b582cc96SHong Zhang   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
15b582cc96SHong Zhang   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
16b582cc96SHong Zhang   void           *fctx   = coloring->fctx;
17b582cc96SHong Zhang   PetscBool      flg     = PETSC_FALSE;
18b582cc96SHong Zhang   Mat_SeqBAIJ    *csp=(Mat_SeqBAIJ*)J->data;
19d6752224SHong Zhang   PetscScalar    *ca = csp->a,*ca_l;
20b582cc96SHong Zhang 
21b582cc96SHong Zhang   PetscFunctionBegin;
22b582cc96SHong Zhang   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
23b582cc96SHong Zhang   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
24b582cc96SHong Zhang   if (flg) {
25b582cc96SHong Zhang     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
26b582cc96SHong Zhang   } else {
27b582cc96SHong Zhang     PetscBool assembled;
28b582cc96SHong Zhang     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
29b582cc96SHong Zhang     if (assembled) {
30b582cc96SHong Zhang       ierr = MatZeroEntries(J);CHKERRQ(ierr);
31b582cc96SHong Zhang     }
32b582cc96SHong Zhang   }
33b582cc96SHong Zhang   if (!coloring->vscale) {
34b582cc96SHong Zhang     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
35b582cc96SHong Zhang   }
36b582cc96SHong Zhang 
37b582cc96SHong Zhang   /* Set w1 = F(x1) */
38b582cc96SHong Zhang   if (!coloring->fset) {
39b582cc96SHong Zhang     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
40b582cc96SHong Zhang     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
41b582cc96SHong Zhang     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
42b582cc96SHong Zhang   } else {
43b582cc96SHong Zhang     coloring->fset = PETSC_FALSE;
44b582cc96SHong Zhang   }
45b582cc96SHong Zhang 
46b582cc96SHong Zhang   if (!coloring->w3) {
47b582cc96SHong Zhang     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
48b582cc96SHong Zhang     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
49b582cc96SHong Zhang   }
50b582cc96SHong Zhang   w3 = coloring->w3;
51b582cc96SHong Zhang 
52*b7799381SHong Zhang   /* Compute local scale factors: vscale = dx */
53b582cc96SHong Zhang   if (coloring->htype[0] == 'w') {
54*b7799381SHong Zhang     /* tacky test; need to make systematic if we add other approaches to computing h*/
55*b7799381SHong Zhang     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
56*b7799381SHong Zhang     dx = (1.0 + unorm)*epsilon;
57*b7799381SHong Zhang     ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr);
58b582cc96SHong Zhang   } else {
59*b7799381SHong Zhang     ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
60*b7799381SHong Zhang     ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
61*b7799381SHong Zhang     for (col=0; col<N; col++) {
62b582cc96SHong Zhang       dx = xx[col];
63*b7799381SHong Zhang       if (dx == 0.0) dx = 1.0;
64b582cc96SHong Zhang       if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
65b582cc96SHong Zhang       else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
66b582cc96SHong Zhang       dx               *= epsilon;
67*b7799381SHong Zhang       vscale_array[col] = dx;
68b582cc96SHong Zhang     }
69*b7799381SHong Zhang     ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
70*b7799381SHong Zhang     ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
7187d34202SHong Zhang   }
7287d34202SHong Zhang 
73b582cc96SHong Zhang   nz = 0;
74*b7799381SHong Zhang   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
75b582cc96SHong Zhang   for (k=0; k<coloring->ncolors; k++) { /*  Loop over each color */
76b582cc96SHong Zhang     coloring->currentcolor = k;
7787d34202SHong Zhang 
7887d34202SHong Zhang     /* Compute w3 = x1 + dx */
79b582cc96SHong Zhang     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
80*b7799381SHong Zhang     dy_i = dy;
81*b7799381SHong Zhang     for (i=0; i<bs; i++) {              /* Loop over a block of columns */
82b582cc96SHong Zhang       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
83b582cc96SHong Zhang       for (l=0; l<coloring->ncolumns[k]; l++) {
84b582cc96SHong Zhang         col            = i + bs*coloring->columns[k][l];
85b582cc96SHong Zhang         w3_array[col] += vscale_array[col];
86*b7799381SHong Zhang         if (i) {
87*b7799381SHong Zhang           w3_array[col-1] -= vscale_array[col-1]; /* resume original w3[col-1] */
88*b7799381SHong Zhang         }
89b582cc96SHong Zhang       }
90b582cc96SHong Zhang       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
91b582cc96SHong Zhang 
92*b7799381SHong Zhang       /* Evaluate function w2 = F(w3) - F(x1) */
93b582cc96SHong Zhang       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
94*b7799381SHong Zhang       ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */
95*b7799381SHong Zhang       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
96b582cc96SHong Zhang       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
97*b7799381SHong Zhang       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
98*b7799381SHong Zhang       ierr = VecResetArray(w2);CHKERRQ(ierr);
99*b7799381SHong Zhang       dy_i += N; /* points to dy+i*N */
10087d34202SHong Zhang     }
101b582cc96SHong Zhang 
102*b7799381SHong Zhang     /* Loop over row blocks, putting dy/dx into Jacobian matrix */
103b582cc96SHong Zhang     for (l=0; l<coloring->nrows[k]; l++) {
104*b7799381SHong Zhang       row   = bs*coloring->rowcolden2sp3[nz++];
105*b7799381SHong Zhang       col   = bs*coloring->rowcolden2sp3[nz++];
106*b7799381SHong Zhang       ca_l  = ca + bs2*coloring->rowcolden2sp3[nz++];
10726cb46bfSHong Zhang       spidx = 0;
108*b7799381SHong Zhang       dy_i  = dy;
10926cb46bfSHong Zhang       for (i=0; i<bs; i++) {   /* column of the block */
11026cb46bfSHong Zhang         for (j=0; j<bs; j++) { /* row of the block */
111*b7799381SHong Zhang           ca_l[spidx++] = dy_i[row+j]/vscale_array[col+i];
112b582cc96SHong Zhang         }
113*b7799381SHong Zhang         dy_i += N; /* points to dy+i*N */
11426cb46bfSHong Zhang       }
11526cb46bfSHong Zhang     }
116b582cc96SHong Zhang   } /* endof for each color */
117b582cc96SHong Zhang   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
118*b7799381SHong Zhang 
119b582cc96SHong Zhang   coloring->currentcolor = -1;
120b582cc96SHong Zhang   PetscFunctionReturn(0);
121b582cc96SHong Zhang }
122b582cc96SHong Zhang 
123525d23c0SHong Zhang /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */
124525d23c0SHong Zhang /* #define JACOBIANCOLOROPT */
125525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
126525d23c0SHong Zhang #include <petsctime.h>
127525d23c0SHong Zhang #endif
128525d23c0SHong Zhang #undef __FUNCT__
129525d23c0SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqAIJ"
130525d23c0SHong Zhang PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
131525d23c0SHong Zhang {
132525d23c0SHong Zhang   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
133525d23c0SHong Zhang   PetscErrorCode ierr;
134b582cc96SHong Zhang   PetscInt       k,l,row,col,N,bs=J->rmap->bs,bs2=bs*bs;
135525d23c0SHong Zhang   PetscScalar    dx,*y,*xx,*w3_array;
136525d23c0SHong Zhang   PetscScalar    *vscale_array;
137525d23c0SHong Zhang   PetscReal      epsilon=coloring->error_rel,umin = coloring->umin,unorm;
138525d23c0SHong Zhang   Vec            w1=coloring->w1,w2=coloring->w2,w3;
139525d23c0SHong Zhang   void           *fctx=coloring->fctx;
140b582cc96SHong Zhang   PetscBool      flg=PETSC_FALSE,isBAIJ=PETSC_FALSE;
141b582cc96SHong Zhang   PetscScalar    *ca;
142525d23c0SHong Zhang   const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
143525d23c0SHong Zhang   PetscInt       **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx;
144525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
145525d23c0SHong Zhang   PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11;
146525d23c0SHong Zhang #endif
147525d23c0SHong Zhang 
148525d23c0SHong Zhang   PetscFunctionBegin;
149525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
150525d23c0SHong Zhang   ierr = PetscTime(&t0);CHKERRQ(ierr);
151525d23c0SHong Zhang #endif
152b582cc96SHong Zhang   ierr = PetscObjectTypeCompare((PetscObject)J,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
153b582cc96SHong Zhang   if (isBAIJ) {
154b582cc96SHong Zhang     Mat_SeqBAIJ     *csp=(Mat_SeqBAIJ*)J->data;
155b582cc96SHong Zhang     ca = csp->a;
156b582cc96SHong Zhang   } else {
157b582cc96SHong Zhang     Mat_SeqAIJ     *csp=(Mat_SeqAIJ*)J->data;
158b582cc96SHong Zhang     ca = csp->a;
159b582cc96SHong Zhang     bs = 1; bs2 = 1;
160b582cc96SHong Zhang   }
161b582cc96SHong Zhang 
162525d23c0SHong Zhang   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
163525d23c0SHong Zhang   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
164525d23c0SHong Zhang   if (flg) {
165525d23c0SHong Zhang     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
166525d23c0SHong Zhang   } else {
167525d23c0SHong Zhang     PetscBool assembled;
168525d23c0SHong Zhang     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
169525d23c0SHong Zhang     if (assembled) {
170525d23c0SHong Zhang       ierr = MatZeroEntries(J);CHKERRQ(ierr);
171525d23c0SHong Zhang     }
172525d23c0SHong Zhang   }
173525d23c0SHong Zhang 
174525d23c0SHong Zhang   if (!coloring->vscale) {
175525d23c0SHong Zhang     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
176525d23c0SHong Zhang   }
177525d23c0SHong Zhang 
178525d23c0SHong Zhang   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
179525d23c0SHong Zhang     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
180525d23c0SHong Zhang   }
181525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
182525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
183525d23c0SHong Zhang     t_init += t1 - t0;
184525d23c0SHong Zhang #endif
185525d23c0SHong Zhang 
186525d23c0SHong Zhang   /* Set w1 = F(x1) */
187525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
188525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
189525d23c0SHong Zhang #endif
190525d23c0SHong Zhang   if (!coloring->fset) {
191525d23c0SHong Zhang     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
192525d23c0SHong Zhang     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
193525d23c0SHong Zhang     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
194525d23c0SHong Zhang   } else {
195525d23c0SHong Zhang     coloring->fset = PETSC_FALSE;
196525d23c0SHong Zhang   }
197525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
198525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
199525d23c0SHong Zhang     t_setvals += t1 - t0;
200525d23c0SHong Zhang #endif
201525d23c0SHong Zhang 
202525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
203525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
204525d23c0SHong Zhang #endif
205525d23c0SHong Zhang   if (!coloring->w3) {
206525d23c0SHong Zhang     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
207525d23c0SHong Zhang     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
208525d23c0SHong Zhang   }
209525d23c0SHong Zhang   w3 = coloring->w3;
210525d23c0SHong Zhang 
211525d23c0SHong Zhang   /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */
212525d23c0SHong Zhang   ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
213525d23c0SHong Zhang   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
214525d23c0SHong Zhang   ierr = VecGetSize(x1,&N);CHKERRQ(ierr);
215525d23c0SHong Zhang   for (col=0; col<N; col++) {
216525d23c0SHong Zhang     if (coloring->htype[0] == 'w') {
217525d23c0SHong Zhang       dx = 1.0 + unorm;
218525d23c0SHong Zhang     } else {
219525d23c0SHong Zhang       dx = xx[col];
220525d23c0SHong Zhang     }
221525d23c0SHong Zhang     if (dx == (PetscScalar)0.0) dx = 1.0;
222525d23c0SHong Zhang     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
223525d23c0SHong Zhang     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
224525d23c0SHong Zhang     dx               *= epsilon;
225525d23c0SHong Zhang     vscale_array[col] = (PetscScalar)dx;
226525d23c0SHong Zhang   }
227525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
228525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
229525d23c0SHong Zhang     t_init += t1 - t0;
230525d23c0SHong Zhang #endif
231525d23c0SHong Zhang 
232525d23c0SHong Zhang   nz  = 0;
233525d23c0SHong Zhang   for (k=0; k<ncolors; k++) { /* loop over colors */
234525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
235525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
236525d23c0SHong Zhang #endif
237525d23c0SHong Zhang     coloring->currentcolor = k;
238525d23c0SHong Zhang 
239525d23c0SHong Zhang     /*
240525d23c0SHong Zhang       Loop over each column associated with color
241525d23c0SHong Zhang       adding the perturbation to the vector w3 = x1 + dx.
242525d23c0SHong Zhang     */
243525d23c0SHong Zhang     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
244525d23c0SHong Zhang     ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
245525d23c0SHong Zhang     ncolumns_k = ncolumns[k];
246525d23c0SHong Zhang     for (l=0; l<ncolumns_k; l++) { /* loop over columns */
247525d23c0SHong Zhang       col = columns[k][l];
248525d23c0SHong Zhang       w3_array[col] += vscale_array[col];
249525d23c0SHong Zhang     }
250525d23c0SHong Zhang     ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
251525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
252525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
253525d23c0SHong Zhang     t_dx += t1 - t0;
254525d23c0SHong Zhang #endif
255525d23c0SHong Zhang 
256525d23c0SHong Zhang     /*
257525d23c0SHong Zhang       Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
258525d23c0SHong Zhang                            w2 = F(x1 + dx) - F(x1)
259525d23c0SHong Zhang     */
260525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
261525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
262525d23c0SHong Zhang #endif
263525d23c0SHong Zhang     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
264525d23c0SHong Zhang     ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
265525d23c0SHong Zhang     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
266525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
267525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
268525d23c0SHong Zhang     t_func += t1 - t0;
269525d23c0SHong Zhang #endif
270525d23c0SHong Zhang 
271525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
272525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
273525d23c0SHong Zhang #endif
274525d23c0SHong Zhang     ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
275525d23c0SHong Zhang 
276525d23c0SHong Zhang     /*
277525d23c0SHong Zhang       Loop over rows of vector, putting w2/dx into Jacobian matrix
278525d23c0SHong Zhang     */
279525d23c0SHong Zhang     ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
280525d23c0SHong Zhang     nrows_k = nrows[k];
281525d23c0SHong Zhang     for (l=0; l<nrows_k; l++) { /* loop over rows */
282525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
283525d23c0SHong Zhang     ierr = PetscTime(&t00);CHKERRQ(ierr);
284525d23c0SHong Zhang #endif
285525d23c0SHong Zhang       row   = coloring->rowcolden2sp3[nz++];
286525d23c0SHong Zhang       col   = coloring->rowcolden2sp3[nz++];
287525d23c0SHong Zhang       spidx = coloring->rowcolden2sp3[nz++];
288525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
289525d23c0SHong Zhang       ierr = PetscTime(&t11);CHKERRQ(ierr);
290525d23c0SHong Zhang       t_kl += t11 - t00;
291525d23c0SHong Zhang #endif
292b582cc96SHong Zhang       //printf(" row col val
293b582cc96SHong Zhang       ca[spidx*bs2] = y[row*bs]/vscale_array[col*bs];
294b582cc96SHong Zhang       //printf(" (%d, %d, %g)\n", row*bs,col*bs,ca[spidx*bs2]);
295525d23c0SHong Zhang     }
296525d23c0SHong Zhang     ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
297525d23c0SHong Zhang 
298525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
299525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
300525d23c0SHong Zhang     t_setvals += t1 - t0;
301525d23c0SHong Zhang #endif
302525d23c0SHong Zhang   } /* endof for each color */
303525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
304525d23c0SHong Zhang   printf("     FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx);
305525d23c0SHong Zhang   printf("     FDColorApply time: kl %g\n",t_kl);
306525d23c0SHong Zhang #endif
307525d23c0SHong Zhang   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
308525d23c0SHong Zhang   ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
309525d23c0SHong Zhang 
310525d23c0SHong Zhang   coloring->currentcolor = -1;
311525d23c0SHong Zhang   PetscFunctionReturn(0);
312525d23c0SHong Zhang }
313639f9d9dSBarry Smith 
31479c1e64dSHong Zhang #undef __FUNCT__
31579c1e64dSHong Zhang #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp"
31679c1e64dSHong Zhang /*
31779c1e64dSHong Zhang     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
31879c1e64dSHong Zhang */
31979c1e64dSHong Zhang PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c)
32079c1e64dSHong Zhang {
32179c1e64dSHong Zhang   PetscErrorCode ierr;
32279c1e64dSHong Zhang   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
32379c1e64dSHong Zhang   const PetscInt *is,*rows,*ci,*cj;
32452011a10SHong Zhang   PetscInt       nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz;
32579c1e64dSHong Zhang   IS             *isa;
326525d23c0SHong Zhang   PetscBool      isBAIJ;
32779c1e64dSHong Zhang   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
32879c1e64dSHong Zhang 
32979c1e64dSHong Zhang   PetscFunctionBegin;
33079c1e64dSHong Zhang   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
33152011a10SHong Zhang 
332476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
333476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
33479c1e64dSHong Zhang   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
335525d23c0SHong Zhang   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
336525d23c0SHong Zhang   if (!isBAIJ) {
33752011a10SHong Zhang     bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */
33879c1e64dSHong Zhang   }
33979c1e64dSHong Zhang   N         = mat->cmap->N/bs;
34079c1e64dSHong Zhang   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
34179c1e64dSHong Zhang   c->N      = mat->cmap->N/bs;
34279c1e64dSHong Zhang   c->m      = mat->rmap->N/bs;
34379c1e64dSHong Zhang   c->rstart = 0;
34479c1e64dSHong Zhang 
34579c1e64dSHong Zhang   c->ncolors = nis;
34679c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
34779c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
34879c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
34985740eacSHong Zhang   ierr       = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr);
35079c1e64dSHong Zhang 
351525d23c0SHong Zhang   if (isBAIJ) {
352525d23c0SHong Zhang     ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
353525d23c0SHong Zhang   } else {
3546378f32dSHong Zhang     ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
355525d23c0SHong Zhang   }
35679c1e64dSHong Zhang 
357476e0d0aSHong Zhang   ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr);
35879c1e64dSHong Zhang   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
35979c1e64dSHong Zhang 
36017a7dee1SHong Zhang   nz = 0;
36179c1e64dSHong Zhang   for (i=0; i<nis; i++) { /* loop over colors */
36279c1e64dSHong Zhang     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
36379c1e64dSHong Zhang     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
36479c1e64dSHong Zhang 
36579c1e64dSHong Zhang     c->ncolumns[i] = n;
36679c1e64dSHong Zhang     if (n) {
36779c1e64dSHong Zhang       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
36879c1e64dSHong Zhang       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
36979c1e64dSHong Zhang     } else {
37079c1e64dSHong Zhang       c->columns[i] = 0;
37179c1e64dSHong Zhang     }
37279c1e64dSHong Zhang 
37379c1e64dSHong Zhang     /* fast, crude version requires O(N*N) work */
374476e0d0aSHong Zhang     nrows = 0;
37579c1e64dSHong Zhang     for (j=0; j<n; j++) {  /* loop over columns */
37679c1e64dSHong Zhang       col  = is[j];
37779c1e64dSHong Zhang       rows = cj + ci[col];
37879c1e64dSHong Zhang       m    = ci[col+1] - ci[col];
37979c1e64dSHong Zhang       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
38079c1e64dSHong Zhang         rowhit[*rows]     = col + 1;
381476e0d0aSHong Zhang         spidxhit[*rows++] = spidx[ci[col] + k];
382476e0d0aSHong Zhang         nrows++;
38379c1e64dSHong Zhang       }
38479c1e64dSHong Zhang     }
385476e0d0aSHong Zhang     c->nrows[i] = nrows; /* total num of rows for this color */
38679c1e64dSHong Zhang 
38779c1e64dSHong Zhang     nrows = 0;
38879c1e64dSHong Zhang     for (j=0; j<N; j++) { /* loop over rows */
38979c1e64dSHong Zhang       if (rowhit[j]) {
39017a7dee1SHong Zhang         c->rowcolden2sp3[nz++]   = j;             /* row index */
39117a7dee1SHong Zhang         c->rowcolden2sp3[nz++] = rowhit[j] - 1;   /* column index */
39217a7dee1SHong Zhang         c->rowcolden2sp3[nz++] = spidxhit[j];     /* den2sp index */
39379c1e64dSHong Zhang         rowhit[j] = 0.0;                          /* zero rowhit for reuse */
39479c1e64dSHong Zhang       }
39579c1e64dSHong Zhang     }
39679c1e64dSHong Zhang     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
39779c1e64dSHong Zhang   }
39817a7dee1SHong Zhang   if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz);
39985740eacSHong Zhang 
400525d23c0SHong Zhang   if (isBAIJ) {
401525d23c0SHong Zhang     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
402525d23c0SHong Zhang   } else {
4036378f32dSHong Zhang     ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
404525d23c0SHong Zhang   }
405476e0d0aSHong Zhang   ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr);
40679c1e64dSHong Zhang   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
407476e0d0aSHong Zhang 
4084737c552SHong Zhang   c->ctype                  = IS_COLORING_GHOSTED;
409b582cc96SHong Zhang   if (isBAIJ) {
410b582cc96SHong Zhang     mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ;
411*b7799381SHong Zhang     ierr = PetscMalloc(bs*mat->cmap->N*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr);
412b582cc96SHong Zhang   } else {
41379c1e64dSHong Zhang     mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
414b582cc96SHong Zhang   }
41552011a10SHong Zhang   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
41679c1e64dSHong Zhang   PetscFunctionReturn(0);
41779c1e64dSHong Zhang }
41879c1e64dSHong Zhang 
4194a2ae208SSatish Balay #undef __FUNCT__
4204a2ae208SSatish Balay #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
4213acb8795SBarry Smith /*
4223acb8795SBarry Smith     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
4233acb8795SBarry Smith     This is why it has the ugly code with the MatGetBlockSize()
4243acb8795SBarry Smith */
425dfbe8321SBarry Smith PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
426639f9d9dSBarry Smith {
4276849ba73SBarry Smith   PetscErrorCode ierr;
4281a83f524SJed Brown   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
4291a83f524SJed Brown   const PetscInt *is,*rows,*ci,*cj;
4303acb8795SBarry Smith   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
431b9617806SBarry Smith   IS             *isa;
432ace3abfcSBarry Smith   PetscBool      done,flg = PETSC_FALSE;
433476e0d0aSHong Zhang   PetscBool      flg1;
43479c1e64dSHong Zhang   PetscBool      new_impl=PETSC_FALSE;
435639f9d9dSBarry Smith 
4363a40ed3dSBarry Smith   PetscFunctionBegin;
43779c1e64dSHong Zhang   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
43879c1e64dSHong Zhang   if (new_impl){
43979c1e64dSHong Zhang     ierr =  MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr);
44079c1e64dSHong Zhang     PetscFunctionReturn(0);
44179c1e64dSHong Zhang   }
442e7e72b3dSBarry Smith   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
443522c5e43SBarry Smith 
444b9617806SBarry Smith   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
445476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
446476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
447251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
448476e0d0aSHong Zhang   if (flg1) {
4493acb8795SBarry Smith     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
4503acb8795SBarry Smith   }
4513acb8795SBarry Smith 
4523acb8795SBarry Smith   N         = mat->cmap->N/bs;
4533acb8795SBarry Smith   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
4543acb8795SBarry Smith   c->N      = mat->cmap->N/bs;
4553acb8795SBarry Smith   c->m      = mat->rmap->N/bs;
456005c665bSBarry Smith   c->rstart = 0;
457005c665bSBarry Smith 
458639f9d9dSBarry Smith   c->ncolors = nis;
45938baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
46038baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
46138baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
46238baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
46338baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
46443a90d84SBarry Smith 
4653acb8795SBarry Smith   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
466ce94432eSBarry Smith   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
46770c3da92SBarry Smith 
46870c3da92SBarry Smith   /*
469639f9d9dSBarry Smith      Temporary option to allow for debugging/testing
47070c3da92SBarry Smith   */
4710298fd71SBarry Smith   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
47270c3da92SBarry Smith 
47338baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
47438baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
47570c3da92SBarry Smith 
476639f9d9dSBarry Smith   for (i=0; i<nis; i++) {
477b9b97703SBarry Smith     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
478639f9d9dSBarry Smith     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
4792205254eSKarl Rupp 
480639f9d9dSBarry Smith     c->ncolumns[i] = n;
481639f9d9dSBarry Smith     if (n) {
48238baddfdSBarry Smith       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
48338baddfdSBarry Smith       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
48470c3da92SBarry Smith     } else {
485639f9d9dSBarry Smith       c->columns[i] = 0;
48670c3da92SBarry Smith     }
48770c3da92SBarry Smith 
4883a7fca6bSBarry Smith     if (!flg) { /* ------------------------------------------------------------------------------*/
4893a7fca6bSBarry Smith       /* fast, crude version requires O(N*N) work */
49038baddfdSBarry Smith       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
491639f9d9dSBarry Smith       /* loop over columns*/
492639f9d9dSBarry Smith       for (j=0; j<n; j++) {
493639f9d9dSBarry Smith         col  = is[j];
494639f9d9dSBarry Smith         rows = cj + ci[col];
495639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
496639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
497639f9d9dSBarry Smith         for (k=0; k<m; k++) {
498639f9d9dSBarry Smith           rowhit[*rows++] = col + 1;
49970c3da92SBarry Smith         }
50070c3da92SBarry Smith       }
501639f9d9dSBarry Smith       /* count the number of hits */
502639f9d9dSBarry Smith       nrows = 0;
50370c3da92SBarry Smith       for (j=0; j<N; j++) {
504639f9d9dSBarry Smith         if (rowhit[j]) nrows++;
505639f9d9dSBarry Smith       }
506639f9d9dSBarry Smith       c->nrows[i] = nrows;
50738baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
50838baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
509639f9d9dSBarry Smith       nrows       = 0;
510639f9d9dSBarry Smith       for (j=0; j<N; j++) {
511639f9d9dSBarry Smith         if (rowhit[j]) {
512639f9d9dSBarry Smith           c->rows[i][nrows]          = j;
513639f9d9dSBarry Smith           c->columnsforrow[i][nrows] = rowhit[j] - 1;
514639f9d9dSBarry Smith           nrows++;
51570c3da92SBarry Smith         }
51670c3da92SBarry Smith       }
517639f9d9dSBarry Smith     } else {  /*-------------------------------------------------------------------------------*/
5183a7fca6bSBarry Smith       /* slow version, using rowhit as a linked list */
51938baddfdSBarry Smith       PetscInt currentcol,fm,mfm;
520639f9d9dSBarry Smith       rowhit[N] = N;
521639f9d9dSBarry Smith       nrows     = 0;
522639f9d9dSBarry Smith       /* loop over columns */
523639f9d9dSBarry Smith       for (j=0; j<n; j++) {
524639f9d9dSBarry Smith         col  = is[j];
525639f9d9dSBarry Smith         rows = cj + ci[col];
526639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
527639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
528639f9d9dSBarry Smith         fm = N;    /* fm points to first entry in linked list */
529639f9d9dSBarry Smith         for (k=0; k<m; k++) {
530639f9d9dSBarry Smith           currentcol = *rows++;
531639f9d9dSBarry Smith           /* is it already in the list? */
532639f9d9dSBarry Smith           do {
533639f9d9dSBarry Smith             mfm = fm;
534639f9d9dSBarry Smith             fm  = rowhit[fm];
535639f9d9dSBarry Smith           } while (fm < currentcol);
536639f9d9dSBarry Smith           /* not in list so add it */
537639f9d9dSBarry Smith           if (fm != currentcol) {
538639f9d9dSBarry Smith             nrows++;
539639f9d9dSBarry Smith             columnsforrow[currentcol] = col;
540639f9d9dSBarry Smith             /* next three lines insert new entry into linked list */
541639f9d9dSBarry Smith             rowhit[mfm]        = currentcol;
542639f9d9dSBarry Smith             rowhit[currentcol] = fm;
543639f9d9dSBarry Smith             fm                 = currentcol;
544639f9d9dSBarry Smith             /* fm points to present position in list since we know the columns are sorted */
54571cd77b2SBarry Smith           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
546639f9d9dSBarry Smith         }
547639f9d9dSBarry Smith       }
548639f9d9dSBarry Smith       c->nrows[i] = nrows;
54938baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
55038baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
551639f9d9dSBarry Smith       /* now store the linked list of rows into c->rows[i] */
552639f9d9dSBarry Smith       nrows = 0;
553639f9d9dSBarry Smith       fm    = rowhit[N];
554639f9d9dSBarry Smith       do {
555639f9d9dSBarry Smith         c->rows[i][nrows]            = fm;
556639f9d9dSBarry Smith         c->columnsforrow[i][nrows++] = columnsforrow[fm];
557639f9d9dSBarry Smith         fm                           = rowhit[fm];
558639f9d9dSBarry Smith       } while (fm < N);
559639f9d9dSBarry Smith     } /* ---------------------------------------------------------------------------------------*/
560639f9d9dSBarry Smith     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
561639f9d9dSBarry Smith   }
5623acb8795SBarry Smith   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
563639f9d9dSBarry Smith 
564606d414cSSatish Balay   ierr = PetscFree(rowhit);CHKERRQ(ierr);
565606d414cSSatish Balay   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
566639f9d9dSBarry Smith 
56730b34957SBarry Smith   /* Optimize by adding the vscale, and scaleforrow[][] fields */
56830b34957SBarry Smith   /*
56930b34957SBarry Smith        see the version for MPIAIJ
57030b34957SBarry Smith   */
571ce94432eSBarry Smith   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
57238baddfdSBarry Smith   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
57330b34957SBarry Smith   for (k=0; k<c->ncolors; k++) {
57438baddfdSBarry Smith     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
57530b34957SBarry Smith     for (l=0; l<c->nrows[k]; l++) {
57630b34957SBarry Smith       col = c->columnsforrow[k][l];
5772205254eSKarl Rupp 
57830b34957SBarry Smith       c->vscaleforrow[k][l] = col;
57930b34957SBarry Smith     }
58030b34957SBarry Smith   }
581b9617806SBarry Smith   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
5823a40ed3dSBarry Smith   PetscFunctionReturn(0);
58370c3da92SBarry Smith }
58479c1e64dSHong Zhang 
585