xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision 26cb46bf7b018f4c5d97c7e7cd27641055042892)
170c3da92SBarry Smith 
2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
3525d23c0SHong Zhang #include <../src/mat/impls/baij/seq/baij.h>
4525d23c0SHong Zhang 
5b582cc96SHong Zhang #undef __FUNCT__
6b582cc96SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqBAIJ"
7b582cc96SHong Zhang PetscErrorCode  MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
8b582cc96SHong Zhang {
9b582cc96SHong Zhang   PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
10b582cc96SHong Zhang   PetscErrorCode ierr;
11d6752224SHong Zhang   PetscInt       bs=J->rmap->bs,bs2=bs*bs,i,j,k,l,row,col,N;
12b582cc96SHong Zhang   PetscScalar    dx,*y,*xx,*w3_array;
13b582cc96SHong Zhang   PetscScalar    *vscale_array;
14b582cc96SHong Zhang   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
15b582cc96SHong Zhang   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
16b582cc96SHong Zhang   void           *fctx   = coloring->fctx;
17b582cc96SHong Zhang   PetscBool      flg     = PETSC_FALSE;
18b582cc96SHong Zhang   Mat_SeqBAIJ    *csp=(Mat_SeqBAIJ*)J->data;
19d6752224SHong Zhang   PetscScalar    *ca = csp->a,*ca_l;
20*26cb46bfSHong Zhang   PetscInt       brow,bcol,bspidx,spidx,nz;
21b582cc96SHong Zhang 
22b582cc96SHong Zhang   PetscFunctionBegin;
23b582cc96SHong Zhang   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
24b582cc96SHong Zhang   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
25b582cc96SHong Zhang   if (flg) {
26b582cc96SHong Zhang     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
27b582cc96SHong Zhang   } else {
28b582cc96SHong Zhang     PetscBool assembled;
29b582cc96SHong Zhang     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
30b582cc96SHong Zhang     if (assembled) {
31b582cc96SHong Zhang       ierr = MatZeroEntries(J);CHKERRQ(ierr);
32b582cc96SHong Zhang     }
33b582cc96SHong Zhang   }
34b582cc96SHong Zhang 
35b582cc96SHong Zhang   if (!coloring->vscale) {
36b582cc96SHong Zhang     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
37b582cc96SHong Zhang   }
38b582cc96SHong Zhang 
39b582cc96SHong Zhang   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
40b582cc96SHong Zhang     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
41b582cc96SHong Zhang   }
42b582cc96SHong Zhang 
43b582cc96SHong Zhang   /* Set w1 = F(x1) */
44b582cc96SHong Zhang   if (!coloring->fset) {
45b582cc96SHong Zhang     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
46b582cc96SHong Zhang     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
47b582cc96SHong Zhang     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
48b582cc96SHong Zhang   } else {
49b582cc96SHong Zhang     coloring->fset = PETSC_FALSE;
50b582cc96SHong Zhang   }
51b582cc96SHong Zhang 
52b582cc96SHong Zhang   if (!coloring->w3) {
53b582cc96SHong Zhang     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
54b582cc96SHong Zhang     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
55b582cc96SHong Zhang   }
56b582cc96SHong Zhang   w3 = coloring->w3;
57b582cc96SHong Zhang 
58b582cc96SHong Zhang   /* Compute all the local scale factors: vscale = 1./dx = 1./(epsilon*xx) */
59b582cc96SHong Zhang   ierr = VecGetLocalSize(x1,&N);CHKERRQ(ierr);
60b582cc96SHong Zhang   ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
61b582cc96SHong Zhang   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
62b582cc96SHong Zhang 
63b582cc96SHong Zhang   /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
64b582cc96SHong Zhang   for (col=0; col<N; col++) {
65b582cc96SHong Zhang     if (coloring->htype[0] == 'w') {
66b582cc96SHong Zhang       dx = 1.0 + unorm;
67b582cc96SHong Zhang     } else {
68b582cc96SHong Zhang       dx = xx[col];
69b582cc96SHong Zhang     }
70b582cc96SHong Zhang     if (dx == (PetscScalar)0.0) dx = 1.0;
71b582cc96SHong Zhang     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
72b582cc96SHong Zhang     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
73b582cc96SHong Zhang     dx               *= epsilon;
74b582cc96SHong Zhang     vscale_array[col] = (PetscScalar)dx;
75b582cc96SHong Zhang   }
76b582cc96SHong Zhang 
7787d34202SHong Zhang   /* Create bs vectors w2s and w3s */
7887d34202SHong Zhang   Vec w2s[bs];
7987d34202SHong Zhang   for (i=0; i<bs; i++) {
8087d34202SHong Zhang     ierr = VecDuplicate(w2,&w2s[i]);CHKERRQ(ierr);
8187d34202SHong Zhang   }
8287d34202SHong Zhang 
83b582cc96SHong Zhang   nz = 0;
84b582cc96SHong Zhang   for (k=0; k<coloring->ncolors; k++) { /*  Loop over each color */
85b582cc96SHong Zhang     coloring->currentcolor = k;
8687d34202SHong Zhang 
8787d34202SHong Zhang     /* Compute w3 = x1 + dx */
88b582cc96SHong Zhang     for (i=0; i<bs; i++) {
89b582cc96SHong Zhang       ierr = VecCopy(x1,w3);CHKERRQ(ierr);
90b582cc96SHong Zhang       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
91b582cc96SHong Zhang       for (l=0; l<coloring->ncolumns[k]; l++) {
92b582cc96SHong Zhang         col = i + bs*coloring->columns[k][l];
93b582cc96SHong Zhang         w3_array[col] += vscale_array[col];
94b582cc96SHong Zhang       }
95b582cc96SHong Zhang       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
96b582cc96SHong Zhang 
9787d34202SHong Zhang       /* Evaluate function w2s = F(w3) - F(x1) */
98b582cc96SHong Zhang       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
9987d34202SHong Zhang       ierr = (*f)(sctx,w3,w2s[i],fctx);CHKERRQ(ierr);
100b582cc96SHong Zhang       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
10187d34202SHong Zhang       ierr = VecAXPY(w2s[i],-1.0,w1);CHKERRQ(ierr);
10287d34202SHong Zhang     }
103b582cc96SHong Zhang 
104*26cb46bfSHong Zhang     /* Loop over rows of vector, putting results into Jacobian matrix */
105b582cc96SHong Zhang     for (l=0; l<coloring->nrows[k]; l++) {
106b582cc96SHong Zhang         brow   = coloring->rowcolden2sp3[nz++];
107b582cc96SHong Zhang         bcol   = coloring->rowcolden2sp3[nz++];
108b582cc96SHong Zhang         bspidx = coloring->rowcolden2sp3[nz++];
109*26cb46bfSHong Zhang         ca_l   = ca + bs2*bspidx;
110*26cb46bfSHong Zhang         spidx  = 0;
111*26cb46bfSHong Zhang         for (i=0; i<bs; i++) {   /* column of the block */
112*26cb46bfSHong Zhang           ierr = VecGetArray(w2s[i],&y);CHKERRQ(ierr);
113b582cc96SHong Zhang           col  = bs*bcol + i;
114*26cb46bfSHong Zhang           for (j=0; j<bs; j++) { /* row of the block */
115*26cb46bfSHong Zhang             row = bs*brow + j;
116*26cb46bfSHong Zhang             ca_l[spidx++] = y[row]/vscale_array[col];
117b582cc96SHong Zhang           }
11887d34202SHong Zhang           ierr = VecRestoreArray(w2s[i],&y);CHKERRQ(ierr);
119*26cb46bfSHong Zhang         }
120*26cb46bfSHong Zhang     }
121b582cc96SHong Zhang   } /* endof for each color */
122b582cc96SHong Zhang   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
123b582cc96SHong Zhang   ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
12487d34202SHong Zhang   for (i=0; i<bs; i++) {
12587d34202SHong Zhang     ierr = VecDestroy(&w2s[i]);CHKERRQ(ierr);
12687d34202SHong Zhang   }
127b582cc96SHong Zhang   coloring->currentcolor = -1;
128b582cc96SHong Zhang   PetscFunctionReturn(0);
129b582cc96SHong Zhang }
130b582cc96SHong Zhang 
131525d23c0SHong Zhang /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */
132525d23c0SHong Zhang /* #define JACOBIANCOLOROPT */
133525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
134525d23c0SHong Zhang #include <petsctime.h>
135525d23c0SHong Zhang #endif
136525d23c0SHong Zhang #undef __FUNCT__
137525d23c0SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqAIJ"
138525d23c0SHong Zhang PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
139525d23c0SHong Zhang {
140525d23c0SHong Zhang   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
141525d23c0SHong Zhang   PetscErrorCode ierr;
142b582cc96SHong Zhang   PetscInt       k,l,row,col,N,bs=J->rmap->bs,bs2=bs*bs;
143525d23c0SHong Zhang   PetscScalar    dx,*y,*xx,*w3_array;
144525d23c0SHong Zhang   PetscScalar    *vscale_array;
145525d23c0SHong Zhang   PetscReal      epsilon=coloring->error_rel,umin = coloring->umin,unorm;
146525d23c0SHong Zhang   Vec            w1=coloring->w1,w2=coloring->w2,w3;
147525d23c0SHong Zhang   void           *fctx=coloring->fctx;
148b582cc96SHong Zhang   PetscBool      flg=PETSC_FALSE,isBAIJ=PETSC_FALSE;
149b582cc96SHong Zhang   PetscScalar    *ca;
150525d23c0SHong Zhang   const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
151525d23c0SHong Zhang   PetscInt       **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx;
152525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
153525d23c0SHong Zhang   PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11;
154525d23c0SHong Zhang #endif
155525d23c0SHong Zhang 
156525d23c0SHong Zhang   PetscFunctionBegin;
157525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
158525d23c0SHong Zhang   ierr = PetscTime(&t0);CHKERRQ(ierr);
159525d23c0SHong Zhang #endif
160b582cc96SHong Zhang   ierr = PetscObjectTypeCompare((PetscObject)J,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
161b582cc96SHong Zhang   if (isBAIJ) {
162b582cc96SHong Zhang     Mat_SeqBAIJ     *csp=(Mat_SeqBAIJ*)J->data;
163b582cc96SHong Zhang     ca = csp->a;
164b582cc96SHong Zhang   } else {
165b582cc96SHong Zhang     Mat_SeqAIJ     *csp=(Mat_SeqAIJ*)J->data;
166b582cc96SHong Zhang     ca = csp->a;
167b582cc96SHong Zhang     bs = 1; bs2 = 1;
168b582cc96SHong Zhang   }
169b582cc96SHong Zhang 
170525d23c0SHong Zhang   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
171525d23c0SHong Zhang   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
172525d23c0SHong Zhang   if (flg) {
173525d23c0SHong Zhang     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
174525d23c0SHong Zhang   } else {
175525d23c0SHong Zhang     PetscBool assembled;
176525d23c0SHong Zhang     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
177525d23c0SHong Zhang     if (assembled) {
178525d23c0SHong Zhang       ierr = MatZeroEntries(J);CHKERRQ(ierr);
179525d23c0SHong Zhang     }
180525d23c0SHong Zhang   }
181525d23c0SHong Zhang 
182525d23c0SHong Zhang   if (!coloring->vscale) {
183525d23c0SHong Zhang     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
184525d23c0SHong Zhang   }
185525d23c0SHong Zhang 
186525d23c0SHong Zhang   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
187525d23c0SHong Zhang     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
188525d23c0SHong Zhang   }
189525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
190525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
191525d23c0SHong Zhang     t_init += t1 - t0;
192525d23c0SHong Zhang #endif
193525d23c0SHong Zhang 
194525d23c0SHong Zhang   /* Set w1 = F(x1) */
195525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
196525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
197525d23c0SHong Zhang #endif
198525d23c0SHong Zhang   if (!coloring->fset) {
199525d23c0SHong Zhang     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
200525d23c0SHong Zhang     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
201525d23c0SHong Zhang     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
202525d23c0SHong Zhang   } else {
203525d23c0SHong Zhang     coloring->fset = PETSC_FALSE;
204525d23c0SHong Zhang   }
205525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
206525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
207525d23c0SHong Zhang     t_setvals += t1 - t0;
208525d23c0SHong Zhang #endif
209525d23c0SHong Zhang 
210525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
211525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
212525d23c0SHong Zhang #endif
213525d23c0SHong Zhang   if (!coloring->w3) {
214525d23c0SHong Zhang     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
215525d23c0SHong Zhang     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
216525d23c0SHong Zhang   }
217525d23c0SHong Zhang   w3 = coloring->w3;
218525d23c0SHong Zhang 
219525d23c0SHong Zhang   /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */
220525d23c0SHong Zhang   ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
221525d23c0SHong Zhang   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
222525d23c0SHong Zhang   ierr = VecGetSize(x1,&N);CHKERRQ(ierr);
223525d23c0SHong Zhang   for (col=0; col<N; col++) {
224525d23c0SHong Zhang     if (coloring->htype[0] == 'w') {
225525d23c0SHong Zhang       dx = 1.0 + unorm;
226525d23c0SHong Zhang     } else {
227525d23c0SHong Zhang       dx = xx[col];
228525d23c0SHong Zhang     }
229525d23c0SHong Zhang     if (dx == (PetscScalar)0.0) dx = 1.0;
230525d23c0SHong Zhang     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
231525d23c0SHong Zhang     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
232525d23c0SHong Zhang     dx               *= epsilon;
233525d23c0SHong Zhang     vscale_array[col] = (PetscScalar)dx;
234525d23c0SHong Zhang   }
235525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
236525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
237525d23c0SHong Zhang     t_init += t1 - t0;
238525d23c0SHong Zhang #endif
239525d23c0SHong Zhang 
240525d23c0SHong Zhang   nz  = 0;
241525d23c0SHong Zhang   for (k=0; k<ncolors; k++) { /* loop over colors */
242525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
243525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
244525d23c0SHong Zhang #endif
245525d23c0SHong Zhang     coloring->currentcolor = k;
246525d23c0SHong Zhang 
247525d23c0SHong Zhang     /*
248525d23c0SHong Zhang       Loop over each column associated with color
249525d23c0SHong Zhang       adding the perturbation to the vector w3 = x1 + dx.
250525d23c0SHong Zhang     */
251525d23c0SHong Zhang     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
252525d23c0SHong Zhang     ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
253525d23c0SHong Zhang     ncolumns_k = ncolumns[k];
254525d23c0SHong Zhang     for (l=0; l<ncolumns_k; l++) { /* loop over columns */
255525d23c0SHong Zhang       col = columns[k][l];
256525d23c0SHong Zhang       w3_array[col] += vscale_array[col];
257525d23c0SHong Zhang     }
258525d23c0SHong Zhang     ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
259525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
260525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
261525d23c0SHong Zhang     t_dx += t1 - t0;
262525d23c0SHong Zhang #endif
263525d23c0SHong Zhang 
264525d23c0SHong Zhang     /*
265525d23c0SHong Zhang       Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
266525d23c0SHong Zhang                            w2 = F(x1 + dx) - F(x1)
267525d23c0SHong Zhang     */
268525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
269525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
270525d23c0SHong Zhang #endif
271525d23c0SHong Zhang     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
272525d23c0SHong Zhang     ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
273525d23c0SHong Zhang     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
274525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
275525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
276525d23c0SHong Zhang     t_func += t1 - t0;
277525d23c0SHong Zhang #endif
278525d23c0SHong Zhang 
279525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
280525d23c0SHong Zhang     ierr = PetscTime(&t0);CHKERRQ(ierr);
281525d23c0SHong Zhang #endif
282525d23c0SHong Zhang     ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
283525d23c0SHong Zhang 
284525d23c0SHong Zhang     /*
285525d23c0SHong Zhang       Loop over rows of vector, putting w2/dx into Jacobian matrix
286525d23c0SHong Zhang     */
287525d23c0SHong Zhang     ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
288525d23c0SHong Zhang     nrows_k = nrows[k];
289525d23c0SHong Zhang     for (l=0; l<nrows_k; l++) { /* loop over rows */
290525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
291525d23c0SHong Zhang     ierr = PetscTime(&t00);CHKERRQ(ierr);
292525d23c0SHong Zhang #endif
293525d23c0SHong Zhang       row   = coloring->rowcolden2sp3[nz++];
294525d23c0SHong Zhang       col   = coloring->rowcolden2sp3[nz++];
295525d23c0SHong Zhang       spidx = coloring->rowcolden2sp3[nz++];
296525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
297525d23c0SHong Zhang       ierr = PetscTime(&t11);CHKERRQ(ierr);
298525d23c0SHong Zhang       t_kl += t11 - t00;
299525d23c0SHong Zhang #endif
300b582cc96SHong Zhang       //printf(" row col val
301b582cc96SHong Zhang       ca[spidx*bs2] = y[row*bs]/vscale_array[col*bs];
302b582cc96SHong Zhang       //printf(" (%d, %d, %g)\n", row*bs,col*bs,ca[spidx*bs2]);
303525d23c0SHong Zhang     }
304525d23c0SHong Zhang     ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
305525d23c0SHong Zhang 
306525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
307525d23c0SHong Zhang     ierr = PetscTime(&t1);CHKERRQ(ierr);
308525d23c0SHong Zhang     t_setvals += t1 - t0;
309525d23c0SHong Zhang #endif
310525d23c0SHong Zhang   } /* endof for each color */
311525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT)
312525d23c0SHong Zhang   printf("     FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx);
313525d23c0SHong Zhang   printf("     FDColorApply time: kl %g\n",t_kl);
314525d23c0SHong Zhang #endif
315525d23c0SHong Zhang   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
316525d23c0SHong Zhang   ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
317525d23c0SHong Zhang 
318525d23c0SHong Zhang   coloring->currentcolor = -1;
319525d23c0SHong Zhang   PetscFunctionReturn(0);
320525d23c0SHong Zhang }
321639f9d9dSBarry Smith 
32279c1e64dSHong Zhang #undef __FUNCT__
32379c1e64dSHong Zhang #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp"
32479c1e64dSHong Zhang /*
32579c1e64dSHong Zhang     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
32679c1e64dSHong Zhang */
32779c1e64dSHong Zhang PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c)
32879c1e64dSHong Zhang {
32979c1e64dSHong Zhang   PetscErrorCode ierr;
33079c1e64dSHong Zhang   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
33179c1e64dSHong Zhang   const PetscInt *is,*rows,*ci,*cj;
33252011a10SHong Zhang   PetscInt       nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz;
33379c1e64dSHong Zhang   IS             *isa;
334525d23c0SHong Zhang   PetscBool      isBAIJ;
33579c1e64dSHong Zhang   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
33679c1e64dSHong Zhang 
33779c1e64dSHong Zhang   PetscFunctionBegin;
33879c1e64dSHong Zhang   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
33952011a10SHong Zhang 
340476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
341476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
34279c1e64dSHong Zhang   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
343525d23c0SHong Zhang   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
344525d23c0SHong Zhang   if (!isBAIJ) {
34552011a10SHong Zhang     bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */
34679c1e64dSHong Zhang   }
34779c1e64dSHong Zhang   N         = mat->cmap->N/bs;
34879c1e64dSHong Zhang   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
34979c1e64dSHong Zhang   c->N      = mat->cmap->N/bs;
35079c1e64dSHong Zhang   c->m      = mat->rmap->N/bs;
35179c1e64dSHong Zhang   c->rstart = 0;
35279c1e64dSHong Zhang 
35379c1e64dSHong Zhang   c->ncolors = nis;
35479c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
35579c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
35679c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
35785740eacSHong Zhang   ierr       = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr);
35879c1e64dSHong Zhang 
359525d23c0SHong Zhang   if (isBAIJ) {
360525d23c0SHong Zhang     ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
361525d23c0SHong Zhang   } else {
3626378f32dSHong Zhang     ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
363525d23c0SHong Zhang   }
36479c1e64dSHong Zhang 
365476e0d0aSHong Zhang   ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr);
36679c1e64dSHong Zhang   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
36779c1e64dSHong Zhang 
36817a7dee1SHong Zhang   nz = 0;
36979c1e64dSHong Zhang   for (i=0; i<nis; i++) { /* loop over colors */
37079c1e64dSHong Zhang     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
37179c1e64dSHong Zhang     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
37279c1e64dSHong Zhang 
37379c1e64dSHong Zhang     c->ncolumns[i] = n;
37479c1e64dSHong Zhang     if (n) {
37579c1e64dSHong Zhang       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
37679c1e64dSHong Zhang       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
37779c1e64dSHong Zhang     } else {
37879c1e64dSHong Zhang       c->columns[i] = 0;
37979c1e64dSHong Zhang     }
38079c1e64dSHong Zhang 
38179c1e64dSHong Zhang     /* fast, crude version requires O(N*N) work */
382476e0d0aSHong Zhang     nrows = 0;
38379c1e64dSHong Zhang     for (j=0; j<n; j++) {  /* loop over columns */
38479c1e64dSHong Zhang       col  = is[j];
38579c1e64dSHong Zhang       rows = cj + ci[col];
38679c1e64dSHong Zhang       m    = ci[col+1] - ci[col];
38779c1e64dSHong Zhang       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
38879c1e64dSHong Zhang         rowhit[*rows]     = col + 1;
389476e0d0aSHong Zhang         spidxhit[*rows++] = spidx[ci[col] + k];
390476e0d0aSHong Zhang         nrows++;
39179c1e64dSHong Zhang       }
39279c1e64dSHong Zhang     }
393476e0d0aSHong Zhang     c->nrows[i] = nrows; /* total num of rows for this color */
39479c1e64dSHong Zhang 
39579c1e64dSHong Zhang     nrows = 0;
39679c1e64dSHong Zhang     for (j=0; j<N; j++) { /* loop over rows */
39779c1e64dSHong Zhang       if (rowhit[j]) {
39817a7dee1SHong Zhang         c->rowcolden2sp3[nz++]   = j;             /* row index */
39917a7dee1SHong Zhang         c->rowcolden2sp3[nz++] = rowhit[j] - 1;   /* column index */
40017a7dee1SHong Zhang         c->rowcolden2sp3[nz++] = spidxhit[j];     /* den2sp index */
40179c1e64dSHong Zhang         rowhit[j] = 0.0;                          /* zero rowhit for reuse */
40279c1e64dSHong Zhang       }
40379c1e64dSHong Zhang     }
40479c1e64dSHong Zhang     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
40579c1e64dSHong Zhang   }
40617a7dee1SHong Zhang   if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz);
40785740eacSHong Zhang 
408525d23c0SHong Zhang   if (isBAIJ) {
409525d23c0SHong Zhang     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
410525d23c0SHong Zhang   } else {
4116378f32dSHong Zhang     ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
412525d23c0SHong Zhang   }
413476e0d0aSHong Zhang   ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr);
41479c1e64dSHong Zhang   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
415476e0d0aSHong Zhang 
4164737c552SHong Zhang   c->ctype                  = IS_COLORING_GHOSTED;
417b582cc96SHong Zhang   if (isBAIJ) {
418b582cc96SHong Zhang     mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ;
419b582cc96SHong Zhang   } else {
42079c1e64dSHong Zhang     mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
421b582cc96SHong Zhang   }
42252011a10SHong Zhang   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
42379c1e64dSHong Zhang   PetscFunctionReturn(0);
42479c1e64dSHong Zhang }
42579c1e64dSHong Zhang 
4264a2ae208SSatish Balay #undef __FUNCT__
4274a2ae208SSatish Balay #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
4283acb8795SBarry Smith /*
4293acb8795SBarry Smith     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
4303acb8795SBarry Smith     This is why it has the ugly code with the MatGetBlockSize()
4313acb8795SBarry Smith */
432dfbe8321SBarry Smith PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
433639f9d9dSBarry Smith {
4346849ba73SBarry Smith   PetscErrorCode ierr;
4351a83f524SJed Brown   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
4361a83f524SJed Brown   const PetscInt *is,*rows,*ci,*cj;
4373acb8795SBarry Smith   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
438b9617806SBarry Smith   IS             *isa;
439ace3abfcSBarry Smith   PetscBool      done,flg = PETSC_FALSE;
440476e0d0aSHong Zhang   PetscBool      flg1;
44179c1e64dSHong Zhang   PetscBool      new_impl=PETSC_FALSE;
442639f9d9dSBarry Smith 
4433a40ed3dSBarry Smith   PetscFunctionBegin;
44479c1e64dSHong Zhang   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
44579c1e64dSHong Zhang   if (new_impl){
44679c1e64dSHong Zhang     ierr =  MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr);
44779c1e64dSHong Zhang     PetscFunctionReturn(0);
44879c1e64dSHong Zhang   }
449e7e72b3dSBarry Smith   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
450522c5e43SBarry Smith 
451b9617806SBarry Smith   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
452476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
453476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
454251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
455476e0d0aSHong Zhang   if (flg1) {
4563acb8795SBarry Smith     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
4573acb8795SBarry Smith   }
4583acb8795SBarry Smith 
4593acb8795SBarry Smith   N         = mat->cmap->N/bs;
4603acb8795SBarry Smith   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
4613acb8795SBarry Smith   c->N      = mat->cmap->N/bs;
4623acb8795SBarry Smith   c->m      = mat->rmap->N/bs;
463005c665bSBarry Smith   c->rstart = 0;
464005c665bSBarry Smith 
465639f9d9dSBarry Smith   c->ncolors = nis;
46638baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
46738baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
46838baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
46938baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
47038baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
47143a90d84SBarry Smith 
4723acb8795SBarry Smith   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
473ce94432eSBarry Smith   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
47470c3da92SBarry Smith 
47570c3da92SBarry Smith   /*
476639f9d9dSBarry Smith      Temporary option to allow for debugging/testing
47770c3da92SBarry Smith   */
4780298fd71SBarry Smith   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
47970c3da92SBarry Smith 
48038baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
48138baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
48270c3da92SBarry Smith 
483639f9d9dSBarry Smith   for (i=0; i<nis; i++) {
484b9b97703SBarry Smith     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
485639f9d9dSBarry Smith     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
4862205254eSKarl Rupp 
487639f9d9dSBarry Smith     c->ncolumns[i] = n;
488639f9d9dSBarry Smith     if (n) {
48938baddfdSBarry Smith       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
49038baddfdSBarry Smith       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
49170c3da92SBarry Smith     } else {
492639f9d9dSBarry Smith       c->columns[i] = 0;
49370c3da92SBarry Smith     }
49470c3da92SBarry Smith 
4953a7fca6bSBarry Smith     if (!flg) { /* ------------------------------------------------------------------------------*/
4963a7fca6bSBarry Smith       /* fast, crude version requires O(N*N) work */
49738baddfdSBarry Smith       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
498639f9d9dSBarry Smith       /* loop over columns*/
499639f9d9dSBarry Smith       for (j=0; j<n; j++) {
500639f9d9dSBarry Smith         col  = is[j];
501639f9d9dSBarry Smith         rows = cj + ci[col];
502639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
503639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
504639f9d9dSBarry Smith         for (k=0; k<m; k++) {
505639f9d9dSBarry Smith           rowhit[*rows++] = col + 1;
50670c3da92SBarry Smith         }
50770c3da92SBarry Smith       }
508639f9d9dSBarry Smith       /* count the number of hits */
509639f9d9dSBarry Smith       nrows = 0;
51070c3da92SBarry Smith       for (j=0; j<N; j++) {
511639f9d9dSBarry Smith         if (rowhit[j]) nrows++;
512639f9d9dSBarry Smith       }
513639f9d9dSBarry Smith       c->nrows[i] = nrows;
51438baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
51538baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
516639f9d9dSBarry Smith       nrows       = 0;
517639f9d9dSBarry Smith       for (j=0; j<N; j++) {
518639f9d9dSBarry Smith         if (rowhit[j]) {
519639f9d9dSBarry Smith           c->rows[i][nrows]          = j;
520639f9d9dSBarry Smith           c->columnsforrow[i][nrows] = rowhit[j] - 1;
521639f9d9dSBarry Smith           nrows++;
52270c3da92SBarry Smith         }
52370c3da92SBarry Smith       }
524639f9d9dSBarry Smith     } else {  /*-------------------------------------------------------------------------------*/
5253a7fca6bSBarry Smith       /* slow version, using rowhit as a linked list */
52638baddfdSBarry Smith       PetscInt currentcol,fm,mfm;
527639f9d9dSBarry Smith       rowhit[N] = N;
528639f9d9dSBarry Smith       nrows     = 0;
529639f9d9dSBarry Smith       /* loop over columns */
530639f9d9dSBarry Smith       for (j=0; j<n; j++) {
531639f9d9dSBarry Smith         col  = is[j];
532639f9d9dSBarry Smith         rows = cj + ci[col];
533639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
534639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
535639f9d9dSBarry Smith         fm = N;    /* fm points to first entry in linked list */
536639f9d9dSBarry Smith         for (k=0; k<m; k++) {
537639f9d9dSBarry Smith           currentcol = *rows++;
538639f9d9dSBarry Smith           /* is it already in the list? */
539639f9d9dSBarry Smith           do {
540639f9d9dSBarry Smith             mfm = fm;
541639f9d9dSBarry Smith             fm  = rowhit[fm];
542639f9d9dSBarry Smith           } while (fm < currentcol);
543639f9d9dSBarry Smith           /* not in list so add it */
544639f9d9dSBarry Smith           if (fm != currentcol) {
545639f9d9dSBarry Smith             nrows++;
546639f9d9dSBarry Smith             columnsforrow[currentcol] = col;
547639f9d9dSBarry Smith             /* next three lines insert new entry into linked list */
548639f9d9dSBarry Smith             rowhit[mfm]        = currentcol;
549639f9d9dSBarry Smith             rowhit[currentcol] = fm;
550639f9d9dSBarry Smith             fm                 = currentcol;
551639f9d9dSBarry Smith             /* fm points to present position in list since we know the columns are sorted */
55271cd77b2SBarry Smith           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
553639f9d9dSBarry Smith         }
554639f9d9dSBarry Smith       }
555639f9d9dSBarry Smith       c->nrows[i] = nrows;
55638baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
55738baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
558639f9d9dSBarry Smith       /* now store the linked list of rows into c->rows[i] */
559639f9d9dSBarry Smith       nrows = 0;
560639f9d9dSBarry Smith       fm    = rowhit[N];
561639f9d9dSBarry Smith       do {
562639f9d9dSBarry Smith         c->rows[i][nrows]            = fm;
563639f9d9dSBarry Smith         c->columnsforrow[i][nrows++] = columnsforrow[fm];
564639f9d9dSBarry Smith         fm                           = rowhit[fm];
565639f9d9dSBarry Smith       } while (fm < N);
566639f9d9dSBarry Smith     } /* ---------------------------------------------------------------------------------------*/
567639f9d9dSBarry Smith     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
568639f9d9dSBarry Smith   }
5693acb8795SBarry Smith   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
570639f9d9dSBarry Smith 
571606d414cSSatish Balay   ierr = PetscFree(rowhit);CHKERRQ(ierr);
572606d414cSSatish Balay   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
573639f9d9dSBarry Smith 
57430b34957SBarry Smith   /* Optimize by adding the vscale, and scaleforrow[][] fields */
57530b34957SBarry Smith   /*
57630b34957SBarry Smith        see the version for MPIAIJ
57730b34957SBarry Smith   */
578ce94432eSBarry Smith   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
57938baddfdSBarry Smith   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
58030b34957SBarry Smith   for (k=0; k<c->ncolors; k++) {
58138baddfdSBarry Smith     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
58230b34957SBarry Smith     for (l=0; l<c->nrows[k]; l++) {
58330b34957SBarry Smith       col = c->columnsforrow[k][l];
5842205254eSKarl Rupp 
58530b34957SBarry Smith       c->vscaleforrow[k][l] = col;
58630b34957SBarry Smith     }
58730b34957SBarry Smith   }
588b9617806SBarry Smith   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
5893a40ed3dSBarry Smith   PetscFunctionReturn(0);
59070c3da92SBarry Smith }
59179c1e64dSHong Zhang 
592