xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision 525d23c034271bd270c20f595b9bb6ddbc2c1c84)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/baij/seq/baij.h>
4 
5 /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */
6 /* #define JACOBIANCOLOROPT */
7 #if defined(JACOBIANCOLOROPT)
8 #include <petsctime.h>
9 #endif
10 #undef __FUNCT__
11 #define __FUNCT__ "MatFDColoringApply_SeqAIJ"
12 PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
13 {
14   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
15   PetscErrorCode ierr;
16   PetscInt       k,l,row,col,N;
17   PetscScalar    dx,*y,*xx,*w3_array;
18   PetscScalar    *vscale_array;
19   PetscReal      epsilon=coloring->error_rel,umin = coloring->umin,unorm;
20   Vec            w1=coloring->w1,w2=coloring->w2,w3;
21   void           *fctx=coloring->fctx;
22   PetscBool      flg=PETSC_FALSE;
23   Mat_SeqAIJ     *csp=(Mat_SeqAIJ*)J->data;
24   PetscScalar    *ca=csp->a;
25   const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
26   PetscInt       **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx;
27 #if defined(JACOBIANCOLOROPT)
28   PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11;
29 #endif
30 
31   PetscFunctionBegin;
32 #if defined(JACOBIANCOLOROPT)
33     ierr = PetscTime(&t0);CHKERRQ(ierr);
34 #endif
35   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
36   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
37   if (flg) {
38     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
39   } else {
40     PetscBool assembled;
41     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
42     if (assembled) {
43       ierr = MatZeroEntries(J);CHKERRQ(ierr);
44     }
45   }
46 
47   if (!coloring->vscale) {
48     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
49   }
50 
51   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
52     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
53   }
54 #if defined(JACOBIANCOLOROPT)
55     ierr = PetscTime(&t1);CHKERRQ(ierr);
56     t_init += t1 - t0;
57 #endif
58 
59   /* Set w1 = F(x1) */
60 #if defined(JACOBIANCOLOROPT)
61     ierr = PetscTime(&t0);CHKERRQ(ierr);
62 #endif
63   if (!coloring->fset) {
64     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
65     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
66     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
67   } else {
68     coloring->fset = PETSC_FALSE;
69   }
70 #if defined(JACOBIANCOLOROPT)
71     ierr = PetscTime(&t1);CHKERRQ(ierr);
72     t_setvals += t1 - t0;
73 #endif
74 
75 #if defined(JACOBIANCOLOROPT)
76     ierr = PetscTime(&t0);CHKERRQ(ierr);
77 #endif
78   if (!coloring->w3) {
79     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
80     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
81   }
82   w3 = coloring->w3;
83 
84   /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */
85   ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
86   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
87   ierr = VecGetSize(x1,&N);CHKERRQ(ierr);
88   for (col=0; col<N; col++) {
89     if (coloring->htype[0] == 'w') {
90       dx = 1.0 + unorm;
91     } else {
92       dx = xx[col];
93     }
94     if (dx == (PetscScalar)0.0) dx = 1.0;
95     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
96     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
97     dx               *= epsilon;
98     vscale_array[col] = (PetscScalar)dx;
99   }
100 #if defined(JACOBIANCOLOROPT)
101     ierr = PetscTime(&t1);CHKERRQ(ierr);
102     t_init += t1 - t0;
103 #endif
104 
105   nz  = 0;
106   for (k=0; k<ncolors; k++) { /* loop over colors */
107 #if defined(JACOBIANCOLOROPT)
108     ierr = PetscTime(&t0);CHKERRQ(ierr);
109 #endif
110     coloring->currentcolor = k;
111 
112     /*
113       Loop over each column associated with color
114       adding the perturbation to the vector w3 = x1 + dx.
115     */
116     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
117     ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
118     ncolumns_k = ncolumns[k];
119     for (l=0; l<ncolumns_k; l++) { /* loop over columns */
120       col = columns[k][l];
121       w3_array[col] += vscale_array[col];
122     }
123     ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
124 #if defined(JACOBIANCOLOROPT)
125     ierr = PetscTime(&t1);CHKERRQ(ierr);
126     t_dx += t1 - t0;
127 #endif
128 
129     /*
130       Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
131                            w2 = F(x1 + dx) - F(x1)
132     */
133 #if defined(JACOBIANCOLOROPT)
134     ierr = PetscTime(&t0);CHKERRQ(ierr);
135 #endif
136     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
137     ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
138     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
139 #if defined(JACOBIANCOLOROPT)
140     ierr = PetscTime(&t1);CHKERRQ(ierr);
141     t_func += t1 - t0;
142 #endif
143 
144 #if defined(JACOBIANCOLOROPT)
145     ierr = PetscTime(&t0);CHKERRQ(ierr);
146 #endif
147     ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
148 
149     /*
150       Loop over rows of vector, putting w2/dx into Jacobian matrix
151     */
152     ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
153     nrows_k = nrows[k];
154     for (l=0; l<nrows_k; l++) { /* loop over rows */
155 #if defined(JACOBIANCOLOROPT)
156     ierr = PetscTime(&t00);CHKERRQ(ierr);
157 #endif
158       row   = coloring->rowcolden2sp3[nz++];
159       col   = coloring->rowcolden2sp3[nz++];
160       spidx = coloring->rowcolden2sp3[nz++];
161 #if defined(JACOBIANCOLOROPT)
162       ierr = PetscTime(&t11);CHKERRQ(ierr);
163       t_kl += t11 - t00;
164 #endif
165       ca[spidx] = y[row]/vscale_array[col];
166     }
167     ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
168 
169 #if defined(JACOBIANCOLOROPT)
170     ierr = PetscTime(&t1);CHKERRQ(ierr);
171     t_setvals += t1 - t0;
172 #endif
173   } /* endof for each color */
174 #if defined(JACOBIANCOLOROPT)
175   printf("     FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx);
176   printf("     FDColorApply time: kl %g\n",t_kl);
177 #endif
178   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
179   ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
180 
181   coloring->currentcolor = -1;
182   PetscFunctionReturn(0);
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp"
187 /*
188     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
189 */
190 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c)
191 {
192   PetscErrorCode ierr;
193   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
194   const PetscInt *is,*rows,*ci,*cj;
195   PetscInt       nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz;
196   IS             *isa;
197   PetscBool      isBAIJ;
198   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
199 
200   PetscFunctionBegin;
201   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
202 
203   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
204      SeqBAIJ calls this routine, thus check it */
205   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
206   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
207   if (!isBAIJ) {
208     bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */
209   }
210   N         = mat->cmap->N/bs;
211   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
212   c->N      = mat->cmap->N/bs;
213   c->m      = mat->rmap->N/bs;
214   c->rstart = 0;
215 
216   c->ncolors = nis;
217   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
218   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
219   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
220   ierr       = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr);
221 
222   if (isBAIJ) {
223     ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
224   } else {
225     ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
226   }
227 
228   ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr);
229   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
230 
231   nz = 0;
232   for (i=0; i<nis; i++) { /* loop over colors */
233     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
234     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
235 
236     c->ncolumns[i] = n;
237     if (n) {
238       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
239       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
240     } else {
241       c->columns[i] = 0;
242     }
243 
244     /* fast, crude version requires O(N*N) work */
245     nrows = 0;
246     for (j=0; j<n; j++) {  /* loop over columns */
247       col  = is[j];
248       rows = cj + ci[col];
249       m    = ci[col+1] - ci[col];
250       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
251         rowhit[*rows]     = col + 1;
252         spidxhit[*rows++] = spidx[ci[col] + k];
253         nrows++;
254       }
255     }
256     c->nrows[i] = nrows; /* total num of rows for this color */
257 
258     nrows = 0;
259     for (j=0; j<N; j++) { /* loop over rows */
260       if (rowhit[j]) {
261         c->rowcolden2sp3[nz++]   = j;             /* row index */
262         c->rowcolden2sp3[nz++] = rowhit[j] - 1;   /* column index */
263         c->rowcolden2sp3[nz++] = spidxhit[j];     /* den2sp index */
264         rowhit[j] = 0.0;                          /* zero rowhit for reuse */
265       }
266     }
267     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
268   }
269   if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz);
270 
271   if (isBAIJ) {
272     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
273   } else {
274     ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
275   }
276   ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr);
277   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
278 
279   c->ctype                  = IS_COLORING_GHOSTED;
280   mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
281   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
282   PetscFunctionReturn(0);
283 }
284 
285 #undef __FUNCT__
286 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
287 /*
288     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
289     This is why it has the ugly code with the MatGetBlockSize()
290 */
291 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
292 {
293   PetscErrorCode ierr;
294   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
295   const PetscInt *is,*rows,*ci,*cj;
296   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
297   IS             *isa;
298   PetscBool      done,flg = PETSC_FALSE;
299   PetscBool      flg1;
300   PetscBool      new_impl=PETSC_FALSE;
301 
302   PetscFunctionBegin;
303   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
304   if (new_impl){
305     ierr =  MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr);
306     PetscFunctionReturn(0);
307   }
308   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
309 
310   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
311   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
312      SeqBAIJ calls this routine, thus check it */
313   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
314   if (flg1) {
315     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
316   }
317 
318   N         = mat->cmap->N/bs;
319   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
320   c->N      = mat->cmap->N/bs;
321   c->m      = mat->rmap->N/bs;
322   c->rstart = 0;
323 
324   c->ncolors = nis;
325   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
326   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
327   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
328   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
329   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
330 
331   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
332   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
333 
334   /*
335      Temporary option to allow for debugging/testing
336   */
337   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
338 
339   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
340   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
341 
342   for (i=0; i<nis; i++) {
343     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
344     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
345 
346     c->ncolumns[i] = n;
347     if (n) {
348       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
349       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
350     } else {
351       c->columns[i] = 0;
352     }
353 
354     if (!flg) { /* ------------------------------------------------------------------------------*/
355       /* fast, crude version requires O(N*N) work */
356       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
357       /* loop over columns*/
358       for (j=0; j<n; j++) {
359         col  = is[j];
360         rows = cj + ci[col];
361         m    = ci[col+1] - ci[col];
362         /* loop over columns marking them in rowhit */
363         for (k=0; k<m; k++) {
364           rowhit[*rows++] = col + 1;
365         }
366       }
367       /* count the number of hits */
368       nrows = 0;
369       for (j=0; j<N; j++) {
370         if (rowhit[j]) nrows++;
371       }
372       c->nrows[i] = nrows;
373       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
374       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
375       nrows       = 0;
376       for (j=0; j<N; j++) {
377         if (rowhit[j]) {
378           c->rows[i][nrows]          = j;
379           c->columnsforrow[i][nrows] = rowhit[j] - 1;
380           nrows++;
381         }
382       }
383     } else {  /*-------------------------------------------------------------------------------*/
384       /* slow version, using rowhit as a linked list */
385       PetscInt currentcol,fm,mfm;
386       rowhit[N] = N;
387       nrows     = 0;
388       /* loop over columns */
389       for (j=0; j<n; j++) {
390         col  = is[j];
391         rows = cj + ci[col];
392         m    = ci[col+1] - ci[col];
393         /* loop over columns marking them in rowhit */
394         fm = N;    /* fm points to first entry in linked list */
395         for (k=0; k<m; k++) {
396           currentcol = *rows++;
397           /* is it already in the list? */
398           do {
399             mfm = fm;
400             fm  = rowhit[fm];
401           } while (fm < currentcol);
402           /* not in list so add it */
403           if (fm != currentcol) {
404             nrows++;
405             columnsforrow[currentcol] = col;
406             /* next three lines insert new entry into linked list */
407             rowhit[mfm]        = currentcol;
408             rowhit[currentcol] = fm;
409             fm                 = currentcol;
410             /* fm points to present position in list since we know the columns are sorted */
411           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
412         }
413       }
414       c->nrows[i] = nrows;
415       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
416       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
417       /* now store the linked list of rows into c->rows[i] */
418       nrows = 0;
419       fm    = rowhit[N];
420       do {
421         c->rows[i][nrows]            = fm;
422         c->columnsforrow[i][nrows++] = columnsforrow[fm];
423         fm                           = rowhit[fm];
424       } while (fm < N);
425     } /* ---------------------------------------------------------------------------------------*/
426     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
427   }
428   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
429 
430   ierr = PetscFree(rowhit);CHKERRQ(ierr);
431   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
432 
433   /* Optimize by adding the vscale, and scaleforrow[][] fields */
434   /*
435        see the version for MPIAIJ
436   */
437   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
438   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
439   for (k=0; k<c->ncolors; k++) {
440     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
441     for (l=0; l<c->nrows[k]; l++) {
442       col = c->columnsforrow[k][l];
443 
444       c->vscaleforrow[k][l] = col;
445     }
446   }
447   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
448   PetscFunctionReturn(0);
449 }
450 
451