xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision f8c2866e654d46b30197bc999c108affa4012b0e)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/baij/seq/baij.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatFDColoringApply_SeqBAIJ"
7 PetscErrorCode  MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
8 {
9   PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
10   PetscErrorCode ierr;
11   PetscInt       bs=J->rmap->bs,i,j,k,l,row,col,N=J->cmap->n,spidx,nz;
12   PetscScalar    dx,*dy_i,*xx,*w3_array,*dy=coloring->dy;
13   PetscScalar    *vscale_array;
14   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
15   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
16   void           *fctx   = coloring->fctx;
17   PetscBool      flg     = PETSC_FALSE;
18   PetscScalar    *valaddr;
19   MatEntry       *Jentry=coloring->matentry;
20 
21   PetscFunctionBegin;
22   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
23   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
24   if (flg) {
25     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
26   } else {
27     PetscBool assembled;
28     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
29     if (assembled) {
30       ierr = MatZeroEntries(J);CHKERRQ(ierr);
31     }
32   }
33   if (!coloring->vscale) {
34     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
35   }
36 
37   /* Set w1 = F(x1) */
38   if (!coloring->fset) {
39     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
40     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
41     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
42   } else {
43     coloring->fset = PETSC_FALSE;
44   }
45 
46   if (!coloring->w3) {
47     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
48     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
49   }
50   w3 = coloring->w3;
51 
52   /* Compute local scale factors: vscale = dx */
53   if (coloring->htype[0] == 'w') {
54     /* tacky test; need to make systematic if we add other approaches to computing h*/
55     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
56     dx = (1.0 + unorm)*epsilon;
57     ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr);
58   } else {
59     ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
60     ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
61     for (col=0; col<N; col++) {
62       dx = xx[col];
63       if (dx == 0.0) dx = 1.0;
64       if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
65       else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
66       dx               *= epsilon;
67       vscale_array[col] = dx;
68     }
69     ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
70     ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
71   }
72 
73   nz = 0;
74   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
75   for (k=0; k<coloring->ncolors; k++) { /*  Loop over each color */
76     coloring->currentcolor = k;
77 
78     /* Compute w3 = x1 + dx */
79     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
80     dy_i = dy;
81     for (i=0; i<bs; i++) {              /* Loop over a block of columns */
82       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
83       for (l=0; l<coloring->ncolumns[k]; l++) {
84         col            = i + bs*coloring->columns[k][l];
85         w3_array[col] += vscale_array[col];
86         if (i) {
87           w3_array[col-1] -= vscale_array[col-1]; /* resume original w3[col-1] */
88         }
89       }
90       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
91 
92       /* Evaluate function w2 = F(w3) - F(x1) */
93       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
94       ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */
95       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
96       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
97       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
98       ierr = VecResetArray(w2);CHKERRQ(ierr);
99       dy_i += N; /* points to dy+i*N */
100     }
101 
102     /* Loop over row blocks, putting dy/dx into Jacobian matrix */
103     for (l=0; l<coloring->nrows[k]; l++) {
104       row     = bs*Jentry[nz].row;
105       col     = bs*Jentry[nz].col;
106       valaddr = Jentry[nz].valaddr;
107       nz++;
108       spidx = 0;
109       dy_i  = dy;
110       for (i=0; i<bs; i++) {   /* column of the block */
111         for (j=0; j<bs; j++) { /* row of the block */
112           valaddr[spidx++] = dy_i[row+j]/vscale_array[col+i];
113         }
114         dy_i += N; /* points to dy+i*N */
115       }
116     }
117   } /* endof for each color */
118   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
119 
120   coloring->currentcolor = -1;
121   PetscFunctionReturn(0);
122 }
123 
124 extern PetscErrorCode  MatFDColoringApply_AIJ_new(Mat,MatFDColoring,Vec,MatStructure*,void*);
125 extern PetscErrorCode  MatFDColoringApply_BAIJ_new(Mat,MatFDColoring,Vec,MatStructure*,void *);
126 /* also used for SeqBAIJ matrices */
127 #undef __FUNCT__
128 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_new"
129 PetscErrorCode MatFDColoringCreate_SeqAIJ_new(Mat mat,ISColoring iscoloring,MatFDColoring c)
130 {
131   PetscErrorCode ierr;
132   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
133   const PetscInt *is,*row,*ci,*cj;
134   PetscInt       nis=iscoloring->n,*rowhit,bs,bs2,*spidx,nz;
135   IS             *isa;
136   PetscBool      isBAIJ;
137   Mat_SeqAIJ     *spA = (Mat_SeqAIJ*)mat->data;
138   PetscScalar    *A_val=spA->a;
139   PetscScalar    **valaddrhit;
140   MatEntry       *Jentry;
141 
142   PetscFunctionBegin;
143   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
144 
145   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
146      SeqBAIJ calls this routine, thus check it */
147   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
148   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
149   if (!isBAIJ) {
150     bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */
151   }
152   N         = mat->cmap->N/bs;
153   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
154   c->N      = mat->cmap->N/bs;
155   c->m      = mat->rmap->N/bs;
156   c->rstart = 0;
157 
158   c->ncolors = nis;
159   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
160   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
161   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
162   ierr       = PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt));CHKERRQ(ierr);
163 
164   ierr       = PetscMalloc(spA->nz*sizeof(MatEntry),&Jentry);CHKERRQ(ierr);
165   ierr       = PetscLogObjectMemory((PetscObject)c,spA->nz*sizeof(MatEntry));CHKERRQ(ierr);
166   c->matentry = Jentry;
167 
168   if (isBAIJ) {
169     ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
170   } else {
171     ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
172   }
173 
174   ierr = PetscMalloc2(c->m,PetscInt,&rowhit,c->m,PetscScalar*,&valaddrhit);CHKERRQ(ierr);
175   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
176 
177   nz = 0;
178   for (i=0; i<nis; i++) { /* loop over colors */
179     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
180     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
181 
182     c->ncolumns[i] = n;
183     if (n) {
184       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
185       ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr);
186       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
187     } else {
188       c->columns[i] = 0;
189     }
190 
191     /* fast, crude version requires O(N*N) work */
192     bs2   = bs*bs;
193     nrows = 0;
194     for (j=0; j<n; j++) {  /* loop over columns */
195       col    = is[j];
196       row    = cj + ci[col];
197       m      = ci[col+1] - ci[col];
198       nrows += m;
199       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
200         rowhit[*row]       = col + 1;
201         valaddrhit[*row++] = &A_val[bs2*spidx[ci[col] + k]];
202       }
203     }
204     c->nrows[i] = nrows; /* total num of rows for this color */
205 
206     for (j=0; j<N; j++) { /* loop over rows */
207       if (rowhit[j]) {
208         Jentry[nz].row     = j;              /* local row index */
209         Jentry[nz].col     = rowhit[j] - 1;  /* local column index */
210         Jentry[nz].valaddr = valaddrhit[j];  /* address of mat value for this entry */
211         nz++;
212         rowhit[j] = 0.0;                     /* zero rowhit for reuse */
213       }
214     }
215     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
216   }
217   if (nz != spA->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz,spA->nz);
218 
219   if (isBAIJ) {
220     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
221   } else {
222     ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
223   }
224   ierr = PetscFree2(rowhit,valaddrhit);CHKERRQ(ierr);
225   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
226 
227   c->ctype                  = IS_COLORING_GHOSTED;
228   if (isBAIJ) {
229     mat->ops->fdcoloringapply = MatFDColoringApply_BAIJ_new;
230     ierr = PetscMalloc(bs*mat->cmap->N*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr);
231   } else {
232     mat->ops->fdcoloringapply = MatFDColoringApply_AIJ_new;
233   }
234   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
240 /*
241     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
242     This is why it has the ugly code with the MatGetBlockSize()
243 */
244 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
245 {
246   PetscErrorCode ierr;
247   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
248   const PetscInt *is,*rows,*ci,*cj;
249   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
250   IS             *isa;
251   PetscBool      done,flg = PETSC_FALSE;
252   PetscBool      flg1;
253   PetscBool      new_impl=PETSC_FALSE;
254 
255   PetscFunctionBegin;
256   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
257   if (new_impl){
258     ierr =  MatFDColoringCreate_SeqAIJ_new(mat,iscoloring,c);CHKERRQ(ierr);
259     PetscFunctionReturn(0);
260   }
261   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
262 
263   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
264   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
265      SeqBAIJ calls this routine, thus check it */
266   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
267   if (flg1) {
268     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
269   }
270 
271   N         = mat->cmap->N/bs;
272   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
273   c->N      = mat->cmap->N/bs;
274   c->m      = mat->rmap->N/bs;
275   c->rstart = 0;
276 
277   c->ncolors = nis;
278   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
279   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
280   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
281   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
282   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
283 
284   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
285   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
286 
287   /*
288      Temporary option to allow for debugging/testing
289   */
290   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
291 
292   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
293   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
294 
295   for (i=0; i<nis; i++) {
296     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
297     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
298 
299     c->ncolumns[i] = n;
300     if (n) {
301       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
302       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
303     } else {
304       c->columns[i] = 0;
305     }
306 
307     if (!flg) { /* ------------------------------------------------------------------------------*/
308       /* fast, crude version requires O(N*N) work */
309       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
310       /* loop over columns*/
311       for (j=0; j<n; j++) {
312         col  = is[j];
313         rows = cj + ci[col];
314         m    = ci[col+1] - ci[col];
315         /* loop over columns marking them in rowhit */
316         for (k=0; k<m; k++) {
317           rowhit[*rows++] = col + 1;
318         }
319       }
320       /* count the number of hits */
321       nrows = 0;
322       for (j=0; j<N; j++) {
323         if (rowhit[j]) nrows++;
324       }
325       c->nrows[i] = nrows;
326       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
327       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
328       nrows       = 0;
329       for (j=0; j<N; j++) {
330         if (rowhit[j]) {
331           c->rows[i][nrows]          = j;
332           c->columnsforrow[i][nrows] = rowhit[j] - 1;
333           nrows++;
334         }
335       }
336     } else {  /*-------------------------------------------------------------------------------*/
337       /* slow version, using rowhit as a linked list */
338       PetscInt currentcol,fm,mfm;
339       rowhit[N] = N;
340       nrows     = 0;
341       /* loop over columns */
342       for (j=0; j<n; j++) {
343         col  = is[j];
344         rows = cj + ci[col];
345         m    = ci[col+1] - ci[col];
346         /* loop over columns marking them in rowhit */
347         fm = N;    /* fm points to first entry in linked list */
348         for (k=0; k<m; k++) {
349           currentcol = *rows++;
350           /* is it already in the list? */
351           do {
352             mfm = fm;
353             fm  = rowhit[fm];
354           } while (fm < currentcol);
355           /* not in list so add it */
356           if (fm != currentcol) {
357             nrows++;
358             columnsforrow[currentcol] = col;
359             /* next three lines insert new entry into linked list */
360             rowhit[mfm]        = currentcol;
361             rowhit[currentcol] = fm;
362             fm                 = currentcol;
363             /* fm points to present position in list since we know the columns are sorted */
364           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
365         }
366       }
367       c->nrows[i] = nrows;
368       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
369       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
370       /* now store the linked list of rows into c->rows[i] */
371       nrows = 0;
372       fm    = rowhit[N];
373       do {
374         c->rows[i][nrows]            = fm;
375         c->columnsforrow[i][nrows++] = columnsforrow[fm];
376         fm                           = rowhit[fm];
377       } while (fm < N);
378     } /* ---------------------------------------------------------------------------------------*/
379     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
380   }
381   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
382 
383   ierr = PetscFree(rowhit);CHKERRQ(ierr);
384   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
385 
386   /* Optimize by adding the vscale, and scaleforrow[][] fields */
387   /*
388        see the version for MPIAIJ
389   */
390   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
391   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
392   for (k=0; k<c->ncolors; k++) {
393     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
394     for (l=0; l<c->nrows[k]; l++) {
395       col = c->columnsforrow[k][l];
396 
397       c->vscaleforrow[k][l] = col;
398     }
399   }
400   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
401   PetscFunctionReturn(0);
402 }
403 
404