xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision c53567a07016831a3aeb692e1ce5d8d772267d49)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/baij/seq/baij.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatFDColoringApply_SeqBAIJ"
7 PetscErrorCode  MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
8 {
9   PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
10   PetscErrorCode ierr;
11   PetscInt       bs=J->rmap->bs,i,j,k,l,row,col,N=J->cmap->n,spidx,nz;
12   PetscScalar    dx,*dy_i,*xx,*w3_array,*dy=coloring->dy;
13   PetscScalar    *vscale_array;
14   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
15   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
16   void           *fctx   = coloring->fctx;
17   PetscBool      flg     = PETSC_FALSE;
18   PetscScalar    *valaddr;
19   MatEntry       *Jentry=coloring->matentry;
20 
21   PetscFunctionBegin;
22   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
23   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
24   if (flg) {
25     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
26   } else {
27     PetscBool assembled;
28     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
29     if (assembled) {
30       ierr = MatZeroEntries(J);CHKERRQ(ierr);
31     }
32   }
33   if (!coloring->vscale) {
34     ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr);
35   }
36 
37   /* Set w1 = F(x1) */
38   if (!coloring->fset) {
39     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
40     ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr);
41     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
42   } else {
43     coloring->fset = PETSC_FALSE;
44   }
45 
46   if (!coloring->w3) {
47     ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr);
48     ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr);
49   }
50   w3 = coloring->w3;
51 
52   /* Compute local scale factors: vscale = dx */
53   if (coloring->htype[0] == 'w') {
54     /* tacky test; need to make systematic if we add other approaches to computing h*/
55     ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr);
56     dx = (1.0 + unorm)*epsilon;
57     ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr);
58   } else {
59     ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
60     ierr = VecGetArray(x1,&xx);CHKERRQ(ierr);
61     for (col=0; col<N; col++) {
62       dx = xx[col];
63       if (dx == 0.0) dx = 1.0;
64       if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
65       else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
66       dx               *= epsilon;
67       vscale_array[col] = dx;
68     }
69     ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
70     ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr);
71   }
72 
73   nz = 0;
74   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
75   for (k=0; k<coloring->ncolors; k++) { /*  Loop over each color */
76     coloring->currentcolor = k;
77 
78     /* Compute w3 = x1 + dx */
79     ierr = VecCopy(x1,w3);CHKERRQ(ierr);
80     dy_i = dy;
81     for (i=0; i<bs; i++) {              /* Loop over a block of columns */
82       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
83       for (l=0; l<coloring->ncolumns[k]; l++) {
84         col            = i + bs*coloring->columns[k][l];
85         w3_array[col] += vscale_array[col];
86         if (i) {
87           w3_array[col-1] -= vscale_array[col-1]; /* resume original w3[col-1] */
88         }
89       }
90       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
91 
92       /* Evaluate function w2 = F(w3) - F(x1) */
93       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
94       ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */
95       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
96       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
97       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
98       ierr = VecResetArray(w2);CHKERRQ(ierr);
99       dy_i += N; /* points to dy+i*N */
100     }
101 
102     /* Loop over row blocks, putting dy/dx into Jacobian matrix */
103     for (l=0; l<coloring->nrows[k]; l++) {
104       row     = bs*Jentry[nz].row;
105       col     = bs*Jentry[nz].col;
106       valaddr = Jentry[nz].valaddr;
107       nz++;
108       spidx = 0;
109       dy_i  = dy;
110       for (i=0; i<bs; i++) {   /* column of the block */
111         for (j=0; j<bs; j++) { /* row of the block */
112           valaddr[spidx++] = dy_i[row+j]/vscale_array[col+i];
113         }
114         dy_i += N; /* points to dy+i*N */
115       }
116     }
117   } /* endof for each color */
118   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
119 
120   coloring->currentcolor = -1;
121   PetscFunctionReturn(0);
122 }
123 
124 extern PetscErrorCode  MatFDColoringApply_AIJ_new(Mat,MatFDColoring,Vec,MatStructure*,void*);
125 /* also used for SeqBAIJ matrices */
126 #undef __FUNCT__
127 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_new"
128 PetscErrorCode MatFDColoringCreate_SeqAIJ_new(Mat mat,ISColoring iscoloring,MatFDColoring c)
129 {
130   PetscErrorCode ierr;
131   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
132   const PetscInt *is,*row,*ci,*cj;
133   PetscInt       nis=iscoloring->n,*rowhit,bs,bs2,*spidx,nz;
134   IS             *isa;
135   PetscBool      isBAIJ;
136   Mat_SeqAIJ     *spA = (Mat_SeqAIJ*)mat->data;
137   PetscScalar    *A_val=spA->a;
138   PetscScalar    **valaddrhit;
139   MatEntry       *Jentry;
140 
141   PetscFunctionBegin;
142   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
143 
144   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
145      SeqBAIJ calls this routine, thus check it */
146   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
147   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
148   if (!isBAIJ) {
149     bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */
150   }
151   N         = mat->cmap->N/bs;
152   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
153   c->N      = mat->cmap->N/bs;
154   c->m      = mat->rmap->N/bs;
155   c->rstart = 0;
156 
157   c->ncolors = nis;
158   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
159   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
160   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
161   ierr       = PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt));CHKERRQ(ierr);
162 
163   ierr       = PetscMalloc(spA->nz*sizeof(MatEntry),&Jentry);CHKERRQ(ierr);
164   ierr       = PetscLogObjectMemory((PetscObject)c,spA->nz*sizeof(MatEntry));CHKERRQ(ierr);
165   c->matentry = Jentry;
166 
167   if (isBAIJ) {
168     ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
169   } else {
170     ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
171   }
172 
173   ierr = PetscMalloc2(c->m,PetscInt,&rowhit,c->m,PetscScalar*,&valaddrhit);CHKERRQ(ierr);
174   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
175 
176   nz = 0;
177   for (i=0; i<nis; i++) { /* loop over colors */
178     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
179     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
180 
181     c->ncolumns[i] = n;
182     if (n) {
183       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
184       ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr);
185       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
186     } else {
187       c->columns[i] = 0;
188     }
189 
190     /* fast, crude version requires O(N*N) work */
191     bs2   = bs*bs;
192     nrows = 0;
193     for (j=0; j<n; j++) {  /* loop over columns */
194       col    = is[j];
195       row    = cj + ci[col];
196       m      = ci[col+1] - ci[col];
197       nrows += m;
198       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
199         rowhit[*row]       = col + 1;
200         valaddrhit[*row++] = &A_val[bs2*spidx[ci[col] + k]];
201       }
202     }
203     c->nrows[i] = nrows; /* total num of rows for this color */
204 
205     for (j=0; j<N; j++) { /* loop over rows */
206       if (rowhit[j]) {
207         Jentry[nz].row     = j;              /* local row index */
208         Jentry[nz].col     = rowhit[j] - 1;  /* local column index */
209         Jentry[nz].valaddr = valaddrhit[j];  /* address of mat value for this entry */
210         nz++;
211         rowhit[j] = 0.0;                     /* zero rowhit for reuse */
212       }
213     }
214     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
215   }
216   if (nz != spA->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz,spA->nz);
217 
218   if (isBAIJ) {
219     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
220   } else {
221     ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
222   }
223   ierr = PetscFree2(rowhit,valaddrhit);CHKERRQ(ierr);
224   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
225 
226   c->ctype                  = IS_COLORING_GHOSTED;
227   if (isBAIJ) {
228     mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ;
229     ierr = PetscMalloc(bs*mat->cmap->N*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr);
230   } else {
231     mat->ops->fdcoloringapply = MatFDColoringApply_AIJ_new;
232   }
233   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
234   PetscFunctionReturn(0);
235 }
236 
237 #undef __FUNCT__
238 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
239 /*
240     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
241     This is why it has the ugly code with the MatGetBlockSize()
242 */
243 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
244 {
245   PetscErrorCode ierr;
246   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
247   const PetscInt *is,*rows,*ci,*cj;
248   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
249   IS             *isa;
250   PetscBool      done,flg = PETSC_FALSE;
251   PetscBool      flg1;
252   PetscBool      new_impl=PETSC_FALSE;
253 
254   PetscFunctionBegin;
255   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
256   if (new_impl){
257     ierr =  MatFDColoringCreate_SeqAIJ_new(mat,iscoloring,c);CHKERRQ(ierr);
258     PetscFunctionReturn(0);
259   }
260   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
261 
262   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
263   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
264      SeqBAIJ calls this routine, thus check it */
265   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
266   if (flg1) {
267     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
268   }
269 
270   N         = mat->cmap->N/bs;
271   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
272   c->N      = mat->cmap->N/bs;
273   c->m      = mat->rmap->N/bs;
274   c->rstart = 0;
275 
276   c->ncolors = nis;
277   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
278   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
279   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
280   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
281   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
282 
283   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
284   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
285 
286   /*
287      Temporary option to allow for debugging/testing
288   */
289   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
290 
291   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
292   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
293 
294   for (i=0; i<nis; i++) {
295     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
296     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
297 
298     c->ncolumns[i] = n;
299     if (n) {
300       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
301       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
302     } else {
303       c->columns[i] = 0;
304     }
305 
306     if (!flg) { /* ------------------------------------------------------------------------------*/
307       /* fast, crude version requires O(N*N) work */
308       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
309       /* loop over columns*/
310       for (j=0; j<n; j++) {
311         col  = is[j];
312         rows = cj + ci[col];
313         m    = ci[col+1] - ci[col];
314         /* loop over columns marking them in rowhit */
315         for (k=0; k<m; k++) {
316           rowhit[*rows++] = col + 1;
317         }
318       }
319       /* count the number of hits */
320       nrows = 0;
321       for (j=0; j<N; j++) {
322         if (rowhit[j]) nrows++;
323       }
324       c->nrows[i] = nrows;
325       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
326       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
327       nrows       = 0;
328       for (j=0; j<N; j++) {
329         if (rowhit[j]) {
330           c->rows[i][nrows]          = j;
331           c->columnsforrow[i][nrows] = rowhit[j] - 1;
332           nrows++;
333         }
334       }
335     } else {  /*-------------------------------------------------------------------------------*/
336       /* slow version, using rowhit as a linked list */
337       PetscInt currentcol,fm,mfm;
338       rowhit[N] = N;
339       nrows     = 0;
340       /* loop over columns */
341       for (j=0; j<n; j++) {
342         col  = is[j];
343         rows = cj + ci[col];
344         m    = ci[col+1] - ci[col];
345         /* loop over columns marking them in rowhit */
346         fm = N;    /* fm points to first entry in linked list */
347         for (k=0; k<m; k++) {
348           currentcol = *rows++;
349           /* is it already in the list? */
350           do {
351             mfm = fm;
352             fm  = rowhit[fm];
353           } while (fm < currentcol);
354           /* not in list so add it */
355           if (fm != currentcol) {
356             nrows++;
357             columnsforrow[currentcol] = col;
358             /* next three lines insert new entry into linked list */
359             rowhit[mfm]        = currentcol;
360             rowhit[currentcol] = fm;
361             fm                 = currentcol;
362             /* fm points to present position in list since we know the columns are sorted */
363           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
364         }
365       }
366       c->nrows[i] = nrows;
367       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
368       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
369       /* now store the linked list of rows into c->rows[i] */
370       nrows = 0;
371       fm    = rowhit[N];
372       do {
373         c->rows[i][nrows]            = fm;
374         c->columnsforrow[i][nrows++] = columnsforrow[fm];
375         fm                           = rowhit[fm];
376       } while (fm < N);
377     } /* ---------------------------------------------------------------------------------------*/
378     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
379   }
380   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
381 
382   ierr = PetscFree(rowhit);CHKERRQ(ierr);
383   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
384 
385   /* Optimize by adding the vscale, and scaleforrow[][] fields */
386   /*
387        see the version for MPIAIJ
388   */
389   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
390   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
391   for (k=0; k<c->ncolors; k++) {
392     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
393     for (l=0; l<c->nrows[k]; l++) {
394       col = c->columnsforrow[k][l];
395 
396       c->vscaleforrow[k][l] = col;
397     }
398   }
399   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
400   PetscFunctionReturn(0);
401 }
402 
403