xref: /petsc/src/mat/impls/aij/mpi/fdmpiaij.c (revision 30bab8dbce35fbb76248bd0fea1b2bc98f8a4f04)
1 #include <../src/mat/impls/sell/mpi/mpisell.h>
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3 #include <../src/mat/impls/baij/mpi/mpibaij.h>
4 #include <petsc/private/isimpl.h>
5 
6 static PetscErrorCode MatFDColoringMarkHost_AIJ(Mat J)
7 {
8   PetscBool    isseqAIJ, ismpiAIJ;
9   PetscScalar *v;
10 
11   PetscFunctionBegin;
12   PetscCall(PetscObjectBaseTypeCompare((PetscObject)J, MATMPIAIJ, &ismpiAIJ));
13   PetscCall(PetscObjectBaseTypeCompare((PetscObject)J, MATSEQAIJ, &isseqAIJ));
14   if (isseqAIJ) {
15     PetscCall(MatSeqAIJGetArrayWrite(J, &v));
16     PetscCall(MatSeqAIJRestoreArrayWrite(J, &v));
17   } else if (ismpiAIJ) {
18     Mat dJ, oJ;
19 
20     PetscCall(MatMPIAIJGetSeqAIJ(J, &dJ, &oJ, NULL));
21     PetscCall(MatSeqAIJGetArrayWrite(dJ, &v));
22     PetscCall(MatSeqAIJRestoreArrayWrite(dJ, &v));
23     PetscCall(MatSeqAIJGetArrayWrite(oJ, &v));
24     PetscCall(MatSeqAIJRestoreArrayWrite(oJ, &v));
25   }
26   PetscFunctionReturn(PETSC_SUCCESS);
27 }
28 
29 PetscErrorCode MatFDColoringApply_BAIJ(Mat J, MatFDColoring coloring, Vec x1, void *sctx)
30 {
31   PetscErrorCode (*f)(void *, Vec, Vec, void *) = (PetscErrorCode(*)(void *, Vec, Vec, void *))coloring->f;
32   PetscInt           k, cstart, cend, l, row, col, nz, spidx, i, j;
33   PetscScalar        dx = 0.0, *w3_array, *dy_i, *dy = coloring->dy;
34   PetscScalar       *vscale_array;
35   const PetscScalar *xx;
36   PetscReal          epsilon = coloring->error_rel, umin = coloring->umin, unorm;
37   Vec                w1 = coloring->w1, w2 = coloring->w2, w3, vscale = coloring->vscale;
38   void              *fctx  = coloring->fctx;
39   PetscInt           ctype = coloring->ctype, nxloc, nrows_k;
40   PetscScalar       *valaddr;
41   MatEntry          *Jentry  = coloring->matentry;
42   MatEntry2         *Jentry2 = coloring->matentry2;
43   const PetscInt     ncolors = coloring->ncolors, *ncolumns = coloring->ncolumns, *nrows = coloring->nrows;
44   PetscInt           bs = J->rmap->bs;
45 
46   PetscFunctionBegin;
47   PetscCall(VecBindToCPU(x1, PETSC_TRUE));
48   /* (1) Set w1 = F(x1) */
49   if (!coloring->fset) {
50     PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, coloring, 0, 0, 0));
51     PetscCall((*f)(sctx, x1, w1, fctx));
52     PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, coloring, 0, 0, 0));
53   } else {
54     coloring->fset = PETSC_FALSE;
55   }
56 
57   /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */
58   PetscCall(VecGetLocalSize(x1, &nxloc));
59   if (coloring->htype[0] == 'w') {
60     /* vscale = dx is a constant scalar */
61     PetscCall(VecNorm(x1, NORM_2, &unorm));
62     dx = 1.0 / (PetscSqrtReal(1.0 + unorm) * epsilon);
63   } else {
64     PetscCall(VecGetArrayRead(x1, &xx));
65     PetscCall(VecGetArray(vscale, &vscale_array));
66     for (col = 0; col < nxloc; col++) {
67       dx = xx[col];
68       if (PetscAbsScalar(dx) < umin) {
69         if (PetscRealPart(dx) >= 0.0) dx = umin;
70         else if (PetscRealPart(dx) < 0.0) dx = -umin;
71       }
72       dx *= epsilon;
73       vscale_array[col] = 1.0 / dx;
74     }
75     PetscCall(VecRestoreArrayRead(x1, &xx));
76     PetscCall(VecRestoreArray(vscale, &vscale_array));
77   }
78   if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') {
79     PetscCall(VecGhostUpdateBegin(vscale, INSERT_VALUES, SCATTER_FORWARD));
80     PetscCall(VecGhostUpdateEnd(vscale, INSERT_VALUES, SCATTER_FORWARD));
81   }
82 
83   /* (3) Loop over each color */
84   if (!coloring->w3) {
85     PetscCall(VecDuplicate(x1, &coloring->w3));
86     /* Vec is used intensively in particular piece of scalar CPU code; won't benefit from bouncing back and forth to the GPU */
87     PetscCall(VecBindToCPU(coloring->w3, PETSC_TRUE));
88   }
89   w3 = coloring->w3;
90 
91   PetscCall(VecGetOwnershipRange(x1, &cstart, &cend)); /* used by ghosted vscale */
92   if (vscale) PetscCall(VecGetArray(vscale, &vscale_array));
93   nz = 0;
94   for (k = 0; k < ncolors; k++) {
95     coloring->currentcolor = k;
96 
97     /*
98       (3-1) Loop over each column associated with color
99       adding the perturbation to the vector w3 = x1 + dx.
100     */
101     PetscCall(VecCopy(x1, w3));
102     dy_i = dy;
103     for (i = 0; i < bs; i++) { /* Loop over a block of columns */
104       PetscCall(VecGetArray(w3, &w3_array));
105       if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
106       if (coloring->htype[0] == 'w') {
107         for (l = 0; l < ncolumns[k]; l++) {
108           col = i + bs * coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
109           w3_array[col] += 1.0 / dx;
110           if (i) w3_array[col - 1] -= 1.0 / dx; /* resume original w3[col-1] */
111         }
112       } else {                  /* htype == 'ds' */
113         vscale_array -= cstart; /* shift pointer so global index can be used */
114         for (l = 0; l < ncolumns[k]; l++) {
115           col = i + bs * coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
116           w3_array[col] += 1.0 / vscale_array[col];
117           if (i) w3_array[col - 1] -= 1.0 / vscale_array[col - 1]; /* resume original w3[col-1] */
118         }
119         vscale_array += cstart;
120       }
121       if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
122       PetscCall(VecRestoreArray(w3, &w3_array));
123 
124       /*
125        (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
126                            w2 = F(x1 + dx) - F(x1)
127        */
128       PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0));
129       PetscCall(VecPlaceArray(w2, dy_i)); /* place w2 to the array dy_i */
130       PetscCall((*f)(sctx, w3, w2, fctx));
131       PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0));
132       PetscCall(VecAXPY(w2, -1.0, w1));
133       PetscCall(VecResetArray(w2));
134       dy_i += nxloc; /* points to dy+i*nxloc */
135     }
136 
137     /*
138      (3-3) Loop over rows of vector, putting results into Jacobian matrix
139     */
140     nrows_k = nrows[k];
141     if (coloring->htype[0] == 'w') {
142       for (l = 0; l < nrows_k; l++) {
143         row     = bs * Jentry2[nz].row; /* local row index */
144         valaddr = Jentry2[nz++].valaddr;
145         spidx   = 0;
146         dy_i    = dy;
147         for (i = 0; i < bs; i++) {   /* column of the block */
148           for (j = 0; j < bs; j++) { /* row of the block */
149             valaddr[spidx++] = dy_i[row + j] * dx;
150           }
151           dy_i += nxloc; /* points to dy+i*nxloc */
152         }
153       }
154     } else { /* htype == 'ds' */
155       for (l = 0; l < nrows_k; l++) {
156         row     = bs * Jentry[nz].row; /* local row index */
157         col     = bs * Jentry[nz].col; /* local column index */
158         valaddr = Jentry[nz++].valaddr;
159         spidx   = 0;
160         dy_i    = dy;
161         for (i = 0; i < bs; i++) {   /* column of the block */
162           for (j = 0; j < bs; j++) { /* row of the block */
163             valaddr[spidx++] = dy_i[row + j] * vscale_array[col + i];
164           }
165           dy_i += nxloc; /* points to dy+i*nxloc */
166         }
167       }
168     }
169   }
170   PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
171   PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
172   if (vscale) PetscCall(VecRestoreArray(vscale, &vscale_array));
173 
174   coloring->currentcolor = -1;
175   PetscCall(VecBindToCPU(x1, PETSC_FALSE));
176   PetscFunctionReturn(PETSC_SUCCESS);
177 }
178 
179 /* this is declared PETSC_EXTERN because it is used by MatFDColoringUseDM() which is in the DM library */
180 PetscErrorCode MatFDColoringApply_AIJ(Mat J, MatFDColoring coloring, Vec x1, void *sctx)
181 {
182   PetscErrorCode (*f)(void *, Vec, Vec, void *) = (PetscErrorCode(*)(void *, Vec, Vec, void *))coloring->f;
183   PetscInt           k, cstart, cend, l, row, col, nz;
184   PetscScalar        dx = 0.0, *y, *w3_array;
185   const PetscScalar *xx;
186   PetscScalar       *vscale_array;
187   PetscReal          epsilon = coloring->error_rel, umin = coloring->umin, unorm;
188   Vec                w1 = coloring->w1, w2 = coloring->w2, w3, vscale = coloring->vscale;
189   void              *fctx  = coloring->fctx;
190   ISColoringType     ctype = coloring->ctype;
191   PetscInt           nxloc, nrows_k;
192   MatEntry          *Jentry  = coloring->matentry;
193   MatEntry2         *Jentry2 = coloring->matentry2;
194   const PetscInt     ncolors = coloring->ncolors, *ncolumns = coloring->ncolumns, *nrows = coloring->nrows;
195   PetscBool          alreadyboundtocpu;
196 
197   PetscFunctionBegin;
198   PetscCall(MatFDColoringMarkHost_AIJ(J));
199   PetscCall(VecBoundToCPU(x1, &alreadyboundtocpu));
200   PetscCall(VecBindToCPU(x1, PETSC_TRUE));
201   PetscCheck(!(ctype == IS_COLORING_LOCAL) || !(J->ops->fdcoloringapply == MatFDColoringApply_AIJ), PetscObjectComm((PetscObject)J), PETSC_ERR_SUP, "Must call MatColoringUseDM() with IS_COLORING_LOCAL");
202   /* (1) Set w1 = F(x1) */
203   if (!coloring->fset) {
204     PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0));
205     PetscCall((*f)(sctx, x1, w1, fctx));
206     PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0));
207   } else {
208     coloring->fset = PETSC_FALSE;
209   }
210 
211   /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */
212   if (coloring->htype[0] == 'w') {
213     /* vscale = 1./dx is a constant scalar */
214     PetscCall(VecNorm(x1, NORM_2, &unorm));
215     dx = 1.0 / (PetscSqrtReal(1.0 + unorm) * epsilon);
216   } else {
217     PetscCall(VecGetLocalSize(x1, &nxloc));
218     PetscCall(VecGetArrayRead(x1, &xx));
219     PetscCall(VecGetArray(vscale, &vscale_array));
220     for (col = 0; col < nxloc; col++) {
221       dx = xx[col];
222       if (PetscAbsScalar(dx) < umin) {
223         if (PetscRealPart(dx) >= 0.0) dx = umin;
224         else if (PetscRealPart(dx) < 0.0) dx = -umin;
225       }
226       dx *= epsilon;
227       vscale_array[col] = 1.0 / dx;
228     }
229     PetscCall(VecRestoreArrayRead(x1, &xx));
230     PetscCall(VecRestoreArray(vscale, &vscale_array));
231   }
232   if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') {
233     PetscCall(VecGhostUpdateBegin(vscale, INSERT_VALUES, SCATTER_FORWARD));
234     PetscCall(VecGhostUpdateEnd(vscale, INSERT_VALUES, SCATTER_FORWARD));
235   }
236 
237   /* (3) Loop over each color */
238   if (!coloring->w3) PetscCall(VecDuplicate(x1, &coloring->w3));
239   w3 = coloring->w3;
240 
241   PetscCall(VecGetOwnershipRange(x1, &cstart, &cend)); /* used by ghosted vscale */
242   if (vscale) PetscCall(VecGetArray(vscale, &vscale_array));
243   nz = 0;
244 
245   if (coloring->bcols > 1) { /* use blocked insertion of Jentry */
246     PetscInt     i, m = J->rmap->n, nbcols, bcols = coloring->bcols;
247     PetscScalar *dy = coloring->dy, *dy_k;
248 
249     nbcols = 0;
250     for (k = 0; k < ncolors; k += bcols) {
251       /*
252        (3-1) Loop over each column associated with color
253        adding the perturbation to the vector w3 = x1 + dx.
254        */
255 
256       dy_k = dy;
257       if (k + bcols > ncolors) bcols = ncolors - k;
258       for (i = 0; i < bcols; i++) {
259         coloring->currentcolor = k + i;
260 
261         PetscCall(VecCopy(x1, w3));
262         PetscCall(VecGetArray(w3, &w3_array));
263         if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
264         if (coloring->htype[0] == 'w') {
265           for (l = 0; l < ncolumns[k + i]; l++) {
266             col = coloring->columns[k + i][l]; /* local column (in global index!) of the matrix we are probing for */
267             w3_array[col] += 1.0 / dx;
268           }
269         } else {                  /* htype == 'ds' */
270           vscale_array -= cstart; /* shift pointer so global index can be used */
271           for (l = 0; l < ncolumns[k + i]; l++) {
272             col = coloring->columns[k + i][l]; /* local column (in global index!) of the matrix we are probing for */
273             w3_array[col] += 1.0 / vscale_array[col];
274           }
275           vscale_array += cstart;
276         }
277         if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
278         PetscCall(VecRestoreArray(w3, &w3_array));
279 
280         /*
281          (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
282                            w2 = F(x1 + dx) - F(x1)
283          */
284         PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0));
285         PetscCall(VecPlaceArray(w2, dy_k)); /* place w2 to the array dy_i */
286         PetscCall((*f)(sctx, w3, w2, fctx));
287         PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0));
288         PetscCall(VecAXPY(w2, -1.0, w1));
289         PetscCall(VecResetArray(w2));
290         dy_k += m; /* points to dy+i*nxloc */
291       }
292 
293       /*
294        (3-3) Loop over block rows of vector, putting results into Jacobian matrix
295        */
296       nrows_k = nrows[nbcols++];
297 
298       if (coloring->htype[0] == 'w') {
299         for (l = 0; l < nrows_k; l++) {
300           row = Jentry2[nz].row; /* local row index */
301                                  /* The 'useless' ifdef is due to a bug in NVIDIA nvc 21.11, which triggers a segfault on this line. We write it in
302              another way, and it seems work. See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html
303            */
304 #if defined(PETSC_USE_COMPLEX)
305           PetscScalar *tmp = Jentry2[nz].valaddr;
306           *tmp             = dy[row] * dx;
307 #else
308           *(Jentry2[nz].valaddr) = dy[row] * dx;
309 #endif
310           nz++;
311         }
312       } else { /* htype == 'ds' */
313         for (l = 0; l < nrows_k; l++) {
314           row = Jentry[nz].row; /* local row index */
315 #if defined(PETSC_USE_COMPLEX)  /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */
316           PetscScalar *tmp = Jentry[nz].valaddr;
317           *tmp             = dy[row] * vscale_array[Jentry[nz].col];
318 #else
319           *(Jentry[nz].valaddr)  = dy[row] * vscale_array[Jentry[nz].col];
320 #endif
321           nz++;
322         }
323       }
324     }
325   } else { /* bcols == 1 */
326     for (k = 0; k < ncolors; k++) {
327       coloring->currentcolor = k;
328 
329       /*
330        (3-1) Loop over each column associated with color
331        adding the perturbation to the vector w3 = x1 + dx.
332        */
333       PetscCall(VecCopy(x1, w3));
334       PetscCall(VecGetArray(w3, &w3_array));
335       if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
336       if (coloring->htype[0] == 'w') {
337         for (l = 0; l < ncolumns[k]; l++) {
338           col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
339           w3_array[col] += 1.0 / dx;
340         }
341       } else {                  /* htype == 'ds' */
342         vscale_array -= cstart; /* shift pointer so global index can be used */
343         for (l = 0; l < ncolumns[k]; l++) {
344           col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
345           w3_array[col] += 1.0 / vscale_array[col];
346         }
347         vscale_array += cstart;
348       }
349       if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
350       PetscCall(VecRestoreArray(w3, &w3_array));
351 
352       /*
353        (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
354                            w2 = F(x1 + dx) - F(x1)
355        */
356       PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0));
357       PetscCall((*f)(sctx, w3, w2, fctx));
358       PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0));
359       PetscCall(VecAXPY(w2, -1.0, w1));
360 
361       /*
362        (3-3) Loop over rows of vector, putting results into Jacobian matrix
363        */
364       nrows_k = nrows[k];
365       PetscCall(VecGetArray(w2, &y));
366       if (coloring->htype[0] == 'w') {
367         for (l = 0; l < nrows_k; l++) {
368           row = Jentry2[nz].row; /* local row index */
369 #if defined(PETSC_USE_COMPLEX)   /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */
370           PetscScalar *tmp = Jentry2[nz].valaddr;
371           *tmp             = y[row] * dx;
372 #else
373           *(Jentry2[nz].valaddr) = y[row] * dx;
374 #endif
375           nz++;
376         }
377       } else { /* htype == 'ds' */
378         for (l = 0; l < nrows_k; l++) {
379           row = Jentry[nz].row; /* local row index */
380 #if defined(PETSC_USE_COMPLEX)  /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */
381           PetscScalar *tmp = Jentry[nz].valaddr;
382           *tmp             = y[row] * vscale_array[Jentry[nz].col];
383 #else
384           *(Jentry[nz].valaddr)  = y[row] * vscale_array[Jentry[nz].col];
385 #endif
386           nz++;
387         }
388       }
389       PetscCall(VecRestoreArray(w2, &y));
390     }
391   }
392 
393   PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
394   PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
395   if (vscale) PetscCall(VecRestoreArray(vscale, &vscale_array));
396   coloring->currentcolor = -1;
397   if (!alreadyboundtocpu) PetscCall(VecBindToCPU(x1, PETSC_FALSE));
398   PetscFunctionReturn(PETSC_SUCCESS);
399 }
400 
401 PetscErrorCode MatFDColoringSetUp_MPIXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c)
402 {
403   PetscMPIInt            size, *ncolsonproc, *disp, nn;
404   PetscInt               i, n, nrows, nrows_i, j, k, m, ncols, col, *rowhit, cstart, cend, colb;
405   const PetscInt        *is, *A_ci, *A_cj, *B_ci, *B_cj, *row = NULL, *ltog = NULL;
406   PetscInt               nis = iscoloring->n, nctot, *cols, tmp = 0;
407   ISLocalToGlobalMapping map   = mat->cmap->mapping;
408   PetscInt               ctype = c->ctype, *spidxA, *spidxB, nz, bs, bs2, spidx;
409   Mat                    A, B;
410   PetscScalar           *A_val, *B_val, **valaddrhit;
411   MatEntry              *Jentry;
412   MatEntry2             *Jentry2;
413   PetscBool              isBAIJ, isSELL;
414   PetscInt               bcols = c->bcols;
415 #if defined(PETSC_USE_CTABLE)
416   PetscHMapI colmap = NULL;
417 #else
418   PetscInt *colmap = NULL;      /* local col number of off-diag col */
419 #endif
420 
421   PetscFunctionBegin;
422   if (ctype == IS_COLORING_LOCAL) {
423     PetscCheck(map, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_INCOMP, "When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping");
424     PetscCall(ISLocalToGlobalMappingGetIndices(map, &ltog));
425   }
426 
427   PetscCall(MatGetBlockSize(mat, &bs));
428   PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATMPIBAIJ, &isBAIJ));
429   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISELL, &isSELL));
430   if (isBAIJ) {
431     Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
432     Mat_SeqBAIJ *spA, *spB;
433     A     = baij->A;
434     spA   = (Mat_SeqBAIJ *)A->data;
435     A_val = spA->a;
436     B     = baij->B;
437     spB   = (Mat_SeqBAIJ *)B->data;
438     B_val = spB->a;
439     nz    = spA->nz + spB->nz; /* total nonzero entries of mat */
440     if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
441     colmap = baij->colmap;
442     PetscCall(MatGetColumnIJ_SeqBAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
443     PetscCall(MatGetColumnIJ_SeqBAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
444 
445     if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
446       PetscInt *garray;
447       PetscCall(PetscMalloc1(B->cmap->n, &garray));
448       for (i = 0; i < baij->B->cmap->n / bs; i++) {
449         for (j = 0; j < bs; j++) garray[i * bs + j] = bs * baij->garray[i] + j;
450       }
451       PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, garray, &c->vscale));
452       PetscCall(VecBindToCPU(c->vscale, PETSC_TRUE));
453       PetscCall(PetscFree(garray));
454     }
455   } else if (isSELL) {
456     Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
457     Mat_SeqSELL *spA, *spB;
458     A     = sell->A;
459     spA   = (Mat_SeqSELL *)A->data;
460     A_val = spA->val;
461     B     = sell->B;
462     spB   = (Mat_SeqSELL *)B->data;
463     B_val = spB->val;
464     nz    = spA->nz + spB->nz; /* total nonzero entries of mat */
465     if (!sell->colmap) {
466       /* Allow access to data structures of local part of matrix
467        - creates aij->colmap which maps global column number to local number in part B */
468       PetscCall(MatCreateColmap_MPISELL_Private(mat));
469     }
470     colmap = sell->colmap;
471     PetscCall(MatGetColumnIJ_SeqSELL_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
472     PetscCall(MatGetColumnIJ_SeqSELL_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
473 
474     bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */
475 
476     if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
477       PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, sell->garray, &c->vscale));
478       PetscCall(VecBindToCPU(c->vscale, PETSC_TRUE));
479     }
480   } else {
481     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
482     Mat_SeqAIJ *spA, *spB;
483     A     = aij->A;
484     spA   = (Mat_SeqAIJ *)A->data;
485     A_val = spA->a;
486     B     = aij->B;
487     spB   = (Mat_SeqAIJ *)B->data;
488     B_val = spB->a;
489     nz    = spA->nz + spB->nz; /* total nonzero entries of mat */
490     if (!aij->colmap) {
491       /* Allow access to data structures of local part of matrix
492        - creates aij->colmap which maps global column number to local number in part B */
493       PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
494     }
495     colmap = aij->colmap;
496     PetscCall(MatGetColumnIJ_SeqAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
497     PetscCall(MatGetColumnIJ_SeqAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
498 
499     bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */
500 
501     if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
502       PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, aij->garray, &c->vscale));
503       PetscCall(VecBindToCPU(c->vscale, PETSC_TRUE));
504     }
505   }
506 
507   m      = mat->rmap->n / bs;
508   cstart = mat->cmap->rstart / bs;
509   cend   = mat->cmap->rend / bs;
510 
511   PetscCall(PetscMalloc2(nis, &c->ncolumns, nis, &c->columns));
512   PetscCall(PetscMalloc1(nis, &c->nrows));
513 
514   if (c->htype[0] == 'd') {
515     PetscCall(PetscMalloc1(nz, &Jentry));
516     c->matentry = Jentry;
517   } else if (c->htype[0] == 'w') {
518     PetscCall(PetscMalloc1(nz, &Jentry2));
519     c->matentry2 = Jentry2;
520   } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "htype is not supported");
521 
522   PetscCall(PetscMalloc2(m + 1, &rowhit, m + 1, &valaddrhit));
523   nz = 0;
524   PetscCall(ISColoringGetIS(iscoloring, PETSC_OWN_POINTER, PETSC_IGNORE, &c->isa));
525 
526   if (ctype == IS_COLORING_GLOBAL) {
527     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size));
528     PetscCall(PetscMalloc2(size, &ncolsonproc, size, &disp));
529   }
530 
531   for (i = 0; i < nis; i++) { /* for each local color */
532     PetscCall(ISGetLocalSize(c->isa[i], &n));
533     PetscCall(ISGetIndices(c->isa[i], &is));
534 
535     c->ncolumns[i] = n; /* local number of columns of this color on this process */
536     c->columns[i]  = (PetscInt *)is;
537 
538     if (ctype == IS_COLORING_GLOBAL) {
539       /* Determine nctot, the total (parallel) number of columns of this color */
540       /* ncolsonproc[j]: local ncolumns on proc[j] of this color */
541       PetscCall(PetscMPIIntCast(n, &nn));
542       PetscCallMPI(MPI_Allgather(&nn, 1, MPI_INT, ncolsonproc, 1, MPI_INT, PetscObjectComm((PetscObject)mat)));
543       nctot = 0;
544       for (j = 0; j < size; j++) nctot += ncolsonproc[j];
545       if (!nctot) PetscCall(PetscInfo(mat, "Coloring of matrix has some unneeded colors with no corresponding rows\n"));
546 
547       disp[0] = 0;
548       for (j = 1; j < size; j++) disp[j] = disp[j - 1] + ncolsonproc[j - 1];
549 
550       /* Get cols, the complete list of columns for this color on each process */
551       PetscCall(PetscMalloc1(nctot + 1, &cols));
552       PetscCallMPI(MPI_Allgatherv((void *)is, n, MPIU_INT, cols, ncolsonproc, disp, MPIU_INT, PetscObjectComm((PetscObject)mat)));
553     } else if (ctype == IS_COLORING_LOCAL) {
554       /* Determine local number of columns of this color on this process, including ghost points */
555       nctot = n;
556       cols  = (PetscInt *)is;
557     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not provided for this MatFDColoring type");
558 
559     /* Mark all rows affect by these columns */
560     PetscCall(PetscArrayzero(rowhit, m));
561     bs2     = bs * bs;
562     nrows_i = 0;
563     for (j = 0; j < nctot; j++) { /* loop over columns*/
564       if (ctype == IS_COLORING_LOCAL) {
565         col = ltog[cols[j]];
566       } else {
567         col = cols[j];
568       }
569       if (col >= cstart && col < cend) { /* column is in A, diagonal block of mat */
570         tmp   = A_ci[col - cstart];
571         row   = A_cj + tmp;
572         nrows = A_ci[col - cstart + 1] - tmp;
573         nrows_i += nrows;
574 
575         /* loop over columns of A marking them in rowhit */
576         for (k = 0; k < nrows; k++) {
577           /* set valaddrhit for part A */
578           spidx            = bs2 * spidxA[tmp + k];
579           valaddrhit[*row] = &A_val[spidx];
580           rowhit[*row++]   = col - cstart + 1; /* local column index */
581         }
582       } else { /* column is in B, off-diagonal block of mat */
583 #if defined(PETSC_USE_CTABLE)
584         PetscCall(PetscHMapIGetWithDefault(colmap, col + 1, 0, &colb));
585         colb--;
586 #else
587         colb = colmap[col] - 1; /* local column index */
588 #endif
589         if (colb == -1) {
590           nrows = 0;
591         } else {
592           colb  = colb / bs;
593           tmp   = B_ci[colb];
594           row   = B_cj + tmp;
595           nrows = B_ci[colb + 1] - tmp;
596         }
597         nrows_i += nrows;
598         /* loop over columns of B marking them in rowhit */
599         for (k = 0; k < nrows; k++) {
600           /* set valaddrhit for part B */
601           spidx            = bs2 * spidxB[tmp + k];
602           valaddrhit[*row] = &B_val[spidx];
603           rowhit[*row++]   = colb + 1 + cend - cstart; /* local column index */
604         }
605       }
606     }
607     c->nrows[i] = nrows_i;
608 
609     if (c->htype[0] == 'd') {
610       for (j = 0; j < m; j++) {
611         if (rowhit[j]) {
612           Jentry[nz].row     = j;             /* local row index */
613           Jentry[nz].col     = rowhit[j] - 1; /* local column index */
614           Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
615           nz++;
616         }
617       }
618     } else { /* c->htype == 'wp' */
619       for (j = 0; j < m; j++) {
620         if (rowhit[j]) {
621           Jentry2[nz].row     = j;             /* local row index */
622           Jentry2[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
623           nz++;
624         }
625       }
626     }
627     if (ctype == IS_COLORING_GLOBAL) PetscCall(PetscFree(cols));
628   }
629   if (ctype == IS_COLORING_GLOBAL) PetscCall(PetscFree2(ncolsonproc, disp));
630 
631   if (bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */
632     PetscCall(MatFDColoringSetUpBlocked_AIJ_Private(mat, c, nz));
633   }
634 
635   if (isBAIJ) {
636     PetscCall(MatRestoreColumnIJ_SeqBAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
637     PetscCall(MatRestoreColumnIJ_SeqBAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
638     PetscCall(PetscMalloc1(bs * mat->rmap->n, &c->dy));
639   } else if (isSELL) {
640     PetscCall(MatRestoreColumnIJ_SeqSELL_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
641     PetscCall(MatRestoreColumnIJ_SeqSELL_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
642   } else {
643     PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
644     PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
645   }
646 
647   PetscCall(ISColoringRestoreIS(iscoloring, PETSC_OWN_POINTER, &c->isa));
648   PetscCall(PetscFree2(rowhit, valaddrhit));
649 
650   if (ctype == IS_COLORING_LOCAL) PetscCall(ISLocalToGlobalMappingRestoreIndices(map, &ltog));
651   PetscCall(PetscInfo(c, "ncolors %" PetscInt_FMT ", brows %" PetscInt_FMT " and bcols %" PetscInt_FMT " are used.\n", c->ncolors, c->brows, c->bcols));
652   PetscFunctionReturn(PETSC_SUCCESS);
653 }
654 
655 PetscErrorCode MatFDColoringCreate_MPIXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c)
656 {
657   PetscInt  bs, nis = iscoloring->n, m = mat->rmap->n;
658   PetscBool isBAIJ, isSELL;
659 
660   PetscFunctionBegin;
661   /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian;
662    bcols is chosen s.t. dy-array takes 50% of memory space as mat */
663   PetscCall(MatGetBlockSize(mat, &bs));
664   PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATMPIBAIJ, &isBAIJ));
665   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISELL, &isSELL));
666   if (isBAIJ || m == 0) {
667     c->brows = m;
668     c->bcols = 1;
669   } else if (isSELL) {
670     /* bcols is chosen s.t. dy-array takes 50% of local memory space as mat */
671     Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
672     Mat_SeqSELL *spA, *spB;
673     Mat          A, B;
674     PetscInt     nz, brows, bcols;
675     PetscReal    mem;
676 
677     bs = 1; /* only bs=1 is supported for MPISELL matrix */
678 
679     A     = sell->A;
680     spA   = (Mat_SeqSELL *)A->data;
681     B     = sell->B;
682     spB   = (Mat_SeqSELL *)B->data;
683     nz    = spA->nz + spB->nz; /* total local nonzero entries of mat */
684     mem   = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt);
685     bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar)));
686     brows = 1000 / bcols;
687     if (bcols > nis) bcols = nis;
688     if (brows == 0 || brows > m) brows = m;
689     c->brows = brows;
690     c->bcols = bcols;
691   } else { /* mpiaij matrix */
692     /* bcols is chosen s.t. dy-array takes 50% of local memory space as mat */
693     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
694     Mat_SeqAIJ *spA, *spB;
695     Mat         A, B;
696     PetscInt    nz, brows, bcols;
697     PetscReal   mem;
698 
699     bs = 1; /* only bs=1 is supported for MPIAIJ matrix */
700 
701     A     = aij->A;
702     spA   = (Mat_SeqAIJ *)A->data;
703     B     = aij->B;
704     spB   = (Mat_SeqAIJ *)B->data;
705     nz    = spA->nz + spB->nz; /* total local nonzero entries of mat */
706     mem   = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt);
707     bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar)));
708     brows = 1000 / bcols;
709     if (bcols > nis) bcols = nis;
710     if (brows == 0 || brows > m) brows = m;
711     c->brows = brows;
712     c->bcols = bcols;
713   }
714 
715   c->M       = mat->rmap->N / bs; /* set the global rows and columns and local rows */
716   c->N       = mat->cmap->N / bs;
717   c->m       = mat->rmap->n / bs;
718   c->rstart  = mat->rmap->rstart / bs;
719   c->ncolors = nis;
720   PetscFunctionReturn(PETSC_SUCCESS);
721 }
722 
723 /*@C
724 
725     MatFDColoringSetValues - takes a matrix in compressed color format and enters the matrix into a PETSc `Mat`
726 
727    Collective
728 
729    Input Parameters:
730 +    J - the sparse matrix
731 .    coloring - created with `MatFDColoringCreate()` and a local coloring
732 -    y - column major storage of matrix values with one color of values per column, the number of rows of y should match
733          the number of local rows of `J` and the number of columns is the number of colors.
734 
735    Level: intermediate
736 
737    Notes:
738    The matrix in compressed color format may come from an automatic differentiation code
739 
740    The code will be slightly faster if `MatFDColoringSetBlockSize`(coloring,`PETSC_DEFAULT`,nc); is called immediately after creating the coloring
741 
742 .seealso: [](ch_matrices), `Mat`, `MatFDColoringCreate()`, `ISColoring`, `ISColoringCreate()`, `ISColoringSetType()`, `IS_COLORING_LOCAL`, `MatFDColoringSetBlockSize()`
743 @*/
744 PetscErrorCode MatFDColoringSetValues(Mat J, MatFDColoring coloring, const PetscScalar *y)
745 {
746   MatEntry2      *Jentry2;
747   PetscInt        row, i, nrows_k, l, ncolors, nz = 0, bcols, nbcols = 0;
748   const PetscInt *nrows;
749   PetscBool       eq;
750 
751   PetscFunctionBegin;
752   PetscValidHeaderSpecific(J, MAT_CLASSID, 1);
753   PetscValidHeaderSpecific(coloring, MAT_FDCOLORING_CLASSID, 2);
754   PetscCall(PetscObjectCompareId((PetscObject)J, coloring->matid, &eq));
755   PetscCheck(eq, PetscObjectComm((PetscObject)J), PETSC_ERR_ARG_WRONG, "Matrix used with MatFDColoringSetValues() must be that used with MatFDColoringCreate()");
756   Jentry2 = coloring->matentry2;
757   nrows   = coloring->nrows;
758   ncolors = coloring->ncolors;
759   bcols   = coloring->bcols;
760 
761   for (i = 0; i < ncolors; i += bcols) {
762     nrows_k = nrows[nbcols++];
763     for (l = 0; l < nrows_k; l++) {
764       row                      = Jentry2[nz].row; /* local row index */
765       *(Jentry2[nz++].valaddr) = y[row];
766     }
767     y += bcols * coloring->m;
768   }
769   PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
770   PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
771   PetscFunctionReturn(PETSC_SUCCESS);
772 }
773