xref: /petsc/src/mat/impls/aij/mpi/fdmpiaij.c (revision 1cc06b555e92f8ec64db10330b8bbd830e5bc876)
1d4002b98SHong Zhang #include <../src/mat/impls/sell/mpi/mpisell.h>
2c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
30d1c53f1SHong Zhang #include <../src/mat/impls/baij/mpi/mpibaij.h>
4af0996ceSBarry Smith #include <petsc/private/isimpl.h>
50d1c53f1SHong Zhang 
6589ce454SStefano Zampini static PetscErrorCode MatFDColoringMarkHost_AIJ(Mat J)
7589ce454SStefano Zampini {
8589ce454SStefano Zampini   PetscBool    isseqAIJ, ismpiAIJ;
9589ce454SStefano Zampini   PetscScalar *v;
10589ce454SStefano Zampini 
11589ce454SStefano Zampini   PetscFunctionBegin;
12589ce454SStefano Zampini   PetscCall(PetscObjectBaseTypeCompare((PetscObject)J, MATMPIAIJ, &ismpiAIJ));
13589ce454SStefano Zampini   PetscCall(PetscObjectBaseTypeCompare((PetscObject)J, MATSEQAIJ, &isseqAIJ));
14589ce454SStefano Zampini   if (isseqAIJ) {
15589ce454SStefano Zampini     PetscCall(MatSeqAIJGetArrayWrite(J, &v));
16589ce454SStefano Zampini     PetscCall(MatSeqAIJRestoreArrayWrite(J, &v));
17589ce454SStefano Zampini   } else if (ismpiAIJ) {
18589ce454SStefano Zampini     Mat dJ, oJ;
19589ce454SStefano Zampini 
20589ce454SStefano Zampini     PetscCall(MatMPIAIJGetSeqAIJ(J, &dJ, &oJ, NULL));
21589ce454SStefano Zampini     PetscCall(MatSeqAIJGetArrayWrite(dJ, &v));
22589ce454SStefano Zampini     PetscCall(MatSeqAIJRestoreArrayWrite(dJ, &v));
23589ce454SStefano Zampini     PetscCall(MatSeqAIJGetArrayWrite(oJ, &v));
24589ce454SStefano Zampini     PetscCall(MatSeqAIJRestoreArrayWrite(oJ, &v));
25589ce454SStefano Zampini   }
26589ce454SStefano Zampini   PetscFunctionReturn(PETSC_SUCCESS);
27589ce454SStefano Zampini }
28589ce454SStefano Zampini 
29d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFDColoringApply_BAIJ(Mat J, MatFDColoring coloring, Vec x1, void *sctx)
30d71ae5a4SJacob Faibussowitsch {
310d1c53f1SHong Zhang   PetscErrorCode (*f)(void *, Vec, Vec, void *) = (PetscErrorCode(*)(void *, Vec, Vec, void *))coloring->f;
320d1c53f1SHong Zhang   PetscInt           k, cstart, cend, l, row, col, nz, spidx, i, j;
335edff71fSBarry Smith   PetscScalar        dx = 0.0, *w3_array, *dy_i, *dy = coloring->dy;
340d1c53f1SHong Zhang   PetscScalar       *vscale_array;
355edff71fSBarry Smith   const PetscScalar *xx;
360d1c53f1SHong Zhang   PetscReal          epsilon = coloring->error_rel, umin = coloring->umin, unorm;
370d1c53f1SHong Zhang   Vec                w1 = coloring->w1, w2 = coloring->w2, w3, vscale = coloring->vscale;
380d1c53f1SHong Zhang   void              *fctx  = coloring->fctx;
390d1c53f1SHong Zhang   PetscInt           ctype = coloring->ctype, nxloc, nrows_k;
400d1c53f1SHong Zhang   PetscScalar       *valaddr;
410d1c53f1SHong Zhang   MatEntry          *Jentry  = coloring->matentry;
420df34763SHong Zhang   MatEntry2         *Jentry2 = coloring->matentry2;
430d1c53f1SHong Zhang   const PetscInt     ncolors = coloring->ncolors, *ncolumns = coloring->ncolumns, *nrows = coloring->nrows;
440d1c53f1SHong Zhang   PetscInt           bs = J->rmap->bs;
450d1c53f1SHong Zhang 
460d1c53f1SHong Zhang   PetscFunctionBegin;
479566063dSJacob Faibussowitsch   PetscCall(VecBindToCPU(x1, PETSC_TRUE));
480d1c53f1SHong Zhang   /* (1) Set w1 = F(x1) */
490d1c53f1SHong Zhang   if (!coloring->fset) {
509566063dSJacob Faibussowitsch     PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, coloring, 0, 0, 0));
519566063dSJacob Faibussowitsch     PetscCall((*f)(sctx, x1, w1, fctx));
529566063dSJacob Faibussowitsch     PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, coloring, 0, 0, 0));
530d1c53f1SHong Zhang   } else {
540d1c53f1SHong Zhang     coloring->fset = PETSC_FALSE;
550d1c53f1SHong Zhang   }
560d1c53f1SHong Zhang 
570d1c53f1SHong Zhang   /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */
589566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(x1, &nxloc));
590d1c53f1SHong Zhang   if (coloring->htype[0] == 'w') {
600d1c53f1SHong Zhang     /* vscale = dx is a constant scalar */
619566063dSJacob Faibussowitsch     PetscCall(VecNorm(x1, NORM_2, &unorm));
620d1c53f1SHong Zhang     dx = 1.0 / (PetscSqrtReal(1.0 + unorm) * epsilon);
630d1c53f1SHong Zhang   } else {
649566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x1, &xx));
659566063dSJacob Faibussowitsch     PetscCall(VecGetArray(vscale, &vscale_array));
660d1c53f1SHong Zhang     for (col = 0; col < nxloc; col++) {
670d1c53f1SHong Zhang       dx = xx[col];
680d1c53f1SHong Zhang       if (PetscAbsScalar(dx) < umin) {
690d1c53f1SHong Zhang         if (PetscRealPart(dx) >= 0.0) dx = umin;
700d1c53f1SHong Zhang         else if (PetscRealPart(dx) < 0.0) dx = -umin;
710d1c53f1SHong Zhang       }
720d1c53f1SHong Zhang       dx *= epsilon;
730d1c53f1SHong Zhang       vscale_array[col] = 1.0 / dx;
740d1c53f1SHong Zhang     }
759566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x1, &xx));
769566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(vscale, &vscale_array));
770d1c53f1SHong Zhang   }
78684f2004SHong Zhang   if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') {
799566063dSJacob Faibussowitsch     PetscCall(VecGhostUpdateBegin(vscale, INSERT_VALUES, SCATTER_FORWARD));
809566063dSJacob Faibussowitsch     PetscCall(VecGhostUpdateEnd(vscale, INSERT_VALUES, SCATTER_FORWARD));
810d1c53f1SHong Zhang   }
820d1c53f1SHong Zhang 
830d1c53f1SHong Zhang   /* (3) Loop over each color */
840d1c53f1SHong Zhang   if (!coloring->w3) {
859566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(x1, &coloring->w3));
86ce911d59SRichard Tran Mills     /* Vec is used intensively in particular piece of scalar CPU code; won't benefit from bouncing back and forth to the GPU */
879566063dSJacob Faibussowitsch     PetscCall(VecBindToCPU(coloring->w3, PETSC_TRUE));
880d1c53f1SHong Zhang   }
890d1c53f1SHong Zhang   w3 = coloring->w3;
900d1c53f1SHong Zhang 
919566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRange(x1, &cstart, &cend)); /* used by ghosted vscale */
9248a46eb9SPierre Jolivet   if (vscale) PetscCall(VecGetArray(vscale, &vscale_array));
930d1c53f1SHong Zhang   nz = 0;
940d1c53f1SHong Zhang   for (k = 0; k < ncolors; k++) {
950d1c53f1SHong Zhang     coloring->currentcolor = k;
960d1c53f1SHong Zhang 
970d1c53f1SHong Zhang     /*
980d1c53f1SHong Zhang       (3-1) Loop over each column associated with color
990d1c53f1SHong Zhang       adding the perturbation to the vector w3 = x1 + dx.
1000d1c53f1SHong Zhang     */
1019566063dSJacob Faibussowitsch     PetscCall(VecCopy(x1, w3));
1020d1c53f1SHong Zhang     dy_i = dy;
1030d1c53f1SHong Zhang     for (i = 0; i < bs; i++) { /* Loop over a block of columns */
1049566063dSJacob Faibussowitsch       PetscCall(VecGetArray(w3, &w3_array));
1050d1c53f1SHong Zhang       if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
1060d1c53f1SHong Zhang       if (coloring->htype[0] == 'w') {
1070d1c53f1SHong Zhang         for (l = 0; l < ncolumns[k]; l++) {
1080d1c53f1SHong Zhang           col = i + bs * coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
1090d1c53f1SHong Zhang           w3_array[col] += 1.0 / dx;
110684f2004SHong Zhang           if (i) w3_array[col - 1] -= 1.0 / dx; /* resume original w3[col-1] */
1110d1c53f1SHong Zhang         }
1120d1c53f1SHong Zhang       } else {                  /* htype == 'ds' */
1130d1c53f1SHong Zhang         vscale_array -= cstart; /* shift pointer so global index can be used */
1140d1c53f1SHong Zhang         for (l = 0; l < ncolumns[k]; l++) {
115f8c2866eSHong Zhang           col = i + bs * coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
1160d1c53f1SHong Zhang           w3_array[col] += 1.0 / vscale_array[col];
117684f2004SHong Zhang           if (i) w3_array[col - 1] -= 1.0 / vscale_array[col - 1]; /* resume original w3[col-1] */
1180d1c53f1SHong Zhang         }
1190d1c53f1SHong Zhang         vscale_array += cstart;
1200d1c53f1SHong Zhang       }
1210d1c53f1SHong Zhang       if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
1229566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(w3, &w3_array));
1230d1c53f1SHong Zhang 
1240d1c53f1SHong Zhang       /*
1250d1c53f1SHong Zhang        (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
1260d1c53f1SHong Zhang                            w2 = F(x1 + dx) - F(x1)
1270d1c53f1SHong Zhang        */
1289566063dSJacob Faibussowitsch       PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0));
1299566063dSJacob Faibussowitsch       PetscCall(VecPlaceArray(w2, dy_i)); /* place w2 to the array dy_i */
1309566063dSJacob Faibussowitsch       PetscCall((*f)(sctx, w3, w2, fctx));
1319566063dSJacob Faibussowitsch       PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0));
1329566063dSJacob Faibussowitsch       PetscCall(VecAXPY(w2, -1.0, w1));
1339566063dSJacob Faibussowitsch       PetscCall(VecResetArray(w2));
1340d1c53f1SHong Zhang       dy_i += nxloc; /* points to dy+i*nxloc */
1350d1c53f1SHong Zhang     }
1360d1c53f1SHong Zhang 
1370d1c53f1SHong Zhang     /*
1380d1c53f1SHong Zhang      (3-3) Loop over rows of vector, putting results into Jacobian matrix
1390d1c53f1SHong Zhang     */
1400d1c53f1SHong Zhang     nrows_k = nrows[k];
1410d1c53f1SHong Zhang     if (coloring->htype[0] == 'w') {
1420d1c53f1SHong Zhang       for (l = 0; l < nrows_k; l++) {
1430df34763SHong Zhang         row     = bs * Jentry2[nz].row; /* local row index */
1440df34763SHong Zhang         valaddr = Jentry2[nz++].valaddr;
1450d1c53f1SHong Zhang         spidx   = 0;
1460d1c53f1SHong Zhang         dy_i    = dy;
1470d1c53f1SHong Zhang         for (i = 0; i < bs; i++) {   /* column of the block */
1480d1c53f1SHong Zhang           for (j = 0; j < bs; j++) { /* row of the block */
1490d1c53f1SHong Zhang             valaddr[spidx++] = dy_i[row + j] * dx;
1500d1c53f1SHong Zhang           }
151f8c2866eSHong Zhang           dy_i += nxloc; /* points to dy+i*nxloc */
1520d1c53f1SHong Zhang         }
1530d1c53f1SHong Zhang       }
1540d1c53f1SHong Zhang     } else { /* htype == 'ds' */
1550d1c53f1SHong Zhang       for (l = 0; l < nrows_k; l++) {
1560d1c53f1SHong Zhang         row     = bs * Jentry[nz].row; /* local row index */
1570d1c53f1SHong Zhang         col     = bs * Jentry[nz].col; /* local column index */
158684f2004SHong Zhang         valaddr = Jentry[nz++].valaddr;
159f8c2866eSHong Zhang         spidx   = 0;
160f8c2866eSHong Zhang         dy_i    = dy;
161f8c2866eSHong Zhang         for (i = 0; i < bs; i++) {   /* column of the block */
162f8c2866eSHong Zhang           for (j = 0; j < bs; j++) { /* row of the block */
163f8c2866eSHong Zhang             valaddr[spidx++] = dy_i[row + j] * vscale_array[col + i];
164f8c2866eSHong Zhang           }
165f8c2866eSHong Zhang           dy_i += nxloc; /* points to dy+i*nxloc */
166f8c2866eSHong Zhang         }
1670d1c53f1SHong Zhang       }
1680d1c53f1SHong Zhang     }
1690d1c53f1SHong Zhang   }
1709566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1719566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
17248a46eb9SPierre Jolivet   if (vscale) PetscCall(VecRestoreArray(vscale, &vscale_array));
1730d1c53f1SHong Zhang 
1740d1c53f1SHong Zhang   coloring->currentcolor = -1;
1759566063dSJacob Faibussowitsch   PetscCall(VecBindToCPU(x1, PETSC_FALSE));
1763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1770d1c53f1SHong Zhang }
178a64fbb32SBarry Smith 
179531c7667SBarry Smith /* this is declared PETSC_EXTERN because it is used by MatFDColoringUseDM() which is in the DM library */
180d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFDColoringApply_AIJ(Mat J, MatFDColoring coloring, Vec x1, void *sctx)
181d71ae5a4SJacob Faibussowitsch {
182fcd7ac73SHong Zhang   PetscErrorCode (*f)(void *, Vec, Vec, void *) = (PetscErrorCode(*)(void *, Vec, Vec, void *))coloring->f;
183a2f2d239SHong Zhang   PetscInt           k, cstart, cend, l, row, col, nz;
1845edff71fSBarry Smith   PetscScalar        dx = 0.0, *y, *w3_array;
1855edff71fSBarry Smith   const PetscScalar *xx;
186fcd7ac73SHong Zhang   PetscScalar       *vscale_array;
187fcd7ac73SHong Zhang   PetscReal          epsilon = coloring->error_rel, umin = coloring->umin, unorm;
1888bc97078SHong Zhang   Vec                w1 = coloring->w1, w2 = coloring->w2, w3, vscale = coloring->vscale;
189fcd7ac73SHong Zhang   void              *fctx  = coloring->fctx;
19091e7fa0fSBarry Smith   ISColoringType     ctype = coloring->ctype;
19191e7fa0fSBarry Smith   PetscInt           nxloc, nrows_k;
192573f477fSHong Zhang   MatEntry          *Jentry  = coloring->matentry;
1930df34763SHong Zhang   MatEntry2         *Jentry2 = coloring->matentry2;
1948bc97078SHong Zhang   const PetscInt     ncolors = coloring->ncolors, *ncolumns = coloring->ncolumns, *nrows = coloring->nrows;
195b294de21SRichard Tran Mills   PetscBool          alreadyboundtocpu;
196fcd7ac73SHong Zhang 
197fcd7ac73SHong Zhang   PetscFunctionBegin;
198589ce454SStefano Zampini   PetscCall(MatFDColoringMarkHost_AIJ(J));
1999566063dSJacob Faibussowitsch   PetscCall(VecBoundToCPU(x1, &alreadyboundtocpu));
2009566063dSJacob Faibussowitsch   PetscCall(VecBindToCPU(x1, PETSC_TRUE));
20108401ef6SPierre Jolivet   PetscCheck(!(ctype == IS_COLORING_LOCAL) || !(J->ops->fdcoloringapply == MatFDColoringApply_AIJ), PetscObjectComm((PetscObject)J), PETSC_ERR_SUP, "Must call MatColoringUseDM() with IS_COLORING_LOCAL");
2028bc97078SHong Zhang   /* (1) Set w1 = F(x1) */
203fcd7ac73SHong Zhang   if (!coloring->fset) {
2049566063dSJacob Faibussowitsch     PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0));
2059566063dSJacob Faibussowitsch     PetscCall((*f)(sctx, x1, w1, fctx));
2069566063dSJacob Faibussowitsch     PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0));
207fcd7ac73SHong Zhang   } else {
208fcd7ac73SHong Zhang     coloring->fset = PETSC_FALSE;
209fcd7ac73SHong Zhang   }
210fcd7ac73SHong Zhang 
2118bc97078SHong Zhang   /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */
212f6af9589SHong Zhang   if (coloring->htype[0] == 'w') {
213684f2004SHong Zhang     /* vscale = 1./dx is a constant scalar */
2149566063dSJacob Faibussowitsch     PetscCall(VecNorm(x1, NORM_2, &unorm));
215c53567a0SHong Zhang     dx = 1.0 / (PetscSqrtReal(1.0 + unorm) * epsilon);
21670e7395fSHong Zhang   } else {
2179566063dSJacob Faibussowitsch     PetscCall(VecGetLocalSize(x1, &nxloc));
2189566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x1, &xx));
2199566063dSJacob Faibussowitsch     PetscCall(VecGetArray(vscale, &vscale_array));
22074d3cef9SHong Zhang     for (col = 0; col < nxloc; col++) {
221fcd7ac73SHong Zhang       dx = xx[col];
22274d3cef9SHong Zhang       if (PetscAbsScalar(dx) < umin) {
22374d3cef9SHong Zhang         if (PetscRealPart(dx) >= 0.0) dx = umin;
22474d3cef9SHong Zhang         else if (PetscRealPart(dx) < 0.0) dx = -umin;
22574d3cef9SHong Zhang       }
226fcd7ac73SHong Zhang       dx *= epsilon;
22774d3cef9SHong Zhang       vscale_array[col] = 1.0 / dx;
228f6af9589SHong Zhang     }
2299566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x1, &xx));
2309566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(vscale, &vscale_array));
23170e7395fSHong Zhang   }
232684f2004SHong Zhang   if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') {
2339566063dSJacob Faibussowitsch     PetscCall(VecGhostUpdateBegin(vscale, INSERT_VALUES, SCATTER_FORWARD));
2349566063dSJacob Faibussowitsch     PetscCall(VecGhostUpdateEnd(vscale, INSERT_VALUES, SCATTER_FORWARD));
235fcd7ac73SHong Zhang   }
236fcd7ac73SHong Zhang 
2378bc97078SHong Zhang   /* (3) Loop over each color */
238aa624791SPierre Jolivet   if (!coloring->w3) PetscCall(VecDuplicate(x1, &coloring->w3));
2398bc97078SHong Zhang   w3 = coloring->w3;
240fcd7ac73SHong Zhang 
2419566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRange(x1, &cstart, &cend)); /* used by ghosted vscale */
24248a46eb9SPierre Jolivet   if (vscale) PetscCall(VecGetArray(vscale, &vscale_array));
2438bc97078SHong Zhang   nz = 0;
244e2c857f8SHong Zhang 
245b3e1f37bSHong Zhang   if (coloring->bcols > 1) { /* use blocked insertion of Jentry */
246b3e1f37bSHong Zhang     PetscInt     i, m = J->rmap->n, nbcols, bcols = coloring->bcols;
247e2c857f8SHong Zhang     PetscScalar *dy = coloring->dy, *dy_k;
248e2c857f8SHong Zhang 
249e2c857f8SHong Zhang     nbcols = 0;
250e2c857f8SHong Zhang     for (k = 0; k < ncolors; k += bcols) {
251e2c857f8SHong Zhang       /*
252e2c857f8SHong Zhang        (3-1) Loop over each column associated with color
253e2c857f8SHong Zhang        adding the perturbation to the vector w3 = x1 + dx.
254e2c857f8SHong Zhang        */
255e2c857f8SHong Zhang 
256e2c857f8SHong Zhang       dy_k = dy;
257e2c857f8SHong Zhang       if (k + bcols > ncolors) bcols = ncolors - k;
258e2c857f8SHong Zhang       for (i = 0; i < bcols; i++) {
259ceef8a49SBarry Smith         coloring->currentcolor = k + i;
260e2c857f8SHong Zhang 
2619566063dSJacob Faibussowitsch         PetscCall(VecCopy(x1, w3));
2629566063dSJacob Faibussowitsch         PetscCall(VecGetArray(w3, &w3_array));
263e2c857f8SHong Zhang         if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
264e2c857f8SHong Zhang         if (coloring->htype[0] == 'w') {
265e2c857f8SHong Zhang           for (l = 0; l < ncolumns[k + i]; l++) {
266e2c857f8SHong Zhang             col = coloring->columns[k + i][l]; /* local column (in global index!) of the matrix we are probing for */
267e2c857f8SHong Zhang             w3_array[col] += 1.0 / dx;
268e2c857f8SHong Zhang           }
269e2c857f8SHong Zhang         } else {                  /* htype == 'ds' */
270e2c857f8SHong Zhang           vscale_array -= cstart; /* shift pointer so global index can be used */
271e2c857f8SHong Zhang           for (l = 0; l < ncolumns[k + i]; l++) {
272e2c857f8SHong Zhang             col = coloring->columns[k + i][l]; /* local column (in global index!) of the matrix we are probing for */
273e2c857f8SHong Zhang             w3_array[col] += 1.0 / vscale_array[col];
274e2c857f8SHong Zhang           }
275e2c857f8SHong Zhang           vscale_array += cstart;
276e2c857f8SHong Zhang         }
277e2c857f8SHong Zhang         if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
2789566063dSJacob Faibussowitsch         PetscCall(VecRestoreArray(w3, &w3_array));
279e2c857f8SHong Zhang 
280e2c857f8SHong Zhang         /*
281e2c857f8SHong Zhang          (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
282e2c857f8SHong Zhang                            w2 = F(x1 + dx) - F(x1)
283e2c857f8SHong Zhang          */
2849566063dSJacob Faibussowitsch         PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0));
2859566063dSJacob Faibussowitsch         PetscCall(VecPlaceArray(w2, dy_k)); /* place w2 to the array dy_i */
2869566063dSJacob Faibussowitsch         PetscCall((*f)(sctx, w3, w2, fctx));
2879566063dSJacob Faibussowitsch         PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0));
2889566063dSJacob Faibussowitsch         PetscCall(VecAXPY(w2, -1.0, w1));
2899566063dSJacob Faibussowitsch         PetscCall(VecResetArray(w2));
290e2c857f8SHong Zhang         dy_k += m; /* points to dy+i*nxloc */
291e2c857f8SHong Zhang       }
292e2c857f8SHong Zhang 
293e2c857f8SHong Zhang       /*
294e2c857f8SHong Zhang        (3-3) Loop over block rows of vector, putting results into Jacobian matrix
295e2c857f8SHong Zhang        */
296d880da65SHong Zhang       nrows_k = nrows[nbcols++];
297d880da65SHong Zhang 
298e2c857f8SHong Zhang       if (coloring->htype[0] == 'w') {
299e2c857f8SHong Zhang         for (l = 0; l < nrows_k; l++) {
3000df34763SHong Zhang           row = Jentry2[nz].row; /* local row index */
3015059b303SJunchao Zhang                                  /* The 'useless' ifdef is due to a bug in NVIDIA nvc 21.11, which triggers a segfault on this line. We write it in
3025059b303SJunchao Zhang              another way, and it seems work. See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html
3035059b303SJunchao Zhang            */
3045059b303SJunchao Zhang #if defined(PETSC_USE_COMPLEX)
3055059b303SJunchao Zhang           PetscScalar *tmp = Jentry2[nz].valaddr;
3065059b303SJunchao Zhang           *tmp             = dy[row] * dx;
3075059b303SJunchao Zhang #else
3085059b303SJunchao Zhang           *(Jentry2[nz].valaddr) = dy[row] * dx;
3095059b303SJunchao Zhang #endif
3105059b303SJunchao Zhang           nz++;
311e2c857f8SHong Zhang         }
312e2c857f8SHong Zhang       } else { /* htype == 'ds' */
313e2c857f8SHong Zhang         for (l = 0; l < nrows_k; l++) {
314e2c857f8SHong Zhang           row = Jentry[nz].row; /* local row index */
3155059b303SJunchao Zhang #if defined(PETSC_USE_COMPLEX)  /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */
3165059b303SJunchao Zhang           PetscScalar *tmp = Jentry[nz].valaddr;
3175059b303SJunchao Zhang           *tmp             = dy[row] * vscale_array[Jentry[nz].col];
3185059b303SJunchao Zhang #else
319d880da65SHong Zhang           *(Jentry[nz].valaddr)  = dy[row] * vscale_array[Jentry[nz].col];
3205059b303SJunchao Zhang #endif
321e2c857f8SHong Zhang           nz++;
322e2c857f8SHong Zhang         }
323e2c857f8SHong Zhang       }
324e2c857f8SHong Zhang     }
325b3e1f37bSHong Zhang   } else { /* bcols == 1 */
3268bc97078SHong Zhang     for (k = 0; k < ncolors; k++) {
3278bc97078SHong Zhang       coloring->currentcolor = k;
3289e917edbSHong Zhang 
329fcd7ac73SHong Zhang       /*
3308bc97078SHong Zhang        (3-1) Loop over each column associated with color
331f6af9589SHong Zhang        adding the perturbation to the vector w3 = x1 + dx.
332fcd7ac73SHong Zhang        */
3339566063dSJacob Faibussowitsch       PetscCall(VecCopy(x1, w3));
3349566063dSJacob Faibussowitsch       PetscCall(VecGetArray(w3, &w3_array));
335a2f2d239SHong Zhang       if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
336b039d6c4SHong Zhang       if (coloring->htype[0] == 'w') {
3378bc97078SHong Zhang         for (l = 0; l < ncolumns[k]; l++) {
338f6af9589SHong Zhang           col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
3399e917edbSHong Zhang           w3_array[col] += 1.0 / dx;
3409e917edbSHong Zhang         }
341b039d6c4SHong Zhang       } else {                  /* htype == 'ds' */
342a2f2d239SHong Zhang         vscale_array -= cstart; /* shift pointer so global index can be used */
343b039d6c4SHong Zhang         for (l = 0; l < ncolumns[k]; l++) {
344b039d6c4SHong Zhang           col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
345a2f2d239SHong Zhang           w3_array[col] += 1.0 / vscale_array[col];
34670e7395fSHong Zhang         }
347a2f2d239SHong Zhang         vscale_array += cstart;
348fcd7ac73SHong Zhang       }
349a2f2d239SHong Zhang       if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
3509566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(w3, &w3_array));
3519e917edbSHong Zhang 
352fcd7ac73SHong Zhang       /*
3538bc97078SHong Zhang        (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
354fcd7ac73SHong Zhang                            w2 = F(x1 + dx) - F(x1)
355fcd7ac73SHong Zhang        */
3569566063dSJacob Faibussowitsch       PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0));
3579566063dSJacob Faibussowitsch       PetscCall((*f)(sctx, w3, w2, fctx));
3589566063dSJacob Faibussowitsch       PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0));
3599566063dSJacob Faibussowitsch       PetscCall(VecAXPY(w2, -1.0, w1));
3609e917edbSHong Zhang 
3618bc97078SHong Zhang       /*
3628bc97078SHong Zhang        (3-3) Loop over rows of vector, putting results into Jacobian matrix
3638bc97078SHong Zhang        */
364c53567a0SHong Zhang       nrows_k = nrows[k];
3659566063dSJacob Faibussowitsch       PetscCall(VecGetArray(w2, &y));
366b039d6c4SHong Zhang       if (coloring->htype[0] == 'w') {
367c53567a0SHong Zhang         for (l = 0; l < nrows_k; l++) {
3680df34763SHong Zhang           row = Jentry2[nz].row; /* local row index */
3695059b303SJunchao Zhang #if defined(PETSC_USE_COMPLEX)   /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */
3705059b303SJunchao Zhang           PetscScalar *tmp = Jentry2[nz].valaddr;
3715059b303SJunchao Zhang           *tmp             = y[row] * dx;
3725059b303SJunchao Zhang #else
3735059b303SJunchao Zhang           *(Jentry2[nz].valaddr) = y[row] * dx;
3745059b303SJunchao Zhang #endif
3755059b303SJunchao Zhang           nz++;
3769e917edbSHong Zhang         }
377b039d6c4SHong Zhang       } else { /* htype == 'ds' */
378c53567a0SHong Zhang         for (l = 0; l < nrows_k; l++) {
379b039d6c4SHong Zhang           row = Jentry[nz].row; /* local row index */
3805059b303SJunchao Zhang #if defined(PETSC_USE_COMPLEX)  /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */
3815059b303SJunchao Zhang           PetscScalar *tmp = Jentry[nz].valaddr;
3825059b303SJunchao Zhang           *tmp             = y[row] * vscale_array[Jentry[nz].col];
3835059b303SJunchao Zhang #else
384b039d6c4SHong Zhang           *(Jentry[nz].valaddr)  = y[row] * vscale_array[Jentry[nz].col];
3855059b303SJunchao Zhang #endif
386573f477fSHong Zhang           nz++;
387fcd7ac73SHong Zhang         }
388b039d6c4SHong Zhang       }
3899566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(w2, &y));
3908bc97078SHong Zhang     }
391b3e1f37bSHong Zhang   }
392b3e1f37bSHong Zhang 
3939566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
3949566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
39548a46eb9SPierre Jolivet   if (vscale) PetscCall(VecRestoreArray(vscale, &vscale_array));
396fcd7ac73SHong Zhang   coloring->currentcolor = -1;
3979566063dSJacob Faibussowitsch   if (!alreadyboundtocpu) PetscCall(VecBindToCPU(x1, PETSC_FALSE));
3983ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
399fcd7ac73SHong Zhang }
400fcd7ac73SHong Zhang 
401d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFDColoringSetUp_MPIXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c)
402d71ae5a4SJacob Faibussowitsch {
403fcd7ac73SHong Zhang   PetscMPIInt            size, *ncolsonproc, *disp, nn;
404e3225e9dSHong Zhang   PetscInt               i, n, nrows, nrows_i, j, k, m, ncols, col, *rowhit, cstart, cend, colb;
40572c15787SHong Zhang   const PetscInt        *is, *A_ci, *A_cj, *B_ci, *B_cj, *row = NULL, *ltog = NULL;
406071fcb05SBarry Smith   PetscInt               nis = iscoloring->n, nctot, *cols, tmp = 0;
407fcd7ac73SHong Zhang   ISLocalToGlobalMapping map   = mat->cmap->mapping;
408a8971b87SHong Zhang   PetscInt               ctype = c->ctype, *spidxA, *spidxB, nz, bs, bs2, spidx;
4090d1c53f1SHong Zhang   Mat                    A, B;
410e3225e9dSHong Zhang   PetscScalar           *A_val, *B_val, **valaddrhit;
411573f477fSHong Zhang   MatEntry              *Jentry;
4120df34763SHong Zhang   MatEntry2             *Jentry2;
413d4002b98SHong Zhang   PetscBool              isBAIJ, isSELL;
414531e53bdSHong Zhang   PetscInt               bcols = c->bcols;
4150d1c53f1SHong Zhang #if defined(PETSC_USE_CTABLE)
416eec179cfSJacob Faibussowitsch   PetscHMapI colmap = NULL;
4170d1c53f1SHong Zhang #else
4180d1c53f1SHong Zhang   PetscInt *colmap = NULL;      /* local col number of off-diag col */
4190d1c53f1SHong Zhang #endif
420fcd7ac73SHong Zhang 
421fcd7ac73SHong Zhang   PetscFunctionBegin;
4225bdb020cSBarry Smith   if (ctype == IS_COLORING_LOCAL) {
42328b400f6SJacob Faibussowitsch     PetscCheck(map, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_INCOMP, "When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping");
4249566063dSJacob Faibussowitsch     PetscCall(ISLocalToGlobalMappingGetIndices(map, &ltog));
42572c15787SHong Zhang   }
426fcd7ac73SHong Zhang 
4279566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(mat, &bs));
4289566063dSJacob Faibussowitsch   PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATMPIBAIJ, &isBAIJ));
4299566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISELL, &isSELL));
430e3225e9dSHong Zhang   if (isBAIJ) {
431195687c5SHong Zhang     Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
4326c145f34SHong Zhang     Mat_SeqBAIJ *spA, *spB;
4339371c9d4SSatish Balay     A     = baij->A;
4349371c9d4SSatish Balay     spA   = (Mat_SeqBAIJ *)A->data;
4359371c9d4SSatish Balay     A_val = spA->a;
4369371c9d4SSatish Balay     B     = baij->B;
4379371c9d4SSatish Balay     spB   = (Mat_SeqBAIJ *)B->data;
4389371c9d4SSatish Balay     B_val = spB->a;
4390d1c53f1SHong Zhang     nz    = spA->nz + spB->nz; /* total nonzero entries of mat */
44048a46eb9SPierre Jolivet     if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
441097e26fcSJed Brown     colmap = baij->colmap;
4429566063dSJacob Faibussowitsch     PetscCall(MatGetColumnIJ_SeqBAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
4439566063dSJacob Faibussowitsch     PetscCall(MatGetColumnIJ_SeqBAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
444195687c5SHong Zhang 
445195687c5SHong Zhang     if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
446195687c5SHong Zhang       PetscInt *garray;
4479566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(B->cmap->n, &garray));
448195687c5SHong Zhang       for (i = 0; i < baij->B->cmap->n / bs; i++) {
449ad540459SPierre Jolivet         for (j = 0; j < bs; j++) garray[i * bs + j] = bs * baij->garray[i] + j;
450195687c5SHong Zhang       }
4519566063dSJacob Faibussowitsch       PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, garray, &c->vscale));
4529566063dSJacob Faibussowitsch       PetscCall(VecBindToCPU(c->vscale, PETSC_TRUE));
4539566063dSJacob Faibussowitsch       PetscCall(PetscFree(garray));
454195687c5SHong Zhang     }
455d4002b98SHong Zhang   } else if (isSELL) {
456d4002b98SHong Zhang     Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
457d4002b98SHong Zhang     Mat_SeqSELL *spA, *spB;
4589371c9d4SSatish Balay     A     = sell->A;
4599371c9d4SSatish Balay     spA   = (Mat_SeqSELL *)A->data;
4609371c9d4SSatish Balay     A_val = spA->val;
4619371c9d4SSatish Balay     B     = sell->B;
4629371c9d4SSatish Balay     spB   = (Mat_SeqSELL *)B->data;
4639371c9d4SSatish Balay     B_val = spB->val;
464f4b713efSHong Zhang     nz    = spA->nz + spB->nz; /* total nonzero entries of mat */
465d4002b98SHong Zhang     if (!sell->colmap) {
466f4b713efSHong Zhang       /* Allow access to data structures of local part of matrix
467f4b713efSHong Zhang        - creates aij->colmap which maps global column number to local number in part B */
4689566063dSJacob Faibussowitsch       PetscCall(MatCreateColmap_MPISELL_Private(mat));
469f4b713efSHong Zhang     }
470d4002b98SHong Zhang     colmap = sell->colmap;
4719566063dSJacob Faibussowitsch     PetscCall(MatGetColumnIJ_SeqSELL_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
4729566063dSJacob Faibussowitsch     PetscCall(MatGetColumnIJ_SeqSELL_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
473f4b713efSHong Zhang 
474f4b713efSHong Zhang     bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */
475f4b713efSHong Zhang 
476f4b713efSHong Zhang     if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
4779566063dSJacob Faibussowitsch       PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, sell->garray, &c->vscale));
4789566063dSJacob Faibussowitsch       PetscCall(VecBindToCPU(c->vscale, PETSC_TRUE));
479f4b713efSHong Zhang     }
480e3225e9dSHong Zhang   } else {
481e3225e9dSHong Zhang     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
482e3225e9dSHong Zhang     Mat_SeqAIJ *spA, *spB;
4839371c9d4SSatish Balay     A     = aij->A;
4849371c9d4SSatish Balay     spA   = (Mat_SeqAIJ *)A->data;
4859371c9d4SSatish Balay     A_val = spA->a;
4869371c9d4SSatish Balay     B     = aij->B;
4879371c9d4SSatish Balay     spB   = (Mat_SeqAIJ *)B->data;
4889371c9d4SSatish Balay     B_val = spB->a;
489e3225e9dSHong Zhang     nz    = spA->nz + spB->nz; /* total nonzero entries of mat */
490e3225e9dSHong Zhang     if (!aij->colmap) {
491e3225e9dSHong Zhang       /* Allow access to data structures of local part of matrix
492e3225e9dSHong Zhang        - creates aij->colmap which maps global column number to local number in part B */
4939566063dSJacob Faibussowitsch       PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
494e3225e9dSHong Zhang     }
495097e26fcSJed Brown     colmap = aij->colmap;
4969566063dSJacob Faibussowitsch     PetscCall(MatGetColumnIJ_SeqAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
4979566063dSJacob Faibussowitsch     PetscCall(MatGetColumnIJ_SeqAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
498e3225e9dSHong Zhang 
499e3225e9dSHong Zhang     bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */
500195687c5SHong Zhang 
501195687c5SHong Zhang     if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
5029566063dSJacob Faibussowitsch       PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, aij->garray, &c->vscale));
5039566063dSJacob Faibussowitsch       PetscCall(VecBindToCPU(c->vscale, PETSC_TRUE));
504195687c5SHong Zhang     }
5050d1c53f1SHong Zhang   }
5060d1c53f1SHong Zhang 
5070d1c53f1SHong Zhang   m      = mat->rmap->n / bs;
5080d1c53f1SHong Zhang   cstart = mat->cmap->rstart / bs;
5090d1c53f1SHong Zhang   cend   = mat->cmap->rend / bs;
510fcd7ac73SHong Zhang 
5119566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nis, &c->ncolumns, nis, &c->columns));
5129566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nis, &c->nrows));
513fcd7ac73SHong Zhang 
5140df34763SHong Zhang   if (c->htype[0] == 'd') {
5159566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nz, &Jentry));
5168bc97078SHong Zhang     c->matentry = Jentry;
5170df34763SHong Zhang   } else if (c->htype[0] == 'w') {
5189566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nz, &Jentry2));
5190df34763SHong Zhang     c->matentry2 = Jentry2;
5200df34763SHong Zhang   } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "htype is not supported");
521a774a6f1SHong Zhang 
5229566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m + 1, &rowhit, m + 1, &valaddrhit));
523fcd7ac73SHong Zhang   nz = 0;
5249566063dSJacob Faibussowitsch   PetscCall(ISColoringGetIS(iscoloring, PETSC_OWN_POINTER, PETSC_IGNORE, &c->isa));
525071fcb05SBarry Smith 
526071fcb05SBarry Smith   if (ctype == IS_COLORING_GLOBAL) {
5279566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size));
5289566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(size, &ncolsonproc, size, &disp));
529071fcb05SBarry Smith   }
530071fcb05SBarry Smith 
531d3825b63SHong Zhang   for (i = 0; i < nis; i++) { /* for each local color */
5329566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(c->isa[i], &n));
5339566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(c->isa[i], &is));
534fcd7ac73SHong Zhang 
535fcd7ac73SHong Zhang     c->ncolumns[i] = n; /* local number of columns of this color on this process */
536071fcb05SBarry Smith     c->columns[i]  = (PetscInt *)is;
537fcd7ac73SHong Zhang 
538fcd7ac73SHong Zhang     if (ctype == IS_COLORING_GLOBAL) {
539d3825b63SHong Zhang       /* Determine nctot, the total (parallel) number of columns of this color */
540d3825b63SHong Zhang       /* ncolsonproc[j]: local ncolumns on proc[j] of this color */
5419566063dSJacob Faibussowitsch       PetscCall(PetscMPIIntCast(n, &nn));
5429566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Allgather(&nn, 1, MPI_INT, ncolsonproc, 1, MPI_INT, PetscObjectComm((PetscObject)mat)));
5439371c9d4SSatish Balay       nctot = 0;
5449371c9d4SSatish Balay       for (j = 0; j < size; j++) nctot += ncolsonproc[j];
54548a46eb9SPierre Jolivet       if (!nctot) PetscCall(PetscInfo(mat, "Coloring of matrix has some unneeded colors with no corresponding rows\n"));
546fcd7ac73SHong Zhang 
547fcd7ac73SHong Zhang       disp[0] = 0;
548ad540459SPierre Jolivet       for (j = 1; j < size; j++) disp[j] = disp[j - 1] + ncolsonproc[j - 1];
549fcd7ac73SHong Zhang 
550d3825b63SHong Zhang       /* Get cols, the complete list of columns for this color on each process */
5519566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nctot + 1, &cols));
5529566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Allgatherv((void *)is, n, MPIU_INT, cols, ncolsonproc, disp, MPIU_INT, PetscObjectComm((PetscObject)mat)));
5535bdb020cSBarry Smith     } else if (ctype == IS_COLORING_LOCAL) {
554fcd7ac73SHong Zhang       /* Determine local number of columns of this color on this process, including ghost points */
555fcd7ac73SHong Zhang       nctot = n;
556071fcb05SBarry Smith       cols  = (PetscInt *)is;
557fcd7ac73SHong Zhang     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not provided for this MatFDColoring type");
558fcd7ac73SHong Zhang 
5591b97d346SHong Zhang     /* Mark all rows affect by these columns */
5609566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(rowhit, m));
5610d1c53f1SHong Zhang     bs2     = bs * bs;
5624b2e90caSHong Zhang     nrows_i = 0;
5631b97d346SHong Zhang     for (j = 0; j < nctot; j++) { /* loop over columns*/
5645bdb020cSBarry Smith       if (ctype == IS_COLORING_LOCAL) {
565fcd7ac73SHong Zhang         col = ltog[cols[j]];
566fcd7ac73SHong Zhang       } else {
567fcd7ac73SHong Zhang         col = cols[j];
568fcd7ac73SHong Zhang       }
569e3225e9dSHong Zhang       if (col >= cstart && col < cend) { /* column is in A, diagonal block of mat */
570071fcb05SBarry Smith         tmp   = A_ci[col - cstart];
571071fcb05SBarry Smith         row   = A_cj + tmp;
572071fcb05SBarry Smith         nrows = A_ci[col - cstart + 1] - tmp;
5734b2e90caSHong Zhang         nrows_i += nrows;
574071fcb05SBarry Smith 
575fcd7ac73SHong Zhang         /* loop over columns of A marking them in rowhit */
576d3825b63SHong Zhang         for (k = 0; k < nrows; k++) {
577d3825b63SHong Zhang           /* set valaddrhit for part A */
578071fcb05SBarry Smith           spidx            = bs2 * spidxA[tmp + k];
579e3225e9dSHong Zhang           valaddrhit[*row] = &A_val[spidx];
580a774a6f1SHong Zhang           rowhit[*row++]   = col - cstart + 1; /* local column index */
581fcd7ac73SHong Zhang         }
582e3225e9dSHong Zhang       } else { /* column is in B, off-diagonal block of mat */
583fcd7ac73SHong Zhang #if defined(PETSC_USE_CTABLE)
584eec179cfSJacob Faibussowitsch         PetscCall(PetscHMapIGetWithDefault(colmap, col + 1, 0, &colb));
585fcd7ac73SHong Zhang         colb--;
586fcd7ac73SHong Zhang #else
5870d1c53f1SHong Zhang         colb = colmap[col] - 1; /* local column index */
588fcd7ac73SHong Zhang #endif
589fcd7ac73SHong Zhang         if (colb == -1) {
590d3825b63SHong Zhang           nrows = 0;
591fcd7ac73SHong Zhang         } else {
5920d1c53f1SHong Zhang           colb  = colb / bs;
593071fcb05SBarry Smith           tmp   = B_ci[colb];
594071fcb05SBarry Smith           row   = B_cj + tmp;
595071fcb05SBarry Smith           nrows = B_ci[colb + 1] - tmp;
596fcd7ac73SHong Zhang         }
5974b2e90caSHong Zhang         nrows_i += nrows;
598fcd7ac73SHong Zhang         /* loop over columns of B marking them in rowhit */
599d3825b63SHong Zhang         for (k = 0; k < nrows; k++) {
600d3825b63SHong Zhang           /* set valaddrhit for part B */
601071fcb05SBarry Smith           spidx            = bs2 * spidxB[tmp + k];
602e3225e9dSHong Zhang           valaddrhit[*row] = &B_val[spidx];
60370e7395fSHong Zhang           rowhit[*row++]   = colb + 1 + cend - cstart; /* local column index */
604fcd7ac73SHong Zhang         }
605fcd7ac73SHong Zhang       }
606fcd7ac73SHong Zhang     }
6074b2e90caSHong Zhang     c->nrows[i] = nrows_i;
6088bc97078SHong Zhang 
6090df34763SHong Zhang     if (c->htype[0] == 'd') {
610d3825b63SHong Zhang       for (j = 0; j < m; j++) {
611fcd7ac73SHong Zhang         if (rowhit[j]) {
612573f477fSHong Zhang           Jentry[nz].row     = j;             /* local row index */
613573f477fSHong Zhang           Jentry[nz].col     = rowhit[j] - 1; /* local column index */
614573f477fSHong Zhang           Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
615573f477fSHong Zhang           nz++;
616fcd7ac73SHong Zhang         }
617fcd7ac73SHong Zhang       }
6180df34763SHong Zhang     } else { /* c->htype == 'wp' */
6190df34763SHong Zhang       for (j = 0; j < m; j++) {
6200df34763SHong Zhang         if (rowhit[j]) {
6210df34763SHong Zhang           Jentry2[nz].row     = j;             /* local row index */
6220df34763SHong Zhang           Jentry2[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
6230df34763SHong Zhang           nz++;
6240df34763SHong Zhang         }
6250df34763SHong Zhang       }
6260df34763SHong Zhang     }
62748a46eb9SPierre Jolivet     if (ctype == IS_COLORING_GLOBAL) PetscCall(PetscFree(cols));
628fcd7ac73SHong Zhang   }
62948a46eb9SPierre Jolivet   if (ctype == IS_COLORING_GLOBAL) PetscCall(PetscFree2(ncolsonproc, disp));
630fcd7ac73SHong Zhang 
631a8971b87SHong Zhang   if (bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */
6329566063dSJacob Faibussowitsch     PetscCall(MatFDColoringSetUpBlocked_AIJ_Private(mat, c, nz));
633531e53bdSHong Zhang   }
634531e53bdSHong Zhang 
6350d1c53f1SHong Zhang   if (isBAIJ) {
6369566063dSJacob Faibussowitsch     PetscCall(MatRestoreColumnIJ_SeqBAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
6379566063dSJacob Faibussowitsch     PetscCall(MatRestoreColumnIJ_SeqBAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
6389566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(bs * mat->rmap->n, &c->dy));
639d4002b98SHong Zhang   } else if (isSELL) {
6409566063dSJacob Faibussowitsch     PetscCall(MatRestoreColumnIJ_SeqSELL_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
6419566063dSJacob Faibussowitsch     PetscCall(MatRestoreColumnIJ_SeqSELL_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
6420d1c53f1SHong Zhang   } else {
6439566063dSJacob Faibussowitsch     PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
6449566063dSJacob Faibussowitsch     PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
6450d1c53f1SHong Zhang   }
6460d1c53f1SHong Zhang 
6479566063dSJacob Faibussowitsch   PetscCall(ISColoringRestoreIS(iscoloring, PETSC_OWN_POINTER, &c->isa));
6489566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rowhit, valaddrhit));
649a8971b87SHong Zhang 
65048a46eb9SPierre Jolivet   if (ctype == IS_COLORING_LOCAL) PetscCall(ISLocalToGlobalMappingRestoreIndices(map, &ltog));
6519566063dSJacob Faibussowitsch   PetscCall(PetscInfo(c, "ncolors %" PetscInt_FMT ", brows %" PetscInt_FMT " and bcols %" PetscInt_FMT " are used.\n", c->ncolors, c->brows, c->bcols));
6523ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
653fcd7ac73SHong Zhang }
65493dfae19SHong Zhang 
655d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFDColoringCreate_MPIXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c)
656d71ae5a4SJacob Faibussowitsch {
657a8971b87SHong Zhang   PetscInt  bs, nis = iscoloring->n, m = mat->rmap->n;
658d4002b98SHong Zhang   PetscBool isBAIJ, isSELL;
659531e53bdSHong Zhang 
66093dfae19SHong Zhang   PetscFunctionBegin;
661531e53bdSHong Zhang   /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian;
662531e53bdSHong Zhang    bcols is chosen s.t. dy-array takes 50% of memory space as mat */
6639566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(mat, &bs));
6649566063dSJacob Faibussowitsch   PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATMPIBAIJ, &isBAIJ));
6659566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISELL, &isSELL));
666b8dfa574SBarry Smith   if (isBAIJ || m == 0) {
667a8971b87SHong Zhang     c->brows = m;
668a8971b87SHong Zhang     c->bcols = 1;
669d4002b98SHong Zhang   } else if (isSELL) {
670f4b713efSHong Zhang     /* bcols is chosen s.t. dy-array takes 50% of local memory space as mat */
671d4002b98SHong Zhang     Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
672d4002b98SHong Zhang     Mat_SeqSELL *spA, *spB;
673f4b713efSHong Zhang     Mat          A, B;
674f4b713efSHong Zhang     PetscInt     nz, brows, bcols;
675f4b713efSHong Zhang     PetscReal    mem;
676f4b713efSHong Zhang 
677d4002b98SHong Zhang     bs = 1; /* only bs=1 is supported for MPISELL matrix */
678f4b713efSHong Zhang 
6799371c9d4SSatish Balay     A     = sell->A;
6809371c9d4SSatish Balay     spA   = (Mat_SeqSELL *)A->data;
6819371c9d4SSatish Balay     B     = sell->B;
6829371c9d4SSatish Balay     spB   = (Mat_SeqSELL *)B->data;
683f4b713efSHong Zhang     nz    = spA->nz + spB->nz; /* total local nonzero entries of mat */
684f4b713efSHong Zhang     mem   = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt);
685f4b713efSHong Zhang     bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar)));
686f4b713efSHong Zhang     brows = 1000 / bcols;
687f4b713efSHong Zhang     if (bcols > nis) bcols = nis;
688f4b713efSHong Zhang     if (brows == 0 || brows > m) brows = m;
689f4b713efSHong Zhang     c->brows = brows;
690f4b713efSHong Zhang     c->bcols = bcols;
691531e53bdSHong Zhang   } else { /* mpiaij matrix */
692531e53bdSHong Zhang     /* bcols is chosen s.t. dy-array takes 50% of local memory space as mat */
693531e53bdSHong Zhang     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
694531e53bdSHong Zhang     Mat_SeqAIJ *spA, *spB;
695531e53bdSHong Zhang     Mat         A, B;
696a8971b87SHong Zhang     PetscInt    nz, brows, bcols;
697531e53bdSHong Zhang     PetscReal   mem;
698531e53bdSHong Zhang 
699531e53bdSHong Zhang     bs = 1; /* only bs=1 is supported for MPIAIJ matrix */
700531e53bdSHong Zhang 
7019371c9d4SSatish Balay     A     = aij->A;
7029371c9d4SSatish Balay     spA   = (Mat_SeqAIJ *)A->data;
7039371c9d4SSatish Balay     B     = aij->B;
7049371c9d4SSatish Balay     spB   = (Mat_SeqAIJ *)B->data;
705531e53bdSHong Zhang     nz    = spA->nz + spB->nz; /* total local nonzero entries of mat */
706531e53bdSHong Zhang     mem   = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt);
707531e53bdSHong Zhang     bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar)));
708531e53bdSHong Zhang     brows = 1000 / bcols;
709531e53bdSHong Zhang     if (bcols > nis) bcols = nis;
710531e53bdSHong Zhang     if (brows == 0 || brows > m) brows = m;
711531e53bdSHong Zhang     c->brows = brows;
712531e53bdSHong Zhang     c->bcols = bcols;
713531e53bdSHong Zhang   }
714a8971b87SHong Zhang 
715a8971b87SHong Zhang   c->M       = mat->rmap->N / bs; /* set the global rows and columns and local rows */
716a8971b87SHong Zhang   c->N       = mat->cmap->N / bs;
717a8971b87SHong Zhang   c->m       = mat->rmap->n / bs;
718a8971b87SHong Zhang   c->rstart  = mat->rmap->rstart / bs;
719a8971b87SHong Zhang   c->ncolors = nis;
7203ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
72193dfae19SHong Zhang }
722bdaf1daeSBarry Smith 
723bdaf1daeSBarry Smith /*@C
724bdaf1daeSBarry Smith 
72511a5261eSBarry Smith     MatFDColoringSetValues - takes a matrix in compressed color format and enters the matrix into a PETSc `Mat`
726bdaf1daeSBarry Smith 
727c3339decSBarry Smith    Collective
728bdaf1daeSBarry Smith 
729bdaf1daeSBarry Smith    Input Parameters:
730bdaf1daeSBarry Smith +    J - the sparse matrix
73111a5261eSBarry Smith .    coloring - created with `MatFDColoringCreate()` and a local coloring
732bdaf1daeSBarry Smith -    y - column major storage of matrix values with one color of values per column, the number of rows of y should match
7332ef1f0ffSBarry Smith          the number of local rows of `J` and the number of columns is the number of colors.
734bdaf1daeSBarry Smith 
735bdaf1daeSBarry Smith    Level: intermediate
736bdaf1daeSBarry Smith 
7372ef1f0ffSBarry Smith    Notes:
7382ef1f0ffSBarry Smith    The matrix in compressed color format may come from an automatic differentiation code
739bdaf1daeSBarry Smith 
74011a5261eSBarry Smith    The code will be slightly faster if `MatFDColoringSetBlockSize`(coloring,`PETSC_DEFAULT`,nc); is called immediately after creating the coloring
741bdaf1daeSBarry Smith 
742*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatFDColoringCreate()`, `ISColoring`, `ISColoringCreate()`, `ISColoringSetType()`, `IS_COLORING_LOCAL`, `MatFDColoringSetBlockSize()`
743bdaf1daeSBarry Smith @*/
744d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFDColoringSetValues(Mat J, MatFDColoring coloring, const PetscScalar *y)
745d71ae5a4SJacob Faibussowitsch {
746bdaf1daeSBarry Smith   MatEntry2      *Jentry2;
747bdaf1daeSBarry Smith   PetscInt        row, i, nrows_k, l, ncolors, nz = 0, bcols, nbcols = 0;
748bdaf1daeSBarry Smith   const PetscInt *nrows;
749bdaf1daeSBarry Smith   PetscBool       eq;
750bdaf1daeSBarry Smith 
751bdaf1daeSBarry Smith   PetscFunctionBegin;
752bdaf1daeSBarry Smith   PetscValidHeaderSpecific(J, MAT_CLASSID, 1);
753bdaf1daeSBarry Smith   PetscValidHeaderSpecific(coloring, MAT_FDCOLORING_CLASSID, 2);
7549566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompareId((PetscObject)J, coloring->matid, &eq));
75528b400f6SJacob Faibussowitsch   PetscCheck(eq, PetscObjectComm((PetscObject)J), PETSC_ERR_ARG_WRONG, "Matrix used with MatFDColoringSetValues() must be that used with MatFDColoringCreate()");
756bdaf1daeSBarry Smith   Jentry2 = coloring->matentry2;
757bdaf1daeSBarry Smith   nrows   = coloring->nrows;
758bdaf1daeSBarry Smith   ncolors = coloring->ncolors;
759bdaf1daeSBarry Smith   bcols   = coloring->bcols;
760bdaf1daeSBarry Smith 
761bdaf1daeSBarry Smith   for (i = 0; i < ncolors; i += bcols) {
762bdaf1daeSBarry Smith     nrows_k = nrows[nbcols++];
763bdaf1daeSBarry Smith     for (l = 0; l < nrows_k; l++) {
764bdaf1daeSBarry Smith       row                      = Jentry2[nz].row; /* local row index */
765bdaf1daeSBarry Smith       *(Jentry2[nz++].valaddr) = y[row];
766bdaf1daeSBarry Smith     }
767bdaf1daeSBarry Smith     y += bcols * coloring->m;
768bdaf1daeSBarry Smith   }
7699566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
7709566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
7713ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
772bdaf1daeSBarry Smith }
773