xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4dfa11a44d5adf2389f1d3acbc8f3c1116dc6c3a)
1c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2af0996ceSBarry Smith #include <petsc/private/vecimpl.h>
397929ea7SJunchao Zhang #include <petsc/private/sfimpl.h>
4af0996ceSBarry Smith #include <petsc/private/isimpl.h>
5c6db04a5SJed Brown #include <petscblaslapack.h>
60c312b8eSJed Brown #include <petscsf.h>
7bc8e477aSFande Kong #include <petsc/private/hashmapi.h>
88a729477SBarry Smith 
99371c9d4SSatish Balay PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
108a9c020eSBarry Smith   Mat B;
118a9c020eSBarry Smith 
128a9c020eSBarry Smith   PetscFunctionBegin;
138a9c020eSBarry Smith   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
148a9c020eSBarry Smith   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
158a9c020eSBarry Smith   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
16501b8e33SLisandro Dalcin   PetscCall(MatDestroy(&B));
178a9c020eSBarry Smith   PetscFunctionReturn(0);
188a9c020eSBarry Smith }
198a9c020eSBarry Smith 
209371c9d4SSatish Balay PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
218a9c020eSBarry Smith   Mat B;
228a9c020eSBarry Smith 
238a9c020eSBarry Smith   PetscFunctionBegin;
248a9c020eSBarry Smith   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
258a9c020eSBarry Smith   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
26501b8e33SLisandro Dalcin   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
278a9c020eSBarry Smith   PetscFunctionReturn(0);
288a9c020eSBarry Smith }
298a9c020eSBarry Smith 
3001bebe75SBarry Smith /*MC
3101bebe75SBarry Smith    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
3201bebe75SBarry Smith 
3311a5261eSBarry Smith    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
3411a5261eSBarry Smith    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
3511a5261eSBarry Smith   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
3601bebe75SBarry Smith   for communicators controlling multiple processes.  It is recommended that you call both of
3701bebe75SBarry Smith   the above preallocation routines for simplicity.
3801bebe75SBarry Smith 
3901bebe75SBarry Smith    Options Database Keys:
4011a5261eSBarry Smith . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
4101bebe75SBarry Smith 
4211a5261eSBarry Smith   Developer Note:
4311a5261eSBarry Smith     Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`. The type also automatically switches over to use inodes when
4401bebe75SBarry Smith    enough exist.
4501bebe75SBarry Smith 
4601bebe75SBarry Smith   Level: beginner
4701bebe75SBarry Smith 
4811a5261eSBarry Smith .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
4901bebe75SBarry Smith M*/
5001bebe75SBarry Smith 
5101bebe75SBarry Smith /*MC
5201bebe75SBarry Smith    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
5301bebe75SBarry Smith 
5411a5261eSBarry Smith    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
5511a5261eSBarry Smith    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
5611a5261eSBarry Smith    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
5701bebe75SBarry Smith   for communicators controlling multiple processes.  It is recommended that you call both of
5801bebe75SBarry Smith   the above preallocation routines for simplicity.
5901bebe75SBarry Smith 
6001bebe75SBarry Smith    Options Database Keys:
6111a5261eSBarry Smith . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
6201bebe75SBarry Smith 
6301bebe75SBarry Smith   Level: beginner
6401bebe75SBarry Smith 
65c2e3fba1SPatrick Sanan .seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
6601bebe75SBarry Smith M*/
6701bebe75SBarry Smith 
689371c9d4SSatish Balay static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) {
69f74ef234SStefano Zampini   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
70f74ef234SStefano Zampini 
71f74ef234SStefano Zampini   PetscFunctionBegin;
72f74ef234SStefano Zampini #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
73b470e4b4SRichard Tran Mills   A->boundtocpu = flg;
74f74ef234SStefano Zampini #endif
751baa6e33SBarry Smith   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
761baa6e33SBarry Smith   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
773120d049SRichard Tran Mills 
783120d049SRichard Tran Mills   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
793120d049SRichard Tran Mills    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
803120d049SRichard Tran Mills    * to differ from the parent matrix. */
811baa6e33SBarry Smith   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
821baa6e33SBarry Smith   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
833120d049SRichard Tran Mills 
84f74ef234SStefano Zampini   PetscFunctionReturn(0);
85f74ef234SStefano Zampini }
86f74ef234SStefano Zampini 
879371c9d4SSatish Balay PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) {
8826bda2c4Sstefano_zampini   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
8926bda2c4Sstefano_zampini 
9026bda2c4Sstefano_zampini   PetscFunctionBegin;
9146533700Sstefano_zampini   if (mat->A) {
929566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
939566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
9446533700Sstefano_zampini   }
9526bda2c4Sstefano_zampini   PetscFunctionReturn(0);
9626bda2c4Sstefano_zampini }
9726bda2c4Sstefano_zampini 
989371c9d4SSatish Balay PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) {
9927d4218bSShri Abhyankar   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
10027d4218bSShri Abhyankar   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
10127d4218bSShri Abhyankar   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
10227d4218bSShri Abhyankar   const PetscInt  *ia, *ib;
103ce496241SStefano Zampini   const MatScalar *aa, *bb, *aav, *bav;
10427d4218bSShri Abhyankar   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
10527d4218bSShri Abhyankar   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
10627d4218bSShri Abhyankar 
10727d4218bSShri Abhyankar   PetscFunctionBegin;
108f4259b30SLisandro Dalcin   *keptrows = NULL;
109ce496241SStefano Zampini 
11027d4218bSShri Abhyankar   ia = a->i;
11127d4218bSShri Abhyankar   ib = b->i;
1129566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
1139566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
11427d4218bSShri Abhyankar   for (i = 0; i < m; i++) {
11527d4218bSShri Abhyankar     na = ia[i + 1] - ia[i];
11627d4218bSShri Abhyankar     nb = ib[i + 1] - ib[i];
11727d4218bSShri Abhyankar     if (!na && !nb) {
11827d4218bSShri Abhyankar       cnt++;
11927d4218bSShri Abhyankar       goto ok1;
12027d4218bSShri Abhyankar     }
121ce496241SStefano Zampini     aa = aav + ia[i];
12227d4218bSShri Abhyankar     for (j = 0; j < na; j++) {
12327d4218bSShri Abhyankar       if (aa[j] != 0.0) goto ok1;
12427d4218bSShri Abhyankar     }
125ce496241SStefano Zampini     bb = bav + ib[i];
12627d4218bSShri Abhyankar     for (j = 0; j < nb; j++) {
12727d4218bSShri Abhyankar       if (bb[j] != 0.0) goto ok1;
12827d4218bSShri Abhyankar     }
12927d4218bSShri Abhyankar     cnt++;
13027d4218bSShri Abhyankar   ok1:;
13127d4218bSShri Abhyankar   }
1321c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
133ce496241SStefano Zampini   if (!n0rows) {
1349566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
1359566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
136ce496241SStefano Zampini     PetscFunctionReturn(0);
137ce496241SStefano Zampini   }
1389566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
13927d4218bSShri Abhyankar   cnt = 0;
14027d4218bSShri Abhyankar   for (i = 0; i < m; i++) {
14127d4218bSShri Abhyankar     na = ia[i + 1] - ia[i];
14227d4218bSShri Abhyankar     nb = ib[i + 1] - ib[i];
14327d4218bSShri Abhyankar     if (!na && !nb) continue;
144ce496241SStefano Zampini     aa = aav + ia[i];
14527d4218bSShri Abhyankar     for (j = 0; j < na; j++) {
14627d4218bSShri Abhyankar       if (aa[j] != 0.0) {
14727d4218bSShri Abhyankar         rows[cnt++] = rstart + i;
14827d4218bSShri Abhyankar         goto ok2;
14927d4218bSShri Abhyankar       }
15027d4218bSShri Abhyankar     }
151ce496241SStefano Zampini     bb = bav + ib[i];
15227d4218bSShri Abhyankar     for (j = 0; j < nb; j++) {
15327d4218bSShri Abhyankar       if (bb[j] != 0.0) {
15427d4218bSShri Abhyankar         rows[cnt++] = rstart + i;
15527d4218bSShri Abhyankar         goto ok2;
15627d4218bSShri Abhyankar       }
15727d4218bSShri Abhyankar     }
15827d4218bSShri Abhyankar   ok2:;
15927d4218bSShri Abhyankar   }
1609566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
1619566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
1629566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
16327d4218bSShri Abhyankar   PetscFunctionReturn(0);
16427d4218bSShri Abhyankar }
16527d4218bSShri Abhyankar 
1669371c9d4SSatish Balay PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) {
16799e65526SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
16894342113SStefano Zampini   PetscBool   cong;
16999e65526SBarry Smith 
17099e65526SBarry Smith   PetscFunctionBegin;
1719566063dSJacob Faibussowitsch   PetscCall(MatHasCongruentLayouts(Y, &cong));
17294342113SStefano Zampini   if (Y->assembled && cong) {
1739566063dSJacob Faibussowitsch     PetscCall(MatDiagonalSet(aij->A, D, is));
17499e65526SBarry Smith   } else {
1759566063dSJacob Faibussowitsch     PetscCall(MatDiagonalSet_Default(Y, D, is));
17699e65526SBarry Smith   }
17799e65526SBarry Smith   PetscFunctionReturn(0);
17899e65526SBarry Smith }
17999e65526SBarry Smith 
1809371c9d4SSatish Balay PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) {
181f1f41ecbSJed Brown   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
182f1f41ecbSJed Brown   PetscInt    i, rstart, nrows, *rows;
183f1f41ecbSJed Brown 
184f1f41ecbSJed Brown   PetscFunctionBegin;
1850298fd71SBarry Smith   *zrows = NULL;
1869566063dSJacob Faibussowitsch   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
1879566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
188f1f41ecbSJed Brown   for (i = 0; i < nrows; i++) rows[i] += rstart;
1899566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
190f1f41ecbSJed Brown   PetscFunctionReturn(0);
191f1f41ecbSJed Brown }
192f1f41ecbSJed Brown 
1939371c9d4SSatish Balay PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) {
1940716a85fSBarry Smith   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
195a873a8cdSSam Reynolds   PetscInt           i, m, n, *garray = aij->garray;
1960716a85fSBarry Smith   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
1970716a85fSBarry Smith   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
1980716a85fSBarry Smith   PetscReal         *work;
199ce496241SStefano Zampini   const PetscScalar *dummy;
2000716a85fSBarry Smith 
2010716a85fSBarry Smith   PetscFunctionBegin;
2029566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, &m, &n));
2039566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n, &work));
2049566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
2059566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
2069566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
2079566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
208857cbf51SRichard Tran Mills   if (type == NORM_2) {
209ad540459SPierre Jolivet     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
210ad540459SPierre Jolivet     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
211857cbf51SRichard Tran Mills   } else if (type == NORM_1) {
212ad540459SPierre Jolivet     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
213ad540459SPierre Jolivet     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
214857cbf51SRichard Tran Mills   } else if (type == NORM_INFINITY) {
215ad540459SPierre Jolivet     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
216ad540459SPierre Jolivet     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
217857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
218ad540459SPierre Jolivet     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
219ad540459SPierre Jolivet     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
220857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
221ad540459SPierre Jolivet     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
222ad540459SPierre Jolivet     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
223857cbf51SRichard Tran Mills   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
224857cbf51SRichard Tran Mills   if (type == NORM_INFINITY) {
2251c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2260716a85fSBarry Smith   } else {
2271c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2280716a85fSBarry Smith   }
2299566063dSJacob Faibussowitsch   PetscCall(PetscFree(work));
230857cbf51SRichard Tran Mills   if (type == NORM_2) {
231a873a8cdSSam Reynolds     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
232857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
233a873a8cdSSam Reynolds     for (i = 0; i < n; i++) reductions[i] /= m;
2340716a85fSBarry Smith   }
2350716a85fSBarry Smith   PetscFunctionReturn(0);
2360716a85fSBarry Smith }
2370716a85fSBarry Smith 
2389371c9d4SSatish Balay PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) {
239e52d2c62SBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
240e52d2c62SBarry Smith   IS              sis, gis;
241e52d2c62SBarry Smith   const PetscInt *isis, *igis;
242e52d2c62SBarry Smith   PetscInt        n, *iis, nsis, ngis, rstart, i;
243e52d2c62SBarry Smith 
244e52d2c62SBarry Smith   PetscFunctionBegin;
2459566063dSJacob Faibussowitsch   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
2469566063dSJacob Faibussowitsch   PetscCall(MatFindNonzeroRows(a->B, &gis));
2479566063dSJacob Faibussowitsch   PetscCall(ISGetSize(gis, &ngis));
2489566063dSJacob Faibussowitsch   PetscCall(ISGetSize(sis, &nsis));
2499566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(sis, &isis));
2509566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(gis, &igis));
251e52d2c62SBarry Smith 
2529566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ngis + nsis, &iis));
2539566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(iis, igis, ngis));
2549566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
255e52d2c62SBarry Smith   n = ngis + nsis;
2569566063dSJacob Faibussowitsch   PetscCall(PetscSortRemoveDupsInt(&n, iis));
2579566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
258e52d2c62SBarry Smith   for (i = 0; i < n; i++) iis[i] += rstart;
2599566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
260e52d2c62SBarry Smith 
2619566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(sis, &isis));
2629566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(gis, &igis));
2639566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&sis));
2649566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&gis));
265e52d2c62SBarry Smith   PetscFunctionReturn(0);
266e52d2c62SBarry Smith }
267e52d2c62SBarry Smith 
268dd6ea824SBarry Smith /*
2690f5bd95cSBarry Smith   Local utility routine that creates a mapping from the global column
2709e25ed09SBarry Smith number to the local number in the off-diagonal part of the local
2710f5bd95cSBarry Smith storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
2720f5bd95cSBarry Smith a slightly higher hash table cost; without it it is not scalable (each processor
27372fa4726SStefano Zampini has an order N integer array) but is fast to access.
2749e25ed09SBarry Smith */
2759371c9d4SSatish Balay PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) {
27644a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
277d0f46423SBarry Smith   PetscInt    n   = aij->B->cmap->n, i;
278dbb450caSBarry Smith 
2793a40ed3dSBarry Smith   PetscFunctionBegin;
28008401ef6SPierre Jolivet   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
281aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
2829566063dSJacob Faibussowitsch   PetscCall(PetscTableCreate(n, mat->cmap->N + 1, &aij->colmap));
28348a46eb9SPierre Jolivet   for (i = 0; i < n; i++) PetscCall(PetscTableAdd(aij->colmap, aij->garray[i] + 1, i + 1, INSERT_VALUES));
284b1fc9764SSatish Balay #else
2859566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
286905e6a2fSBarry Smith   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
287b1fc9764SSatish Balay #endif
2883a40ed3dSBarry Smith   PetscFunctionReturn(0);
2899e25ed09SBarry Smith }
2909e25ed09SBarry Smith 
/*
  MatSetValues_SeqAIJ_A_Private - Puts one value into row `row`, column `col` of the matrix named A in the
  expanding function (MatSetValues_MPIAIJ() sets A to the diagonal block aij->A).

  Arguments:
    row, col   - position inside A
    value      - value to store or add
    addv       - ADD_VALUES adds to an existing entry, anything else overwrites it
    orow, ocol - only used in the error message, where they are printed as the global row/column

  Besides its arguments the macro reads and writes these names from the enclosing scope:
  rp1, ap1, nrow1, rmax1, low1, high1, lastcol1, t, _i, N, nonew, ignorezeroentries, a, A, am,
  aa, ai, aj, aimax and ailen.

  Steps:
    1. The search window [low1, high1) is reset on one side depending on whether col is at or below
       lastcol1, narrowed by bisection while it spans more than 5 entries, then scanned linearly.
    2. If col is found, the value is added or overwritten and the insertion part is skipped.
    3. Otherwise nothing is inserted when value is 0.0 with ignorezeroentries set and row != col, or
       when nonew == 1; nonew == -1 raises PETSC_ERR_ARG_OUTOFRANGE.
    4. Otherwise MatSeqXAIJReallocateAIJ() is invoked, the later entries of the row are shifted up by
       one, the new column/value pair is stored at position _i, and a->nz and A->nonzerostate are
       incremented.
    5. ailen[row] is always updated to nrow1 at the end.

  The expansion is a plain brace block that defines the label a_noinsert, so it can be expanded at
  most once per function.
*/
291d40312a9SBarry Smith #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
2920520107fSSatish Balay   { \
293db4deed7SKarl Rupp     if (col <= lastcol1) low1 = 0; \
294db4deed7SKarl Rupp     else high1 = nrow1; \
295fd3458f5SBarry Smith     lastcol1 = col; \
296fd3458f5SBarry Smith     while (high1 - low1 > 5) { \
297fd3458f5SBarry Smith       t = (low1 + high1) / 2; \
298fd3458f5SBarry Smith       if (rp1[t] > col) high1 = t; \
299fd3458f5SBarry Smith       else low1 = t; \
300ba4e3ef2SSatish Balay     } \
301fd3458f5SBarry Smith     for (_i = low1; _i < high1; _i++) { \
302fd3458f5SBarry Smith       if (rp1[_i] > col) break; \
303fd3458f5SBarry Smith       if (rp1[_i] == col) { \
3040c0d7e18SFande Kong         if (addv == ADD_VALUES) { \
3050c0d7e18SFande Kong           ap1[_i] += value; \
3060c0d7e18SFande Kong           /* Not sure whether LogFlops will slow down the code or not */ \
3070c0d7e18SFande Kong           (void)PetscLogFlops(1.0); \
3089371c9d4SSatish Balay         } else ap1[_i] = value; \
30930770e4dSSatish Balay         goto a_noinsert; \
3100520107fSSatish Balay       } \
3110520107fSSatish Balay     } \
3129371c9d4SSatish Balay     if (value == 0.0 && ignorezeroentries && row != col) { \
3139371c9d4SSatish Balay       low1  = 0; \
3149371c9d4SSatish Balay       high1 = nrow1; \
3159371c9d4SSatish Balay       goto a_noinsert; \
3169371c9d4SSatish Balay     } \
3179371c9d4SSatish Balay     if (nonew == 1) { \
3189371c9d4SSatish Balay       low1  = 0; \
3199371c9d4SSatish Balay       high1 = nrow1; \
3209371c9d4SSatish Balay       goto a_noinsert; \
3219371c9d4SSatish Balay     } \
32208401ef6SPierre Jolivet     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
323fef13f97SBarry Smith     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
3249371c9d4SSatish Balay     N = nrow1++ - 1; \
3259371c9d4SSatish Balay     a->nz++; \
3269371c9d4SSatish Balay     high1++; \
3270520107fSSatish Balay     /* shift up all the later entries in this row */ \
3289566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
3299566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
330fd3458f5SBarry Smith     rp1[_i] = col; \
331fd3458f5SBarry Smith     ap1[_i] = value; \
332e56f5c9eSBarry Smith     A->nonzerostate++; \
33330770e4dSSatish Balay   a_noinsert:; \
334fd3458f5SBarry Smith     ailen[row] = nrow1; \
3350520107fSSatish Balay   }
3360a198c4cSBarry Smith 
3360a198c4cSBarry Smith 
/*
  MatSetValues_SeqAIJ_B_Private - Puts one value into row `row`, column `col` of the matrix named B in the
  expanding function (MatSetValues_MPIAIJ() sets B to the off-diagonal block aij->B).

  Arguments:
    row, col   - position inside B
    value      - value to store or add
    addv       - ADD_VALUES adds to an existing entry, anything else overwrites it
    orow, ocol - only used in the error message, where they are printed as the global row/column

  Besides its arguments the macro reads and writes these names from the enclosing scope:
  rp2, ap2, nrow2, rmax2, low2, high2, lastcol2, t, _i, N, nonew, ignorezeroentries, b, B, bm,
  ba, bi, bj, bimax and bilen.

  Steps:
    1. The search window [low2, high2) is reset on one side depending on whether col is at or below
       lastcol2, narrowed by bisection while it spans more than 5 entries, then scanned linearly.
    2. If col is found, the value is added or overwritten and the insertion part is skipped.
    3. Otherwise nothing is inserted when value is 0.0 with ignorezeroentries set (unlike the A macro
       there is no row != col exception here), or when nonew == 1; nonew == -1 raises
       PETSC_ERR_ARG_OUTOFRANGE.
    4. Otherwise MatSeqXAIJReallocateAIJ() is invoked, the later entries of the row are shifted up by
       one, the new column/value pair is stored at position _i, and b->nz and B->nonzerostate are
       incremented.
    5. bilen[row] is always updated to nrow2 at the end.

  The expansion is a plain brace block that defines the label b_noinsert, so it can be expanded at
  most once per function.
*/
337d40312a9SBarry Smith #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
33830770e4dSSatish Balay   { \
339db4deed7SKarl Rupp     if (col <= lastcol2) low2 = 0; \
340db4deed7SKarl Rupp     else high2 = nrow2; \
341fd3458f5SBarry Smith     lastcol2 = col; \
342fd3458f5SBarry Smith     while (high2 - low2 > 5) { \
343fd3458f5SBarry Smith       t = (low2 + high2) / 2; \
344fd3458f5SBarry Smith       if (rp2[t] > col) high2 = t; \
345fd3458f5SBarry Smith       else low2 = t; \
346ba4e3ef2SSatish Balay     } \
347fd3458f5SBarry Smith     for (_i = low2; _i < high2; _i++) { \
348fd3458f5SBarry Smith       if (rp2[_i] > col) break; \
349fd3458f5SBarry Smith       if (rp2[_i] == col) { \
3500c0d7e18SFande Kong         if (addv == ADD_VALUES) { \
3510c0d7e18SFande Kong           ap2[_i] += value; \
3520c0d7e18SFande Kong           (void)PetscLogFlops(1.0); \
3539371c9d4SSatish Balay         } else ap2[_i] = value; \
35430770e4dSSatish Balay         goto b_noinsert; \
35530770e4dSSatish Balay       } \
35630770e4dSSatish Balay     } \
3579371c9d4SSatish Balay     if (value == 0.0 && ignorezeroentries) { \
3589371c9d4SSatish Balay       low2  = 0; \
3599371c9d4SSatish Balay       high2 = nrow2; \
3609371c9d4SSatish Balay       goto b_noinsert; \
3619371c9d4SSatish Balay     } \
3629371c9d4SSatish Balay     if (nonew == 1) { \
3639371c9d4SSatish Balay       low2  = 0; \
3649371c9d4SSatish Balay       high2 = nrow2; \
3659371c9d4SSatish Balay       goto b_noinsert; \
3669371c9d4SSatish Balay     } \
36708401ef6SPierre Jolivet     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
368fef13f97SBarry Smith     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
3699371c9d4SSatish Balay     N = nrow2++ - 1; \
3709371c9d4SSatish Balay     b->nz++; \
3719371c9d4SSatish Balay     high2++; \
37230770e4dSSatish Balay     /* shift up all the later entries in this row */ \
3739566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
3749566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
375fd3458f5SBarry Smith     rp2[_i] = col; \
376fd3458f5SBarry Smith     ap2[_i] = value; \
377e56f5c9eSBarry Smith     B->nonzerostate++; \
37830770e4dSSatish Balay   b_noinsert:; \
379fd3458f5SBarry Smith     bilen[row] = nrow2; \
38030770e4dSSatish Balay   }
38130770e4dSSatish Balay 
38130770e4dSSatish Balay 
3829371c9d4SSatish Balay PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) {
3832fd7e33dSBarry Smith   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
3842fd7e33dSBarry Smith   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
3852fd7e33dSBarry Smith   PetscInt     l, *garray                         = mat->garray, diag;
386fff043a9SJunchao Zhang   PetscScalar *aa, *ba;
3872fd7e33dSBarry Smith 
3882fd7e33dSBarry Smith   PetscFunctionBegin;
3892fd7e33dSBarry Smith   /* code only works for square matrices A */
3902fd7e33dSBarry Smith 
3912fd7e33dSBarry Smith   /* find size of row to the left of the diagonal part */
3929566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
3932fd7e33dSBarry Smith   row = row - diag;
3942fd7e33dSBarry Smith   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
3952fd7e33dSBarry Smith     if (garray[b->j[b->i[row] + l]] > diag) break;
3962fd7e33dSBarry Smith   }
397fff043a9SJunchao Zhang   if (l) {
3989566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
3999566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
4009566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
401fff043a9SJunchao Zhang   }
4022fd7e33dSBarry Smith 
4032fd7e33dSBarry Smith   /* diagonal part */
404fff043a9SJunchao Zhang   if (a->i[row + 1] - a->i[row]) {
4059566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
4069566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
4079566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
408fff043a9SJunchao Zhang   }
4092fd7e33dSBarry Smith 
4102fd7e33dSBarry Smith   /* right of diagonal part */
411fff043a9SJunchao Zhang   if (b->i[row + 1] - b->i[row] - l) {
4129566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
4139566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
4149566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
415fff043a9SJunchao Zhang   }
4162fd7e33dSBarry Smith   PetscFunctionReturn(0);
4172fd7e33dSBarry Smith }
4182fd7e33dSBarry Smith 
4199371c9d4SSatish Balay PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
42044a69424SLois Curfman McInnes   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
421071fcb05SBarry Smith   PetscScalar value = 0.0;
422d0f46423SBarry Smith   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
423d0f46423SBarry Smith   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
424ace3abfcSBarry Smith   PetscBool   roworiented = aij->roworiented;
4258a729477SBarry Smith 
4260520107fSSatish Balay   /* Some Variables required in the macro */
4274ee7247eSSatish Balay   Mat         A     = aij->A;
4284ee7247eSSatish Balay   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
42957809a77SBarry Smith   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
430ace3abfcSBarry Smith   PetscBool   ignorezeroentries = a->ignorezeroentries;
43130770e4dSSatish Balay   Mat         B                 = aij->B;
43230770e4dSSatish Balay   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
433d0f46423SBarry Smith   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
434ce496241SStefano Zampini   MatScalar  *aa, *ba;
435fd3458f5SBarry Smith   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
4368d76821aSHong Zhang   PetscInt    nonew;
437a77337e4SBarry Smith   MatScalar  *ap1, *ap2;
4384ee7247eSSatish Balay 
4393a40ed3dSBarry Smith   PetscFunctionBegin;
4409566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(A, &aa));
4419566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(B, &ba));
4428a729477SBarry Smith   for (i = 0; i < m; i++) {
4435ef9f2a5SBarry Smith     if (im[i] < 0) continue;
44408401ef6SPierre Jolivet     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
4454b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
4464b0e389bSBarry Smith       row      = im[i] - rstart;
447fd3458f5SBarry Smith       lastcol1 = -1;
448fd3458f5SBarry Smith       rp1      = aj + ai[row];
449fd3458f5SBarry Smith       ap1      = aa + ai[row];
450fd3458f5SBarry Smith       rmax1    = aimax[row];
451fd3458f5SBarry Smith       nrow1    = ailen[row];
452fd3458f5SBarry Smith       low1     = 0;
453fd3458f5SBarry Smith       high1    = nrow1;
454fd3458f5SBarry Smith       lastcol2 = -1;
455fd3458f5SBarry Smith       rp2      = bj + bi[row];
456d498b1e9SBarry Smith       ap2      = ba + bi[row];
457fd3458f5SBarry Smith       rmax2    = bimax[row];
458d498b1e9SBarry Smith       nrow2    = bilen[row];
459fd3458f5SBarry Smith       low2     = 0;
460fd3458f5SBarry Smith       high2    = nrow2;
461fd3458f5SBarry Smith 
4621eb62cbbSBarry Smith       for (j = 0; j < n; j++) {
463071fcb05SBarry Smith         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
464c80a64e6SBarry Smith         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
465fd3458f5SBarry Smith         if (in[j] >= cstart && in[j] < cend) {
466fd3458f5SBarry Smith           col   = in[j] - cstart;
4678d76821aSHong Zhang           nonew = a->nonew;
468d40312a9SBarry Smith           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
469f7d195e4SLawrence Mitchell         } else if (in[j] < 0) {
470f7d195e4SLawrence Mitchell           continue;
471f7d195e4SLawrence Mitchell         } else {
472f7d195e4SLawrence Mitchell           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
473227d817aSBarry Smith           if (mat->was_assembled) {
47448a46eb9SPierre Jolivet             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
475aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
4769566063dSJacob Faibussowitsch             PetscCall(PetscTableFind(aij->colmap, in[j] + 1, &col)); /* map global col ids to local ones */
477fa46199cSSatish Balay             col--;
478b1fc9764SSatish Balay #else
479905e6a2fSBarry Smith             col = aij->colmap[in[j]] - 1;
480b1fc9764SSatish Balay #endif
481fff043a9SJunchao Zhang             if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
4829566063dSJacob Faibussowitsch               PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
4834b0e389bSBarry Smith               col   = in[j];
4849bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
485f9508a3cSSatish Balay               B     = aij->B;
486f9508a3cSSatish Balay               b     = (Mat_SeqAIJ *)B->data;
4879371c9d4SSatish Balay               bimax = b->imax;
4889371c9d4SSatish Balay               bi    = b->i;
4899371c9d4SSatish Balay               bilen = b->ilen;
4909371c9d4SSatish Balay               bj    = b->j;
4919371c9d4SSatish Balay               ba    = b->a;
492d498b1e9SBarry Smith               rp2   = bj + bi[row];
493d498b1e9SBarry Smith               ap2   = ba + bi[row];
494d498b1e9SBarry Smith               rmax2 = bimax[row];
495d498b1e9SBarry Smith               nrow2 = bilen[row];
496d498b1e9SBarry Smith               low2  = 0;
497d498b1e9SBarry Smith               high2 = nrow2;
498d0f46423SBarry Smith               bm    = aij->B->rmap->n;
499f9508a3cSSatish Balay               ba    = b->a;
500d707bf6cSMatthew Knepley             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
5010587a0fcSBarry Smith               if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
5029566063dSJacob Faibussowitsch                 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
50398921bdaSJacob Faibussowitsch               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
5040587a0fcSBarry Smith             }
505c48de900SBarry Smith           } else col = in[j];
5068d76821aSHong Zhang           nonew = b->nonew;
507d40312a9SBarry Smith           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
5081eb62cbbSBarry Smith         }
5091eb62cbbSBarry Smith       }
5105ef9f2a5SBarry Smith     } else {
51128b400f6SJacob Faibussowitsch       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
51290f02eecSBarry Smith       if (!aij->donotstash) {
5135080c13bSMatthew G Knepley         mat->assembled = PETSC_FALSE;
514d36fbae8SSatish Balay         if (roworiented) {
5159566063dSJacob Faibussowitsch           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
516d36fbae8SSatish Balay         } else {
5179566063dSJacob Faibussowitsch           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
5184b0e389bSBarry Smith         }
5191eb62cbbSBarry Smith       }
5208a729477SBarry Smith     }
52190f02eecSBarry Smith   }
5225519a089SJose E. Roman   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
5239566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArray(B, &ba));
5243a40ed3dSBarry Smith   PetscFunctionReturn(0);
5258a729477SBarry Smith }
5268a729477SBarry Smith 
5272b08fdbeSandi selinger /*
528904d1e70Sandi selinger     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
5292b08fdbeSandi selinger     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
530904d1e70Sandi selinger     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
5312b08fdbeSandi selinger */
5329371c9d4SSatish Balay PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) {
533904d1e70Sandi selinger   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
534904d1e70Sandi selinger   Mat         A      = aij->A; /* diagonal part of the matrix */
535904d1e70Sandi selinger   Mat         B      = aij->B; /* offdiagonal part of the matrix */
536904d1e70Sandi selinger   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
537904d1e70Sandi selinger   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
538904d1e70Sandi selinger   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
539904d1e70Sandi selinger   PetscInt   *ailen = a->ilen, *aj = a->j;
540904d1e70Sandi selinger   PetscInt   *bilen = b->ilen, *bj = b->j;
5416dc1ffa3Sandi selinger   PetscInt    am          = aij->A->rmap->n, j;
542904d1e70Sandi selinger   PetscInt    diag_so_far = 0, dnz;
543904d1e70Sandi selinger   PetscInt    offd_so_far = 0, onz;
544904d1e70Sandi selinger 
545904d1e70Sandi selinger   PetscFunctionBegin;
546904d1e70Sandi selinger   /* Iterate over all rows of the matrix */
547904d1e70Sandi selinger   for (j = 0; j < am; j++) {
548904d1e70Sandi selinger     dnz = onz = 0;
549904d1e70Sandi selinger     /*  Iterate over all non-zero columns of the current row */
5506dc1ffa3Sandi selinger     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
551904d1e70Sandi selinger       /* If column is in the diagonal */
552904d1e70Sandi selinger       if (mat_j[col] >= cstart && mat_j[col] < cend) {
553904d1e70Sandi selinger         aj[diag_so_far++] = mat_j[col] - cstart;
554904d1e70Sandi selinger         dnz++;
555904d1e70Sandi selinger       } else { /* off-diagonal entries */
556904d1e70Sandi selinger         bj[offd_so_far++] = mat_j[col];
557904d1e70Sandi selinger         onz++;
558904d1e70Sandi selinger       }
559904d1e70Sandi selinger     }
560904d1e70Sandi selinger     ailen[j] = dnz;
561904d1e70Sandi selinger     bilen[j] = onz;
562904d1e70Sandi selinger   }
563904d1e70Sandi selinger   PetscFunctionReturn(0);
564904d1e70Sandi selinger }
565904d1e70Sandi selinger 
566904d1e70Sandi selinger /*
567904d1e70Sandi selinger     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
568904d1e70Sandi selinger     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
5691de21080Sandi selinger     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
5701de21080Sandi selinger     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
5711de21080Sandi selinger     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
572904d1e70Sandi selinger */
5739371c9d4SSatish Balay PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) {
5743a063d27Sandi selinger   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
5753a063d27Sandi selinger   Mat          A    = aij->A; /* diagonal part of the matrix */
5763a063d27Sandi selinger   Mat          B    = aij->B; /* offdiagonal part of the matrix */
577e9ede7d0Sandi selinger   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
5783a063d27Sandi selinger   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
5793a063d27Sandi selinger   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
5803a063d27Sandi selinger   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
5813a063d27Sandi selinger   PetscInt    *ailen = a->ilen, *aj = a->j;
5823a063d27Sandi selinger   PetscInt    *bilen = b->ilen, *bj = b->j;
5836dc1ffa3Sandi selinger   PetscInt     am          = aij->A->rmap->n, j;
5841de21080Sandi selinger   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
585904d1e70Sandi selinger   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
586904d1e70Sandi selinger   PetscScalar *aa = a->a, *ba = b->a;
5873a063d27Sandi selinger 
5883a063d27Sandi selinger   PetscFunctionBegin;
5893a063d27Sandi selinger   /* Iterate over all rows of the matrix */
5903a063d27Sandi selinger   for (j = 0; j < am; j++) {
591904d1e70Sandi selinger     dnz_row = onz_row = 0;
592904d1e70Sandi selinger     rowstart_offd     = full_offd_i[j];
593904d1e70Sandi selinger     rowstart_diag     = full_diag_i[j];
594e9ede7d0Sandi selinger     /*  Iterate over all non-zero columns of the current row */
595e9ede7d0Sandi selinger     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
596ae8e66a0Sandi selinger       /* If column is in the diagonal */
5973a063d27Sandi selinger       if (mat_j[col] >= cstart && mat_j[col] < cend) {
598904d1e70Sandi selinger         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
599904d1e70Sandi selinger         aa[rowstart_diag + dnz_row] = mat_a[col];
600904d1e70Sandi selinger         dnz_row++;
601ae8e66a0Sandi selinger       } else { /* off-diagonal entries */
602904d1e70Sandi selinger         bj[rowstart_offd + onz_row] = mat_j[col];
603904d1e70Sandi selinger         ba[rowstart_offd + onz_row] = mat_a[col];
604904d1e70Sandi selinger         onz_row++;
6053a063d27Sandi selinger       }
6063a063d27Sandi selinger     }
607904d1e70Sandi selinger     ailen[j] = dnz_row;
608904d1e70Sandi selinger     bilen[j] = onz_row;
6093a063d27Sandi selinger   }
6103a063d27Sandi selinger   PetscFunctionReturn(0);
6113a063d27Sandi selinger }
6123a063d27Sandi selinger 
6139371c9d4SSatish Balay PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
614b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
615d0f46423SBarry Smith   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
616d0f46423SBarry Smith   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
617b49de8d1SLois Curfman McInnes 
6183a40ed3dSBarry Smith   PetscFunctionBegin;
619b49de8d1SLois Curfman McInnes   for (i = 0; i < m; i++) {
62054c59aa7SJacob Faibussowitsch     if (idxm[i] < 0) continue; /* negative row */
62154c59aa7SJacob Faibussowitsch     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
622b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
623b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
624b49de8d1SLois Curfman McInnes       for (j = 0; j < n; j++) {
62554c59aa7SJacob Faibussowitsch         if (idxn[j] < 0) continue; /* negative column */
62654c59aa7SJacob Faibussowitsch         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
627b49de8d1SLois Curfman McInnes         if (idxn[j] >= cstart && idxn[j] < cend) {
628b49de8d1SLois Curfman McInnes           col = idxn[j] - cstart;
6299566063dSJacob Faibussowitsch           PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
630fa852ad4SSatish Balay         } else {
63148a46eb9SPierre Jolivet           if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
632aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
6339566063dSJacob Faibussowitsch           PetscCall(PetscTableFind(aij->colmap, idxn[j] + 1, &col));
634fa46199cSSatish Balay           col--;
635b1fc9764SSatish Balay #else
636905e6a2fSBarry Smith           col = aij->colmap[idxn[j]] - 1;
637b1fc9764SSatish Balay #endif
638e60e1c95SSatish Balay           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
63948a46eb9SPierre Jolivet           else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
640b49de8d1SLois Curfman McInnes         }
641b49de8d1SLois Curfman McInnes       }
642f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
643b49de8d1SLois Curfman McInnes   }
6443a40ed3dSBarry Smith   PetscFunctionReturn(0);
645b49de8d1SLois Curfman McInnes }
646bc5ccf88SSatish Balay 
6479371c9d4SSatish Balay PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) {
648bc5ccf88SSatish Balay   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
649b1d57f15SBarry Smith   PetscInt    nstash, reallocs;
650bc5ccf88SSatish Balay 
651bc5ccf88SSatish Balay   PetscFunctionBegin;
6522205254eSKarl Rupp   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
653bc5ccf88SSatish Balay 
6549566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
6559566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
6569566063dSJacob Faibussowitsch   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
657bc5ccf88SSatish Balay   PetscFunctionReturn(0);
658bc5ccf88SSatish Balay }
659bc5ccf88SSatish Balay 
6609371c9d4SSatish Balay PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) {
661bc5ccf88SSatish Balay   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
662b1d57f15SBarry Smith   PetscMPIInt  n;
663b1d57f15SBarry Smith   PetscInt     i, j, rstart, ncols, flg;
664e44c0bd4SBarry Smith   PetscInt    *row, *col;
665ace3abfcSBarry Smith   PetscBool    other_disassembled;
66687828ca2SBarry Smith   PetscScalar *val;
667bc5ccf88SSatish Balay 
66891c97fd4SSatish Balay   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
6696e111a19SKarl Rupp 
670bc5ccf88SSatish Balay   PetscFunctionBegin;
6714cb17eb5SBarry Smith   if (!aij->donotstash && !mat->nooffprocentries) {
672a2d1c673SSatish Balay     while (1) {
6739566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
674a2d1c673SSatish Balay       if (!flg) break;
675a2d1c673SSatish Balay 
676bc5ccf88SSatish Balay       for (i = 0; i < n;) {
677bc5ccf88SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
6782205254eSKarl Rupp         for (j = i, rstart = row[j]; j < n; j++) {
6792205254eSKarl Rupp           if (row[j] != rstart) break;
6802205254eSKarl Rupp         }
681bc5ccf88SSatish Balay         if (j < n) ncols = j - i;
682bc5ccf88SSatish Balay         else ncols = n - i;
683bc5ccf88SSatish Balay         /* Now assemble all these values with a single function call */
6849566063dSJacob Faibussowitsch         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
685bc5ccf88SSatish Balay         i = j;
686bc5ccf88SSatish Balay       }
687bc5ccf88SSatish Balay     }
6889566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->stash));
689bc5ccf88SSatish Balay   }
6908c3ff71bSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
691c70f7ee4SJunchao Zhang   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
6929ecce9b1SRichard Tran Mills   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
6939ecce9b1SRichard Tran Mills   if (mat->boundtocpu) {
6949566063dSJacob Faibussowitsch     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
6959566063dSJacob Faibussowitsch     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
6969ecce9b1SRichard Tran Mills   }
697e2cf4d64SStefano Zampini #endif
6989566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(aij->A, mode));
6999566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(aij->A, mode));
700bc5ccf88SSatish Balay 
701bc5ccf88SSatish Balay   /* determine if any processor has disassembled, if so we must
702071fcb05SBarry Smith      also disassemble ourself, in order that we may reassemble. */
703bc5ccf88SSatish Balay   /*
704bc5ccf88SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
705bc5ccf88SSatish Balay      no processor disassembled thus we can skip this stuff
706bc5ccf88SSatish Balay   */
707bc5ccf88SSatish Balay   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
7085f9db2b2SJunchao Zhang     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
709fff043a9SJunchao Zhang     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */
7109566063dSJacob Faibussowitsch       PetscCall(MatDisAssemble_MPIAIJ(mat));
711ad59fb31SSatish Balay     }
712ad59fb31SSatish Balay   }
71348a46eb9SPierre Jolivet   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
7149566063dSJacob Faibussowitsch   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
7158c3ff71bSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
716c70f7ee4SJunchao Zhang   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
717e2cf4d64SStefano Zampini #endif
7189566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(aij->B, mode));
7199566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(aij->B, mode));
720bc5ccf88SSatish Balay 
7219566063dSJacob Faibussowitsch   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
7222205254eSKarl Rupp 
723f4259b30SLisandro Dalcin   aij->rowvalues = NULL;
724a30b2313SHong Zhang 
7259566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&aij->diag));
726e56f5c9eSBarry Smith 
7274f9cfa9eSBarry Smith   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
7284f9cfa9eSBarry Smith   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
729e56f5c9eSBarry Smith     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
7301c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
731e56f5c9eSBarry Smith   }
7328c3ff71bSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
733c70f7ee4SJunchao Zhang   mat->offloadmask = PETSC_OFFLOAD_BOTH;
734e2cf4d64SStefano Zampini #endif
735bc5ccf88SSatish Balay   PetscFunctionReturn(0);
736bc5ccf88SSatish Balay }
737bc5ccf88SSatish Balay 
7389371c9d4SSatish Balay PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) {
73944a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
7403a40ed3dSBarry Smith 
7413a40ed3dSBarry Smith   PetscFunctionBegin;
7429566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->A));
7439566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->B));
7443a40ed3dSBarry Smith   PetscFunctionReturn(0);
7451eb62cbbSBarry Smith }
7461eb62cbbSBarry Smith 
7479371c9d4SSatish Balay PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
7481b1dd7adSMatthew G. Knepley   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
749a92ad425SStefano Zampini   PetscObjectState sA, sB;
7501b1dd7adSMatthew G. Knepley   PetscInt        *lrows;
7516e520ac8SStefano Zampini   PetscInt         r, len;
752a92ad425SStefano Zampini   PetscBool        cong, lch, gch;
7531eb62cbbSBarry Smith 
7543a40ed3dSBarry Smith   PetscFunctionBegin;
7556e520ac8SStefano Zampini   /* get locally owned rows */
7569566063dSJacob Faibussowitsch   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
7579566063dSJacob Faibussowitsch   PetscCall(MatHasCongruentLayouts(A, &cong));
75897b48c8fSBarry Smith   /* fix right hand side if needed */
75997b48c8fSBarry Smith   if (x && b) {
7601b1dd7adSMatthew G. Knepley     const PetscScalar *xx;
7611b1dd7adSMatthew G. Knepley     PetscScalar       *bb;
7621b1dd7adSMatthew G. Knepley 
76328b400f6SJacob Faibussowitsch     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
7649566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
7659566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
7661b1dd7adSMatthew G. Knepley     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
7679566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
7689566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
76997b48c8fSBarry Smith   }
770a92ad425SStefano Zampini 
771a92ad425SStefano Zampini   sA = mat->A->nonzerostate;
772a92ad425SStefano Zampini   sB = mat->B->nonzerostate;
773a92ad425SStefano Zampini 
774a92ad425SStefano Zampini   if (diag != 0.0 && cong) {
7759566063dSJacob Faibussowitsch     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
7769566063dSJacob Faibussowitsch     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
777a92ad425SStefano Zampini   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
778a92ad425SStefano Zampini     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
779a92ad425SStefano Zampini     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
780a92ad425SStefano Zampini     PetscInt    nnwA, nnwB;
781a92ad425SStefano Zampini     PetscBool   nnzA, nnzB;
782a92ad425SStefano Zampini 
783a92ad425SStefano Zampini     nnwA = aijA->nonew;
784a92ad425SStefano Zampini     nnwB = aijB->nonew;
785a92ad425SStefano Zampini     nnzA = aijA->keepnonzeropattern;
786a92ad425SStefano Zampini     nnzB = aijB->keepnonzeropattern;
787a92ad425SStefano Zampini     if (!nnzA) {
7889566063dSJacob Faibussowitsch       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
789a92ad425SStefano Zampini       aijA->nonew = 0;
790a92ad425SStefano Zampini     }
791a92ad425SStefano Zampini     if (!nnzB) {
7929566063dSJacob Faibussowitsch       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
793a92ad425SStefano Zampini       aijB->nonew = 0;
794a92ad425SStefano Zampini     }
795a92ad425SStefano Zampini     /* Must zero here before the next loop */
7969566063dSJacob Faibussowitsch     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
7979566063dSJacob Faibussowitsch     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
7981b1dd7adSMatthew G. Knepley     for (r = 0; r < len; ++r) {
7991b1dd7adSMatthew G. Knepley       const PetscInt row = lrows[r] + A->rmap->rstart;
800a92ad425SStefano Zampini       if (row >= A->cmap->N) continue;
8019566063dSJacob Faibussowitsch       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
802e2d53e46SBarry Smith     }
803a92ad425SStefano Zampini     aijA->nonew = nnwA;
804a92ad425SStefano Zampini     aijB->nonew = nnwB;
8056eb55b6aSBarry Smith   } else {
8069566063dSJacob Faibussowitsch     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
8079566063dSJacob Faibussowitsch     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
8086eb55b6aSBarry Smith   }
8099566063dSJacob Faibussowitsch   PetscCall(PetscFree(lrows));
8109566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
8119566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
8124f9cfa9eSBarry Smith 
813a92ad425SStefano Zampini   /* reduce nonzerostate */
814a92ad425SStefano Zampini   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
8151c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
816a92ad425SStefano Zampini   if (gch) A->nonzerostate++;
8173a40ed3dSBarry Smith   PetscFunctionReturn(0);
8181eb62cbbSBarry Smith }
8191eb62cbbSBarry Smith 
8209371c9d4SSatish Balay PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
8219c7c4993SBarry Smith   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
8225ba17502SJed Brown   PetscMPIInt        n = A->rmap->n;
823131c27b5Sprj-   PetscInt           i, j, r, m, len = 0;
82454bd4135SMatthew G. Knepley   PetscInt          *lrows, *owners = A->rmap->range;
825131c27b5Sprj-   PetscMPIInt        p = 0;
82654bd4135SMatthew G. Knepley   PetscSFNode       *rrows;
82754bd4135SMatthew G. Knepley   PetscSF            sf;
8289c7c4993SBarry Smith   const PetscScalar *xx;
829fff043a9SJunchao Zhang   PetscScalar       *bb, *mask, *aij_a;
830564f14d6SBarry Smith   Vec                xmask, lmask;
831564f14d6SBarry Smith   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
832564f14d6SBarry Smith   const PetscInt    *aj, *ii, *ridx;
833564f14d6SBarry Smith   PetscScalar       *aa;
8349c7c4993SBarry Smith 
8359c7c4993SBarry Smith   PetscFunctionBegin;
83654bd4135SMatthew G. Knepley   /* Create SF where leaves are input rows and roots are owned rows */
8379566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &lrows));
83854bd4135SMatthew G. Knepley   for (r = 0; r < n; ++r) lrows[r] = -1;
8399566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(N, &rrows));
84054bd4135SMatthew G. Knepley   for (r = 0; r < N; ++r) {
84154bd4135SMatthew G. Knepley     const PetscInt idx = rows[r];
842aed4548fSBarry Smith     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
8435ba17502SJed Brown     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
8449566063dSJacob Faibussowitsch       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
8455ba17502SJed Brown     }
84654bd4135SMatthew G. Knepley     rrows[r].rank  = p;
84754bd4135SMatthew G. Knepley     rrows[r].index = rows[r] - owners[p];
8489c7c4993SBarry Smith   }
8499566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
8509566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
85154bd4135SMatthew G. Knepley   /* Collect flags for rows to be zeroed */
8529566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
8539566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
8549566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf));
85554bd4135SMatthew G. Knepley   /* Compress and put in row numbers */
8569371c9d4SSatish Balay   for (r = 0; r < n; ++r)
8579371c9d4SSatish Balay     if (lrows[r] >= 0) lrows[len++] = r;
858564f14d6SBarry Smith   /* zero diagonal part of matrix */
8599566063dSJacob Faibussowitsch   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
860564f14d6SBarry Smith   /* handle off diagonal part of matrix */
8619566063dSJacob Faibussowitsch   PetscCall(MatCreateVecs(A, &xmask, NULL));
8629566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(l->lvec, &lmask));
8639566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xmask, &bb));
86454bd4135SMatthew G. Knepley   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
8659566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xmask, &bb));
8669566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
8679566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
8689566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&xmask));
869a92ad425SStefano Zampini   if (x && b) { /* this code is buggy when the row and column layout don't match */
870a92ad425SStefano Zampini     PetscBool cong;
871a92ad425SStefano Zampini 
8729566063dSJacob Faibussowitsch     PetscCall(MatHasCongruentLayouts(A, &cong));
87328b400f6SJacob Faibussowitsch     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
8749566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
8759566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
8769566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(l->lvec, &xx));
8779566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
878377aa5a1SBarry Smith   }
8799566063dSJacob Faibussowitsch   PetscCall(VecGetArray(lmask, &mask));
880564f14d6SBarry Smith   /* remove zeroed rows of off diagonal matrix */
8819566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
882564f14d6SBarry Smith   ii = aij->i;
88348a46eb9SPierre Jolivet   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
884564f14d6SBarry Smith   /* loop over all elements of off process part of matrix zeroing removed columns*/
885564f14d6SBarry Smith   if (aij->compressedrow.use) {
886564f14d6SBarry Smith     m    = aij->compressedrow.nrows;
887564f14d6SBarry Smith     ii   = aij->compressedrow.i;
888564f14d6SBarry Smith     ridx = aij->compressedrow.rindex;
889564f14d6SBarry Smith     for (i = 0; i < m; i++) {
890564f14d6SBarry Smith       n  = ii[i + 1] - ii[i];
891564f14d6SBarry Smith       aj = aij->j + ii[i];
892fff043a9SJunchao Zhang       aa = aij_a + ii[i];
893564f14d6SBarry Smith 
894564f14d6SBarry Smith       for (j = 0; j < n; j++) {
89525266a92SSatish Balay         if (PetscAbsScalar(mask[*aj])) {
896377aa5a1SBarry Smith           if (b) bb[*ridx] -= *aa * xx[*aj];
897564f14d6SBarry Smith           *aa = 0.0;
898564f14d6SBarry Smith         }
899564f14d6SBarry Smith         aa++;
900564f14d6SBarry Smith         aj++;
901564f14d6SBarry Smith       }
902564f14d6SBarry Smith       ridx++;
903564f14d6SBarry Smith     }
904564f14d6SBarry Smith   } else { /* do not use compressed row format */
905564f14d6SBarry Smith     m = l->B->rmap->n;
906564f14d6SBarry Smith     for (i = 0; i < m; i++) {
907564f14d6SBarry Smith       n  = ii[i + 1] - ii[i];
908564f14d6SBarry Smith       aj = aij->j + ii[i];
909fff043a9SJunchao Zhang       aa = aij_a + ii[i];
910564f14d6SBarry Smith       for (j = 0; j < n; j++) {
91125266a92SSatish Balay         if (PetscAbsScalar(mask[*aj])) {
912377aa5a1SBarry Smith           if (b) bb[i] -= *aa * xx[*aj];
913564f14d6SBarry Smith           *aa = 0.0;
914564f14d6SBarry Smith         }
915564f14d6SBarry Smith         aa++;
916564f14d6SBarry Smith         aj++;
917564f14d6SBarry Smith       }
918564f14d6SBarry Smith     }
919564f14d6SBarry Smith   }
920a92ad425SStefano Zampini   if (x && b) {
9219566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
9229566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
923377aa5a1SBarry Smith   }
9249566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
9259566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(lmask, &mask));
9269566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&lmask));
9279566063dSJacob Faibussowitsch   PetscCall(PetscFree(lrows));
9284f9cfa9eSBarry Smith 
9294f9cfa9eSBarry Smith   /* only change matrix nonzero state if pattern was allowed to be changed */
9304f9cfa9eSBarry Smith   if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
9314f9cfa9eSBarry Smith     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
9321c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
9334f9cfa9eSBarry Smith   }
9349c7c4993SBarry Smith   PetscFunctionReturn(0);
9359c7c4993SBarry Smith }
9369c7c4993SBarry Smith 
9379371c9d4SSatish Balay PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) {
938416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
939b1d57f15SBarry Smith   PetscInt    nt;
94019b3b6edSHong Zhang   VecScatter  Mvctx = a->Mvctx;
941416022c9SBarry Smith 
9423a40ed3dSBarry Smith   PetscFunctionBegin;
9439566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(xx, &nt));
94408401ef6SPierre Jolivet   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
9459566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
946296d8154SBarry Smith   PetscUseTypeMethod(a->A, mult, xx, yy);
9479566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
948296d8154SBarry Smith   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
9493a40ed3dSBarry Smith   PetscFunctionReturn(0);
9501eb62cbbSBarry Smith }
9511eb62cbbSBarry Smith 
9529371c9d4SSatish Balay PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) {
953bd0c2dcbSBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
954bd0c2dcbSBarry Smith 
955bd0c2dcbSBarry Smith   PetscFunctionBegin;
9569566063dSJacob Faibussowitsch   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
957bd0c2dcbSBarry Smith   PetscFunctionReturn(0);
958bd0c2dcbSBarry Smith }
959bd0c2dcbSBarry Smith 
9609371c9d4SSatish Balay PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
961416022c9SBarry Smith   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
96201ad2aeeSHong Zhang   VecScatter  Mvctx = a->Mvctx;
9633a40ed3dSBarry Smith 
9643a40ed3dSBarry Smith   PetscFunctionBegin;
9659566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
9669566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
9679566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
9689566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
9693a40ed3dSBarry Smith   PetscFunctionReturn(0);
970da3a660dSBarry Smith }
971da3a660dSBarry Smith 
9729371c9d4SSatish Balay PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) {
973416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
974da3a660dSBarry Smith 
9753a40ed3dSBarry Smith   PetscFunctionBegin;
976da3a660dSBarry Smith   /* do nondiagonal part */
9779566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
978da3a660dSBarry Smith   /* do local part */
9799566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
9809613dc34SJunchao Zhang   /* add partial results together */
9819566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
9829566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
9833a40ed3dSBarry Smith   PetscFunctionReturn(0);
984da3a660dSBarry Smith }
985da3a660dSBarry Smith 
9869371c9d4SSatish Balay PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) {
9874f423910Svictorle   MPI_Comm    comm;
988cd0d46ebSvictorle   Mat_MPIAIJ *Aij  = (Mat_MPIAIJ *)Amat->data, *Bij;
98966501d38Svictorle   Mat         Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
990cd0d46ebSvictorle   IS          Me, Notme;
991b1d57f15SBarry Smith   PetscInt    M, N, first, last, *notme, i;
99254d735aeSStefano Zampini   PetscBool   lf;
993b1d57f15SBarry Smith   PetscMPIInt size;
994cd0d46ebSvictorle 
995cd0d46ebSvictorle   PetscFunctionBegin;
99642e5f5b4Svictorle   /* Easy test: symmetric diagonal block */
9979371c9d4SSatish Balay   Bij  = (Mat_MPIAIJ *)Bmat->data;
9989371c9d4SSatish Balay   Bdia = Bij->A;
9999566063dSJacob Faibussowitsch   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
10001c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1001cd0d46ebSvictorle   if (!*f) PetscFunctionReturn(0);
10029566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
10039566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
1004b1d57f15SBarry Smith   if (size == 1) PetscFunctionReturn(0);
100542e5f5b4Svictorle 
10067dae84e0SHong Zhang   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
10079566063dSJacob Faibussowitsch   PetscCall(MatGetSize(Amat, &M, &N));
10089566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
10099566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(N - last + first, &notme));
1010cd0d46ebSvictorle   for (i = 0; i < first; i++) notme[i] = i;
1011cd0d46ebSvictorle   for (i = last; i < M; i++) notme[i - last + first] = i;
10129566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
10139566063dSJacob Faibussowitsch   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
10149566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
101566501d38Svictorle   Aoff = Aoffs[0];
10169566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
101766501d38Svictorle   Boff = Boffs[0];
10189566063dSJacob Faibussowitsch   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
10199566063dSJacob Faibussowitsch   PetscCall(MatDestroyMatrices(1, &Aoffs));
10209566063dSJacob Faibussowitsch   PetscCall(MatDestroyMatrices(1, &Boffs));
10219566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&Me));
10229566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&Notme));
10239566063dSJacob Faibussowitsch   PetscCall(PetscFree(notme));
1024cd0d46ebSvictorle   PetscFunctionReturn(0);
1025cd0d46ebSvictorle }
1026cd0d46ebSvictorle 
10279371c9d4SSatish Balay PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f) {
1028a3bbdb47SHong Zhang   PetscFunctionBegin;
10299566063dSJacob Faibussowitsch   PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
1030a3bbdb47SHong Zhang   PetscFunctionReturn(0);
1031a3bbdb47SHong Zhang }
1032a3bbdb47SHong Zhang 
10339371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
1034416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1035da3a660dSBarry Smith 
10363a40ed3dSBarry Smith   PetscFunctionBegin;
1037da3a660dSBarry Smith   /* do nondiagonal part */
10389566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1039da3a660dSBarry Smith   /* do local part */
10409566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
10419613dc34SJunchao Zhang   /* add partial results together */
10429566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
10439566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
10443a40ed3dSBarry Smith   PetscFunctionReturn(0);
1045da3a660dSBarry Smith }
1046da3a660dSBarry Smith 
10471eb62cbbSBarry Smith /*
10481eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
10491eb62cbbSBarry Smith    diagonal block
10501eb62cbbSBarry Smith */
10519371c9d4SSatish Balay PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) {
1052416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
10533a40ed3dSBarry Smith 
10543a40ed3dSBarry Smith   PetscFunctionBegin;
105508401ef6SPierre Jolivet   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1056aed4548fSBarry Smith   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
10579566063dSJacob Faibussowitsch   PetscCall(MatGetDiagonal(a->A, v));
10583a40ed3dSBarry Smith   PetscFunctionReturn(0);
10591eb62cbbSBarry Smith }
10601eb62cbbSBarry Smith 
10619371c9d4SSatish Balay PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) {
1062052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
10633a40ed3dSBarry Smith 
10643a40ed3dSBarry Smith   PetscFunctionBegin;
10659566063dSJacob Faibussowitsch   PetscCall(MatScale(a->A, aa));
10669566063dSJacob Faibussowitsch   PetscCall(MatScale(a->B, aa));
10673a40ed3dSBarry Smith   PetscFunctionReturn(0);
1068052efed2SBarry Smith }
1069052efed2SBarry Smith 
1070cbc6b225SStefano Zampini /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
10719371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) {
1072cbc6b225SStefano Zampini   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1073cbc6b225SStefano Zampini 
1074cbc6b225SStefano Zampini   PetscFunctionBegin;
10759566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&aij->coo_sf));
1076158ec288SJunchao Zhang   PetscCall(PetscFree(aij->Aperm1));
1077158ec288SJunchao Zhang   PetscCall(PetscFree(aij->Bperm1));
1078158ec288SJunchao Zhang   PetscCall(PetscFree(aij->Ajmap1));
1079158ec288SJunchao Zhang   PetscCall(PetscFree(aij->Bjmap1));
1080158ec288SJunchao Zhang 
1081158ec288SJunchao Zhang   PetscCall(PetscFree(aij->Aimap2));
1082158ec288SJunchao Zhang   PetscCall(PetscFree(aij->Bimap2));
1083158ec288SJunchao Zhang   PetscCall(PetscFree(aij->Aperm2));
1084158ec288SJunchao Zhang   PetscCall(PetscFree(aij->Bperm2));
1085158ec288SJunchao Zhang   PetscCall(PetscFree(aij->Ajmap2));
1086158ec288SJunchao Zhang   PetscCall(PetscFree(aij->Bjmap2));
1087158ec288SJunchao Zhang 
10889566063dSJacob Faibussowitsch   PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
10899566063dSJacob Faibussowitsch   PetscCall(PetscFree(aij->Cperm1));
1090cbc6b225SStefano Zampini   PetscFunctionReturn(0);
1091cbc6b225SStefano Zampini }
1092cbc6b225SStefano Zampini 
10939371c9d4SSatish Balay PetscErrorCode MatDestroy_MPIAIJ(Mat mat) {
109444a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
109583e2fdc7SBarry Smith 
10963a40ed3dSBarry Smith   PetscFunctionBegin;
1097aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1098c0aa6a63SJacob Faibussowitsch   PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
1099a5a9c739SBarry Smith #endif
11009566063dSJacob Faibussowitsch   PetscCall(MatStashDestroy_Private(&mat->stash));
11019566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&aij->diag));
11029566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&aij->A));
11039566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&aij->B));
1104aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
11059566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&aij->colmap));
1106b1fc9764SSatish Balay #else
11079566063dSJacob Faibussowitsch   PetscCall(PetscFree(aij->colmap));
1108b1fc9764SSatish Balay #endif
11099566063dSJacob Faibussowitsch   PetscCall(PetscFree(aij->garray));
11109566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&aij->lvec));
11119566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&aij->Mvctx));
11129566063dSJacob Faibussowitsch   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
11139566063dSJacob Faibussowitsch   PetscCall(PetscFree(aij->ld));
1114394ed5ebSJunchao Zhang 
1115cbc6b225SStefano Zampini   /* Free COO */
11169566063dSJacob Faibussowitsch   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1117394ed5ebSJunchao Zhang 
11189566063dSJacob Faibussowitsch   PetscCall(PetscFree(mat->data));
1119901853e0SKris Buschelman 
11206718818eSStefano Zampini   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
11219566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
11226718818eSStefano Zampini 
11239566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
11249566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
11259566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
11269566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
11279566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
11289566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
11299566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
11309566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
11319566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
11329566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
11333d0639e7SStefano Zampini #if defined(PETSC_HAVE_CUDA)
11349566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
11353d0639e7SStefano Zampini #endif
11363d0639e7SStefano Zampini #if defined(PETSC_HAVE_KOKKOS_KERNELS)
11379566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
11383d0639e7SStefano Zampini #endif
11399566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
11405d7652ecSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
11419566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
11425d7652ecSHong Zhang #endif
1143d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
11449566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
1145d24d4204SJose E. Roman #endif
114663c07aadSStefano Zampini #if defined(PETSC_HAVE_HYPRE)
11479566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
11489566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
114963c07aadSStefano Zampini #endif
11509566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
11519566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
11529566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
11539566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
11549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
11559566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
11563d0639e7SStefano Zampini #if defined(PETSC_HAVE_MKL_SPARSE)
11579566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
11583d0639e7SStefano Zampini #endif
11599566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
11609566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
11619566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
11629566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
11639566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
11643a40ed3dSBarry Smith   PetscFunctionReturn(0);
11651eb62cbbSBarry Smith }
1166ee50ffe9SBarry Smith 
11679371c9d4SSatish Balay PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) {
11688e2fed03SBarry Smith   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
11698e2fed03SBarry Smith   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
11708e2fed03SBarry Smith   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
11713ea6fe3dSLisandro Dalcin   const PetscInt    *garray = aij->garray;
11722e5835c6SStefano Zampini   const PetscScalar *aa, *ba;
11733ea6fe3dSLisandro Dalcin   PetscInt           header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
11743ea6fe3dSLisandro Dalcin   PetscInt          *rowlens;
11753ea6fe3dSLisandro Dalcin   PetscInt          *colidxs;
11763ea6fe3dSLisandro Dalcin   PetscScalar       *matvals;
11778e2fed03SBarry Smith 
11788e2fed03SBarry Smith   PetscFunctionBegin;
11799566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
11803ea6fe3dSLisandro Dalcin 
11813ea6fe3dSLisandro Dalcin   M  = mat->rmap->N;
11823ea6fe3dSLisandro Dalcin   N  = mat->cmap->N;
11833ea6fe3dSLisandro Dalcin   m  = mat->rmap->n;
11843ea6fe3dSLisandro Dalcin   rs = mat->rmap->rstart;
11853ea6fe3dSLisandro Dalcin   cs = mat->cmap->rstart;
11868e2fed03SBarry Smith   nz = A->nz + B->nz;
11873ea6fe3dSLisandro Dalcin 
11883ea6fe3dSLisandro Dalcin   /* write matrix header */
11890700a824SBarry Smith   header[0] = MAT_FILE_CLASSID;
11909371c9d4SSatish Balay   header[1] = M;
11919371c9d4SSatish Balay   header[2] = N;
11929371c9d4SSatish Balay   header[3] = nz;
11939566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
11949566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
11958e2fed03SBarry Smith 
11963ea6fe3dSLisandro Dalcin   /* fill in and store row lengths  */
11979566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &rowlens));
11983ea6fe3dSLisandro Dalcin   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
11999566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
12009566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowlens));
12018e2fed03SBarry Smith 
12023ea6fe3dSLisandro Dalcin   /* fill in and store column indices */
12039566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &colidxs));
12043ea6fe3dSLisandro Dalcin   for (cnt = 0, i = 0; i < m; i++) {
12053ea6fe3dSLisandro Dalcin     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
12063ea6fe3dSLisandro Dalcin       if (garray[B->j[jb]] > cs) break;
12073ea6fe3dSLisandro Dalcin       colidxs[cnt++] = garray[B->j[jb]];
12088e2fed03SBarry Smith     }
12099371c9d4SSatish Balay     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
12109371c9d4SSatish Balay     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
12118e2fed03SBarry Smith   }
121208401ef6SPierre Jolivet   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
12139566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
12149566063dSJacob Faibussowitsch   PetscCall(PetscFree(colidxs));
12158e2fed03SBarry Smith 
12163ea6fe3dSLisandro Dalcin   /* fill in and store nonzero values */
12179566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
12189566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
12199566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &matvals));
12203ea6fe3dSLisandro Dalcin   for (cnt = 0, i = 0; i < m; i++) {
12213ea6fe3dSLisandro Dalcin     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
12223ea6fe3dSLisandro Dalcin       if (garray[B->j[jb]] > cs) break;
12232e5835c6SStefano Zampini       matvals[cnt++] = ba[jb];
12248e2fed03SBarry Smith     }
12259371c9d4SSatish Balay     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
12269371c9d4SSatish Balay     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
12278e2fed03SBarry Smith   }
12289566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
12299566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
123008401ef6SPierre Jolivet   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
12319566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
12329566063dSJacob Faibussowitsch   PetscCall(PetscFree(matvals));
12338e2fed03SBarry Smith 
12343ea6fe3dSLisandro Dalcin   /* write block size option to the viewer's .info file */
12359566063dSJacob Faibussowitsch   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
12368e2fed03SBarry Smith   PetscFunctionReturn(0);
12378e2fed03SBarry Smith }
12388e2fed03SBarry Smith 
12399804daf3SBarry Smith #include <petscdraw.h>
12409371c9d4SSatish Balay PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) {
124144a69424SLois Curfman McInnes   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
124232dcc486SBarry Smith   PetscMPIInt       rank = aij->rank, size = aij->size;
1243ace3abfcSBarry Smith   PetscBool         isdraw, iascii, isbinary;
1244b0a32e0cSBarry Smith   PetscViewer       sviewer;
1245f3ef73ceSBarry Smith   PetscViewerFormat format;
1246416022c9SBarry Smith 
12473a40ed3dSBarry Smith   PetscFunctionBegin;
12489566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
12499566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
12509566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
125132077d6dSBarry Smith   if (iascii) {
12529566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
1253ef5fdb51SBarry Smith     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1254ef5fdb51SBarry Smith       PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
12559566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(size, &nz));
12569566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1257ef5fdb51SBarry Smith       for (i = 0; i < (PetscInt)size; i++) {
1258ef5fdb51SBarry Smith         nmax = PetscMax(nmax, nz[i]);
1259ef5fdb51SBarry Smith         nmin = PetscMin(nmin, nz[i]);
1260ef5fdb51SBarry Smith         navg += nz[i];
1261ef5fdb51SBarry Smith       }
12629566063dSJacob Faibussowitsch       PetscCall(PetscFree(nz));
1263ef5fdb51SBarry Smith       navg = navg / size;
12649566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1265ef5fdb51SBarry Smith       PetscFunctionReturn(0);
1266ef5fdb51SBarry Smith     }
12679566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
1268456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
12694e220ebcSLois Curfman McInnes       MatInfo   info;
12706335e310SSatish Balay       PetscInt *inodes = NULL;
1271923f20ffSKris Buschelman 
12729566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
12739566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
12749566063dSJacob Faibussowitsch       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
12759566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1276923f20ffSKris Buschelman       if (!inodes) {
12779371c9d4SSatish Balay         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
12789371c9d4SSatish Balay                                                      (double)info.memory));
12796831982aSBarry Smith       } else {
12809371c9d4SSatish Balay         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
12819371c9d4SSatish Balay                                                      (double)info.memory));
12826831982aSBarry Smith       }
12839566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
12849566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
12859566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
12869566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
12879566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
12889566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
12899566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
12909566063dSJacob Faibussowitsch       PetscCall(VecScatterView(aij->Mvctx, viewer));
12913a40ed3dSBarry Smith       PetscFunctionReturn(0);
1292fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1293923f20ffSKris Buschelman       PetscInt inodecount, inodelimit, *inodes;
12949566063dSJacob Faibussowitsch       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1295923f20ffSKris Buschelman       if (inodes) {
12969566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1297d38fa0fbSBarry Smith       } else {
12989566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1299d38fa0fbSBarry Smith       }
13003a40ed3dSBarry Smith       PetscFunctionReturn(0);
13014aedb280SBarry Smith     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
13024aedb280SBarry Smith       PetscFunctionReturn(0);
130308480c60SBarry Smith     }
13048e2fed03SBarry Smith   } else if (isbinary) {
13058e2fed03SBarry Smith     if (size == 1) {
13069566063dSJacob Faibussowitsch       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
13079566063dSJacob Faibussowitsch       PetscCall(MatView(aij->A, viewer));
13088e2fed03SBarry Smith     } else {
13099566063dSJacob Faibussowitsch       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
13108e2fed03SBarry Smith     }
13118e2fed03SBarry Smith     PetscFunctionReturn(0);
131271e56450SStefano Zampini   } else if (iascii && size == 1) {
13139566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
13149566063dSJacob Faibussowitsch     PetscCall(MatView(aij->A, viewer));
131571e56450SStefano Zampini     PetscFunctionReturn(0);
13160f5bd95cSBarry Smith   } else if (isdraw) {
1317b0a32e0cSBarry Smith     PetscDraw draw;
1318ace3abfcSBarry Smith     PetscBool isnull;
13199566063dSJacob Faibussowitsch     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
13209566063dSJacob Faibussowitsch     PetscCall(PetscDrawIsNull(draw, &isnull));
1321383922c3SLisandro Dalcin     if (isnull) PetscFunctionReturn(0);
132219bcc07fSBarry Smith   }
132319bcc07fSBarry Smith 
132471e56450SStefano Zampini   { /* assemble the entire matrix onto first processor */
132571e56450SStefano Zampini     Mat A = NULL, Av;
132671e56450SStefano Zampini     IS  isrow, iscol;
13272ee70a88SLois Curfman McInnes 
13289566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
13299566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
13309566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
13319566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
133271e56450SStefano Zampini     /*  The commented code uses MatCreateSubMatrices instead */
133371e56450SStefano Zampini     /*
133471e56450SStefano Zampini     Mat *AA, A = NULL, Av;
133571e56450SStefano Zampini     IS  isrow,iscol;
133671e56450SStefano Zampini 
13379566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
13389566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
13399566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1340dd400576SPatrick Sanan     if (rank == 0) {
13419566063dSJacob Faibussowitsch        PetscCall(PetscObjectReference((PetscObject)AA[0]));
134271e56450SStefano Zampini        A    = AA[0];
134371e56450SStefano Zampini        Av   = AA[0];
134495373324SBarry Smith     }
13459566063dSJacob Faibussowitsch     PetscCall(MatDestroySubMatrices(1,&AA));
134671e56450SStefano Zampini */
13479566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol));
13489566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&isrow));
134955843e3eSBarry Smith     /*
135055843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
1351b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
135255843e3eSBarry Smith     */
13539566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1354dd400576SPatrick Sanan     if (rank == 0) {
135548a46eb9SPierre Jolivet       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
13569566063dSJacob Faibussowitsch       PetscCall(MatView_SeqAIJ(Av, sviewer));
135795373324SBarry Smith     }
13589566063dSJacob Faibussowitsch     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
13599566063dSJacob Faibussowitsch     PetscCall(PetscViewerFlush(viewer));
13609566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&A));
136195373324SBarry Smith   }
13623a40ed3dSBarry Smith   PetscFunctionReturn(0);
13631eb62cbbSBarry Smith }
13641eb62cbbSBarry Smith 
13659371c9d4SSatish Balay PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) {
1366ace3abfcSBarry Smith   PetscBool iascii, isdraw, issocket, isbinary;
1367416022c9SBarry Smith 
13683a40ed3dSBarry Smith   PetscFunctionBegin;
13699566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
13709566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
13719566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
13729566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
137348a46eb9SPierre Jolivet   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
13743a40ed3dSBarry Smith   PetscFunctionReturn(0);
1375416022c9SBarry Smith }
1376416022c9SBarry Smith 
13779371c9d4SSatish Balay PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
137844a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1379f4259b30SLisandro Dalcin   Vec         bb1 = NULL;
1380ace3abfcSBarry Smith   PetscBool   hasop;
13818a729477SBarry Smith 
13823a40ed3dSBarry Smith   PetscFunctionBegin;
1383a2b30743SBarry Smith   if (flag == SOR_APPLY_UPPER) {
13849566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1385a2b30743SBarry Smith     PetscFunctionReturn(0);
1386a2b30743SBarry Smith   }
1387a2b30743SBarry Smith 
138848a46eb9SPierre Jolivet   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
13894e980039SJed Brown 
1390c16cb8f2SBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1391da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
13929566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
13932798e883SHong Zhang       its--;
1394da3a660dSBarry Smith     }
13952798e883SHong Zhang 
13962798e883SHong Zhang     while (its--) {
13979566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
13989566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
13992798e883SHong Zhang 
1400c14dc6b6SHong Zhang       /* update rhs: bb1 = bb - B*x */
14019566063dSJacob Faibussowitsch       PetscCall(VecScale(mat->lvec, -1.0));
14029566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
14032798e883SHong Zhang 
1404c14dc6b6SHong Zhang       /* local sweep */
14059566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
14062798e883SHong Zhang     }
14073a40ed3dSBarry Smith   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1408da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
14099566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
14102798e883SHong Zhang       its--;
1411da3a660dSBarry Smith     }
14122798e883SHong Zhang     while (its--) {
14139566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
14149566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
14152798e883SHong Zhang 
1416c14dc6b6SHong Zhang       /* update rhs: bb1 = bb - B*x */
14179566063dSJacob Faibussowitsch       PetscCall(VecScale(mat->lvec, -1.0));
14189566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1419c14dc6b6SHong Zhang 
1420c14dc6b6SHong Zhang       /* local sweep */
14219566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
14222798e883SHong Zhang     }
14233a40ed3dSBarry Smith   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1424da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
14259566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
14262798e883SHong Zhang       its--;
1427da3a660dSBarry Smith     }
14282798e883SHong Zhang     while (its--) {
14299566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
14309566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
14312798e883SHong Zhang 
1432c14dc6b6SHong Zhang       /* update rhs: bb1 = bb - B*x */
14339566063dSJacob Faibussowitsch       PetscCall(VecScale(mat->lvec, -1.0));
14349566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
14352798e883SHong Zhang 
1436c14dc6b6SHong Zhang       /* local sweep */
14379566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
14382798e883SHong Zhang     }
1439a7420bb7SBarry Smith   } else if (flag & SOR_EISENSTAT) {
1440a7420bb7SBarry Smith     Vec xx1;
1441a7420bb7SBarry Smith 
14429566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(bb, &xx1));
14439566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1444a7420bb7SBarry Smith 
14459566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
14469566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1447a7420bb7SBarry Smith     if (!mat->diag) {
14489566063dSJacob Faibussowitsch       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
14499566063dSJacob Faibussowitsch       PetscCall(MatGetDiagonal(matin, mat->diag));
1450a7420bb7SBarry Smith     }
14519566063dSJacob Faibussowitsch     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1452bd0c2dcbSBarry Smith     if (hasop) {
14539566063dSJacob Faibussowitsch       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1454bd0c2dcbSBarry Smith     } else {
14559566063dSJacob Faibussowitsch       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1456bd0c2dcbSBarry Smith     }
14579566063dSJacob Faibussowitsch     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1458887ee2caSBarry Smith 
14599566063dSJacob Faibussowitsch     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1460a7420bb7SBarry Smith 
1461a7420bb7SBarry Smith     /* local sweep */
14629566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
14639566063dSJacob Faibussowitsch     PetscCall(VecAXPY(xx, 1.0, xx1));
14649566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&xx1));
1465ce94432eSBarry Smith   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1466c14dc6b6SHong Zhang 
14679566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&bb1));
1468a0808db4SHong Zhang 
14697b6c816cSBarry Smith   matin->factorerrortype = mat->A->factorerrortype;
14703a40ed3dSBarry Smith   PetscFunctionReturn(0);
14718a729477SBarry Smith }
1472a66be287SLois Curfman McInnes 
14739371c9d4SSatish Balay PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) {
147472e6a0cfSJed Brown   Mat             aA, aB, Aperm;
147572e6a0cfSJed Brown   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
147672e6a0cfSJed Brown   PetscScalar    *aa, *ba;
147772e6a0cfSJed Brown   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
147872e6a0cfSJed Brown   PetscSF         rowsf, sf;
14790298fd71SBarry Smith   IS              parcolp = NULL;
148072e6a0cfSJed Brown   PetscBool       done;
148142e855d1Svictor 
148242e855d1Svictor   PetscFunctionBegin;
14839566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(A, &m, &n));
14849566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(rowp, &rwant));
14859566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(colp, &cwant));
14869566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
148772e6a0cfSJed Brown 
148872e6a0cfSJed Brown   /* Invert row permutation to find out where my rows should go */
14899566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
14909566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
14919566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(rowsf));
149272e6a0cfSJed Brown   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
14939566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
14949566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
149572e6a0cfSJed Brown 
149672e6a0cfSJed Brown   /* Invert column permutation to find out where my columns should go */
14979566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
14989566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
14999566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(sf));
150072e6a0cfSJed Brown   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
15019566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
15029566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
15039566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf));
150472e6a0cfSJed Brown 
15059566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(rowp, &rwant));
15069566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(colp, &cwant));
15079566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
150872e6a0cfSJed Brown 
150972e6a0cfSJed Brown   /* Find out where my gcols should go */
15109566063dSJacob Faibussowitsch   PetscCall(MatGetSize(aB, NULL, &ng));
15119566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ng, &gcdest));
15129566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
15139566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
15149566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(sf));
15159566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
15169566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
15179566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf));
151872e6a0cfSJed Brown 
15199566063dSJacob Faibussowitsch   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
15209566063dSJacob Faibussowitsch   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
15219566063dSJacob Faibussowitsch   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
152272e6a0cfSJed Brown   for (i = 0; i < m; i++) {
1523131c27b5Sprj-     PetscInt    row = rdest[i];
1524131c27b5Sprj-     PetscMPIInt rowner;
15259566063dSJacob Faibussowitsch     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
152672e6a0cfSJed Brown     for (j = ai[i]; j < ai[i + 1]; j++) {
1527131c27b5Sprj-       PetscInt    col = cdest[aj[j]];
1528131c27b5Sprj-       PetscMPIInt cowner;
15299566063dSJacob Faibussowitsch       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
153072e6a0cfSJed Brown       if (rowner == cowner) dnnz[i]++;
153172e6a0cfSJed Brown       else onnz[i]++;
153272e6a0cfSJed Brown     }
153372e6a0cfSJed Brown     for (j = bi[i]; j < bi[i + 1]; j++) {
1534131c27b5Sprj-       PetscInt    col = gcdest[bj[j]];
1535131c27b5Sprj-       PetscMPIInt cowner;
15369566063dSJacob Faibussowitsch       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
153772e6a0cfSJed Brown       if (rowner == cowner) dnnz[i]++;
153872e6a0cfSJed Brown       else onnz[i]++;
153972e6a0cfSJed Brown     }
154072e6a0cfSJed Brown   }
15419566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
15429566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
15439566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
15449566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
15459566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&rowsf));
154672e6a0cfSJed Brown 
15479566063dSJacob Faibussowitsch   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
15489566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(aA, &aa));
15499566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(aB, &ba));
155072e6a0cfSJed Brown   for (i = 0; i < m; i++) {
155172e6a0cfSJed Brown     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1552970468b0SJed Brown     PetscInt  j0, rowlen;
155372e6a0cfSJed Brown     rowlen = ai[i + 1] - ai[i];
1554970468b0SJed Brown     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
1555970468b0SJed Brown       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
15569566063dSJacob Faibussowitsch       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1557970468b0SJed Brown     }
155872e6a0cfSJed Brown     rowlen = bi[i + 1] - bi[i];
1559970468b0SJed Brown     for (j0 = j = 0; j < rowlen; j0 = j) {
1560970468b0SJed Brown       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
15619566063dSJacob Faibussowitsch       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1562970468b0SJed Brown     }
156372e6a0cfSJed Brown   }
15649566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
15659566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
15669566063dSJacob Faibussowitsch   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
15679566063dSJacob Faibussowitsch   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
15689566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
15699566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
15709566063dSJacob Faibussowitsch   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
15719566063dSJacob Faibussowitsch   PetscCall(PetscFree3(work, rdest, cdest));
15729566063dSJacob Faibussowitsch   PetscCall(PetscFree(gcdest));
15739566063dSJacob Faibussowitsch   if (parcolp) PetscCall(ISDestroy(&colp));
157472e6a0cfSJed Brown   *B = Aperm;
157542e855d1Svictor   PetscFunctionReturn(0);
157642e855d1Svictor }
157742e855d1Svictor 
15789371c9d4SSatish Balay PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) {
1579c5e4d11fSDmitry Karpeev   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1580c5e4d11fSDmitry Karpeev 
1581c5e4d11fSDmitry Karpeev   PetscFunctionBegin;
15829566063dSJacob Faibussowitsch   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1583c5e4d11fSDmitry Karpeev   if (ghosts) *ghosts = aij->garray;
1584c5e4d11fSDmitry Karpeev   PetscFunctionReturn(0);
1585c5e4d11fSDmitry Karpeev }
1586c5e4d11fSDmitry Karpeev 
15879371c9d4SSatish Balay PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) {
1588a66be287SLois Curfman McInnes   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1589a66be287SLois Curfman McInnes   Mat            A = mat->A, B = mat->B;
15903966268fSBarry Smith   PetscLogDouble isend[5], irecv[5];
1591a66be287SLois Curfman McInnes 
15923a40ed3dSBarry Smith   PetscFunctionBegin;
15934e220ebcSLois Curfman McInnes   info->block_size = 1.0;
15949566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
15952205254eSKarl Rupp 
15969371c9d4SSatish Balay   isend[0] = info->nz_used;
15979371c9d4SSatish Balay   isend[1] = info->nz_allocated;
15989371c9d4SSatish Balay   isend[2] = info->nz_unneeded;
15999371c9d4SSatish Balay   isend[3] = info->memory;
16009371c9d4SSatish Balay   isend[4] = info->mallocs;
16012205254eSKarl Rupp 
16029566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
16032205254eSKarl Rupp 
16049371c9d4SSatish Balay   isend[0] += info->nz_used;
16059371c9d4SSatish Balay   isend[1] += info->nz_allocated;
16069371c9d4SSatish Balay   isend[2] += info->nz_unneeded;
16079371c9d4SSatish Balay   isend[3] += info->memory;
16089371c9d4SSatish Balay   isend[4] += info->mallocs;
1609a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
16104e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
16114e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
16124e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
16134e220ebcSLois Curfman McInnes     info->memory       = isend[3];
16144e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
1615a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
16161c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
16172205254eSKarl Rupp 
16184e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
16194e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
16204e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
16214e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
16224e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1623a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
16241c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
16252205254eSKarl Rupp 
16264e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
16274e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
16284e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
16294e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
16304e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1631a66be287SLois Curfman McInnes   }
16324e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
16334e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
16344e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
16353a40ed3dSBarry Smith   PetscFunctionReturn(0);
1636a66be287SLois Curfman McInnes }
1637a66be287SLois Curfman McInnes 
16389371c9d4SSatish Balay PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) {
1639c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1640c74985f6SBarry Smith 
16413a40ed3dSBarry Smith   PetscFunctionBegin;
164212c028f9SKris Buschelman   switch (op) {
1643512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
164412c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
164528b2fa4aSMatthew Knepley   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1646a9817697SBarry Smith   case MAT_KEEP_NONZERO_PATTERN:
164712c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
16480ad02fcaSStefano Zampini   case MAT_USE_INODES:
164912c028f9SKris Buschelman   case MAT_IGNORE_ZERO_ENTRIES:
16501a2c6b5cSJunchao Zhang   case MAT_FORM_EXPLICIT_TRANSPOSE:
1651fa1f0d2cSMatthew G Knepley     MatCheckPreallocated(A, 1);
16529566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
16539566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->B, op, flg));
165412c028f9SKris Buschelman     break;
165512c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
165643674050SBarry Smith     MatCheckPreallocated(A, 1);
16574e0d8c25SBarry Smith     a->roworiented = flg;
16582205254eSKarl Rupp 
16599566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
16609566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->B, op, flg));
166112c028f9SKris Buschelman     break;
16628c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
16639371c9d4SSatish Balay   case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
16649371c9d4SSatish Balay   case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
1665c8ca1fbcSVaclav Hapla   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1666ffa07934SHong Zhang   case MAT_SPD:
166777e54ba9SKris Buschelman   case MAT_SYMMETRIC:
166877e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
1669bf108f30SBarry Smith   case MAT_HERMITIAN:
1670bf108f30SBarry Smith   case MAT_SYMMETRY_ETERNAL:
1671b94d7dedSBarry Smith   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1672b94d7dedSBarry Smith   case MAT_SPD_ETERNAL:
1673b94d7dedSBarry Smith     /* if the diagonal matrix is square it inherits some of the properties above */
167477e54ba9SKris Buschelman     break;
16759371c9d4SSatish Balay   case MAT_SUBMAT_SINGLEIS: A->submat_singleis = flg; break;
1676957cac9fSHong Zhang   case MAT_STRUCTURE_ONLY:
1677957cac9fSHong Zhang     /* The option is handled directly by MatSetOption() */
1678957cac9fSHong Zhang     break;
16799371c9d4SSatish Balay   default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
16803a40ed3dSBarry Smith   }
16813a40ed3dSBarry Smith   PetscFunctionReturn(0);
1682c74985f6SBarry Smith }
1683c74985f6SBarry Smith 
16849371c9d4SSatish Balay PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
1685154123eaSLois Curfman McInnes   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
168687828ca2SBarry Smith   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1687d0f46423SBarry Smith   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1688d0f46423SBarry Smith   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1689b1d57f15SBarry Smith   PetscInt    *cmap, *idx_p;
169039e00950SLois Curfman McInnes 
16913a40ed3dSBarry Smith   PetscFunctionBegin;
169228b400f6SJacob Faibussowitsch   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
16937a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
16947a0afa10SBarry Smith 
169570f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
16967a0afa10SBarry Smith     /*
16977a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
16987a0afa10SBarry Smith     */
16997a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1700b1d57f15SBarry Smith     PetscInt    max = 1, tmp;
1701d0f46423SBarry Smith     for (i = 0; i < matin->rmap->n; i++) {
17027a0afa10SBarry Smith       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
17032205254eSKarl Rupp       if (max < tmp) max = tmp;
17047a0afa10SBarry Smith     }
17059566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
17067a0afa10SBarry Smith   }
17077a0afa10SBarry Smith 
1708aed4548fSBarry Smith   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1709abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
171039e00950SLois Curfman McInnes 
17119371c9d4SSatish Balay   pvA = &vworkA;
17129371c9d4SSatish Balay   pcA = &cworkA;
17139371c9d4SSatish Balay   pvB = &vworkB;
17149371c9d4SSatish Balay   pcB = &cworkB;
17159371c9d4SSatish Balay   if (!v) {
17169371c9d4SSatish Balay     pvA = NULL;
17179371c9d4SSatish Balay     pvB = NULL;
17189371c9d4SSatish Balay   }
17199371c9d4SSatish Balay   if (!idx) {
17209371c9d4SSatish Balay     pcA = NULL;
17219371c9d4SSatish Balay     if (!v) pcB = NULL;
17229371c9d4SSatish Balay   }
17239566063dSJacob Faibussowitsch   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
17249566063dSJacob Faibussowitsch   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1725154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1726154123eaSLois Curfman McInnes 
172770f0671dSBarry Smith   cmap = mat->garray;
1728154123eaSLois Curfman McInnes   if (v || idx) {
1729154123eaSLois Curfman McInnes     if (nztot) {
1730154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
1731b1d57f15SBarry Smith       PetscInt imark = -1;
1732154123eaSLois Curfman McInnes       if (v) {
173370f0671dSBarry Smith         *v = v_p = mat->rowvalues;
173439e00950SLois Curfman McInnes         for (i = 0; i < nzB; i++) {
173570f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1736154123eaSLois Curfman McInnes           else break;
1737154123eaSLois Curfman McInnes         }
1738154123eaSLois Curfman McInnes         imark = i;
173970f0671dSBarry Smith         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
174070f0671dSBarry Smith         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1741154123eaSLois Curfman McInnes       }
1742154123eaSLois Curfman McInnes       if (idx) {
174370f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
174470f0671dSBarry Smith         if (imark > -1) {
1745ad540459SPierre Jolivet           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
174670f0671dSBarry Smith         } else {
1747154123eaSLois Curfman McInnes           for (i = 0; i < nzB; i++) {
174870f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1749154123eaSLois Curfman McInnes             else break;
1750154123eaSLois Curfman McInnes           }
1751154123eaSLois Curfman McInnes           imark = i;
175270f0671dSBarry Smith         }
175370f0671dSBarry Smith         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
175470f0671dSBarry Smith         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
175539e00950SLois Curfman McInnes       }
17563f97c4b0SBarry Smith     } else {
1757f4259b30SLisandro Dalcin       if (idx) *idx = NULL;
1758f4259b30SLisandro Dalcin       if (v) *v = NULL;
17591ca473b0SSatish Balay     }
1760154123eaSLois Curfman McInnes   }
176139e00950SLois Curfman McInnes   *nz = nztot;
17629566063dSJacob Faibussowitsch   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
17639566063dSJacob Faibussowitsch   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
17643a40ed3dSBarry Smith   PetscFunctionReturn(0);
176539e00950SLois Curfman McInnes }
176639e00950SLois Curfman McInnes 
17679371c9d4SSatish Balay PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
17687a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
17693a40ed3dSBarry Smith 
17703a40ed3dSBarry Smith   PetscFunctionBegin;
177128b400f6SJacob Faibussowitsch   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
17727a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
17733a40ed3dSBarry Smith   PetscFunctionReturn(0);
177439e00950SLois Curfman McInnes }
177539e00950SLois Curfman McInnes 
17769371c9d4SSatish Balay PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) {
1777855ac2c5SLois Curfman McInnes   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1778ec8511deSBarry Smith   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1779d0f46423SBarry Smith   PetscInt         i, j, cstart = mat->cmap->rstart;
1780329f5518SBarry Smith   PetscReal        sum = 0.0;
1781fff043a9SJunchao Zhang   const MatScalar *v, *amata, *bmata;
178204ca555eSLois Curfman McInnes 
17833a40ed3dSBarry Smith   PetscFunctionBegin;
178417699dbbSLois Curfman McInnes   if (aij->size == 1) {
17859566063dSJacob Faibussowitsch     PetscCall(MatNorm(aij->A, type, norm));
178637fa93a5SLois Curfman McInnes   } else {
17879566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
17889566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
178904ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
1790fff043a9SJunchao Zhang       v = amata;
179104ca555eSLois Curfman McInnes       for (i = 0; i < amat->nz; i++) {
17929371c9d4SSatish Balay         sum += PetscRealPart(PetscConj(*v) * (*v));
17939371c9d4SSatish Balay         v++;
179404ca555eSLois Curfman McInnes       }
1795fff043a9SJunchao Zhang       v = bmata;
179604ca555eSLois Curfman McInnes       for (i = 0; i < bmat->nz; i++) {
17979371c9d4SSatish Balay         sum += PetscRealPart(PetscConj(*v) * (*v));
17989371c9d4SSatish Balay         v++;
179904ca555eSLois Curfman McInnes       }
18001c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
18018f1a2a5eSBarry Smith       *norm = PetscSqrtReal(*norm);
18029566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
18033a40ed3dSBarry Smith     } else if (type == NORM_1) { /* max column norm */
1804329f5518SBarry Smith       PetscReal *tmp, *tmp2;
1805b1d57f15SBarry Smith       PetscInt  *jj, *garray = aij->garray;
18069566063dSJacob Faibussowitsch       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
18079566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
180804ca555eSLois Curfman McInnes       *norm = 0.0;
18099371c9d4SSatish Balay       v     = amata;
18109371c9d4SSatish Balay       jj    = amat->j;
181104ca555eSLois Curfman McInnes       for (j = 0; j < amat->nz; j++) {
18129371c9d4SSatish Balay         tmp[cstart + *jj++] += PetscAbsScalar(*v);
18139371c9d4SSatish Balay         v++;
181404ca555eSLois Curfman McInnes       }
18159371c9d4SSatish Balay       v  = bmata;
18169371c9d4SSatish Balay       jj = bmat->j;
181704ca555eSLois Curfman McInnes       for (j = 0; j < bmat->nz; j++) {
18189371c9d4SSatish Balay         tmp[garray[*jj++]] += PetscAbsScalar(*v);
18199371c9d4SSatish Balay         v++;
182004ca555eSLois Curfman McInnes       }
18211c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1822d0f46423SBarry Smith       for (j = 0; j < mat->cmap->N; j++) {
182304ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
182404ca555eSLois Curfman McInnes       }
18259566063dSJacob Faibussowitsch       PetscCall(PetscFree(tmp));
18269566063dSJacob Faibussowitsch       PetscCall(PetscFree(tmp2));
18279566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
18283a40ed3dSBarry Smith     } else if (type == NORM_INFINITY) { /* max row norm */
1829329f5518SBarry Smith       PetscReal ntemp = 0.0;
1830d0f46423SBarry Smith       for (j = 0; j < aij->A->rmap->n; j++) {
1831fff043a9SJunchao Zhang         v   = amata + amat->i[j];
183204ca555eSLois Curfman McInnes         sum = 0.0;
183304ca555eSLois Curfman McInnes         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
18349371c9d4SSatish Balay           sum += PetscAbsScalar(*v);
18359371c9d4SSatish Balay           v++;
183604ca555eSLois Curfman McInnes         }
1837fff043a9SJunchao Zhang         v = bmata + bmat->i[j];
183804ca555eSLois Curfman McInnes         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
18399371c9d4SSatish Balay           sum += PetscAbsScalar(*v);
18409371c9d4SSatish Balay           v++;
184104ca555eSLois Curfman McInnes         }
1842515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
184304ca555eSLois Curfman McInnes       }
18441c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
18459566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1846ce94432eSBarry Smith     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
18479566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
18489566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
184937fa93a5SLois Curfman McInnes   }
18503a40ed3dSBarry Smith   PetscFunctionReturn(0);
1851855ac2c5SLois Curfman McInnes }
1852855ac2c5SLois Curfman McInnes 
18539371c9d4SSatish Balay PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) {
1854a8661f62Sandi selinger   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1855a8661f62Sandi selinger   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1856071fcb05SBarry Smith   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1857071fcb05SBarry Smith   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1858a8661f62Sandi selinger   Mat              B, A_diag, *B_diag;
1859ce496241SStefano Zampini   const MatScalar *pbv, *bv;
1860b7c46309SBarry Smith 
18613a40ed3dSBarry Smith   PetscFunctionBegin;
18627fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
18639371c9d4SSatish Balay   ma = A->rmap->n;
18649371c9d4SSatish Balay   na = A->cmap->n;
18659371c9d4SSatish Balay   mb = a->B->rmap->n;
18669371c9d4SSatish Balay   nb = a->B->cmap->n;
18679371c9d4SSatish Balay   ai = Aloc->i;
18689371c9d4SSatish Balay   aj = Aloc->j;
18699371c9d4SSatish Balay   bi = Bloc->i;
18709371c9d4SSatish Balay   bj = Bloc->j;
1871fc73b1b3SBarry Smith   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
187280bcc5a1SJed Brown     PetscInt            *d_nnz, *g_nnz, *o_nnz;
187380bcc5a1SJed Brown     PetscSFNode         *oloc;
1874713c93b4SJed Brown     PETSC_UNUSED PetscSF sf;
187580bcc5a1SJed Brown 
18769566063dSJacob Faibussowitsch     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
187780bcc5a1SJed Brown     /* compute d_nnz for preallocation */
18789566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(d_nnz, na));
1879cbc6b225SStefano Zampini     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
188080bcc5a1SJed Brown     /* compute local off-diagonal contributions */
18819566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(g_nnz, nb));
188280bcc5a1SJed Brown     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
188380bcc5a1SJed Brown     /* map those to global */
18849566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
18859566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
18869566063dSJacob Faibussowitsch     PetscCall(PetscSFSetFromOptions(sf));
18879566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(o_nnz, na));
188857168dbeSPierre Jolivet     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
188957168dbeSPierre Jolivet     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
18909566063dSJacob Faibussowitsch     PetscCall(PetscSFDestroy(&sf));
1891d4bb536fSBarry Smith 
18929566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
18939566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
18949566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
18959566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
18969566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
18979566063dSJacob Faibussowitsch     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1898fc4dec0aSBarry Smith   } else {
1899fc4dec0aSBarry Smith     B = *matout;
19009566063dSJacob Faibussowitsch     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1901fc4dec0aSBarry Smith   }
1902b7c46309SBarry Smith 
1903f79cb1a0Sandi selinger   b           = (Mat_MPIAIJ *)B->data;
1904a8661f62Sandi selinger   A_diag      = a->A;
1905a8661f62Sandi selinger   B_diag      = &b->A;
1906a8661f62Sandi selinger   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1907a8661f62Sandi selinger   A_diag_ncol = A_diag->cmap->N;
1908a8661f62Sandi selinger   B_diag_ilen = sub_B_diag->ilen;
1909a8661f62Sandi selinger   B_diag_i    = sub_B_diag->i;
1910f79cb1a0Sandi selinger 
1911f79cb1a0Sandi selinger   /* Set ilen for diagonal of B */
1912ad540459SPierre Jolivet   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1913f79cb1a0Sandi selinger 
1914a8661f62Sandi selinger   /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
1915a8661f62Sandi selinger   very quickly (=without using MatSetValues), because all writes are local. */
19167fb60732SBarry Smith   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
19179566063dSJacob Faibussowitsch   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1918f79cb1a0Sandi selinger 
1919b7c46309SBarry Smith   /* copy over the B part */
19209566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[mb], &cols));
19219566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1922ce496241SStefano Zampini   pbv = bv;
1923d0f46423SBarry Smith   row = A->rmap->rstart;
19242205254eSKarl Rupp   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
192561a2fbbaSHong Zhang   cols_tmp = cols;
1926da668accSHong Zhang   for (i = 0; i < mb; i++) {
1927da668accSHong Zhang     ncol = bi[i + 1] - bi[i];
19289566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
19292205254eSKarl Rupp     row++;
19309371c9d4SSatish Balay     pbv += ncol;
19319371c9d4SSatish Balay     cols_tmp += ncol;
1932b7c46309SBarry Smith   }
19339566063dSJacob Faibussowitsch   PetscCall(PetscFree(cols));
19349566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1935fc73b1b3SBarry Smith 
19369566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
19379566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1938cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
19390de55854SLois Curfman McInnes     *matout = B;
19400de55854SLois Curfman McInnes   } else {
19419566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(A, &B));
19420de55854SLois Curfman McInnes   }
19433a40ed3dSBarry Smith   PetscFunctionReturn(0);
1944b7c46309SBarry Smith }
1945b7c46309SBarry Smith 
19469371c9d4SSatish Balay PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) {
19474b967eb1SSatish Balay   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
19484b967eb1SSatish Balay   Mat         a = aij->A, b = aij->B;
1949b1d57f15SBarry Smith   PetscInt    s1, s2, s3;
1950a008b906SSatish Balay 
19513a40ed3dSBarry Smith   PetscFunctionBegin;
19529566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &s2, &s3));
19534b967eb1SSatish Balay   if (rr) {
19549566063dSJacob Faibussowitsch     PetscCall(VecGetLocalSize(rr, &s1));
195508401ef6SPierre Jolivet     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
19564b967eb1SSatish Balay     /* Overlap communication with computation. */
19579566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1958a008b906SSatish Balay   }
19594b967eb1SSatish Balay   if (ll) {
19609566063dSJacob Faibussowitsch     PetscCall(VecGetLocalSize(ll, &s1));
196108401ef6SPierre Jolivet     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1962dbbe0bcdSBarry Smith     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
19634b967eb1SSatish Balay   }
19644b967eb1SSatish Balay   /* scale  the diagonal block */
1965dbbe0bcdSBarry Smith   PetscUseTypeMethod(a, diagonalscale, ll, rr);
19664b967eb1SSatish Balay 
19674b967eb1SSatish Balay   if (rr) {
19684b967eb1SSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
19699566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1970dbbe0bcdSBarry Smith     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
19714b967eb1SSatish Balay   }
19723a40ed3dSBarry Smith   PetscFunctionReturn(0);
1973a008b906SSatish Balay }
1974a008b906SSatish Balay 
19759371c9d4SSatish Balay PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) {
1976bb5a7306SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
19773a40ed3dSBarry Smith 
19783a40ed3dSBarry Smith   PetscFunctionBegin;
19799566063dSJacob Faibussowitsch   PetscCall(MatSetUnfactored(a->A));
19803a40ed3dSBarry Smith   PetscFunctionReturn(0);
1981bb5a7306SBarry Smith }
1982bb5a7306SBarry Smith 
19839371c9d4SSatish Balay PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) {
1984d4bb536fSBarry Smith   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
1985d4bb536fSBarry Smith   Mat         a, b, c, d;
1986ace3abfcSBarry Smith   PetscBool   flg;
1987d4bb536fSBarry Smith 
19883a40ed3dSBarry Smith   PetscFunctionBegin;
19899371c9d4SSatish Balay   a = matA->A;
19909371c9d4SSatish Balay   b = matA->B;
19919371c9d4SSatish Balay   c = matB->A;
19929371c9d4SSatish Balay   d = matB->B;
1993d4bb536fSBarry Smith 
19949566063dSJacob Faibussowitsch   PetscCall(MatEqual(a, c, &flg));
199548a46eb9SPierre Jolivet   if (flg) PetscCall(MatEqual(b, d, &flg));
19961c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
19973a40ed3dSBarry Smith   PetscFunctionReturn(0);
1998d4bb536fSBarry Smith }
1999d4bb536fSBarry Smith 
20009371c9d4SSatish Balay PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) {
2001cb5b572fSBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2002cb5b572fSBarry Smith   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2003cb5b572fSBarry Smith 
2004cb5b572fSBarry Smith   PetscFunctionBegin;
200533f4a19fSKris Buschelman   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
200633f4a19fSKris Buschelman   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2007cb5b572fSBarry Smith     /* because of the column compression in the off-processor part of the matrix a->B,
2008cb5b572fSBarry Smith        the number of columns in a->B and b->B may be different, hence we cannot call
2009cb5b572fSBarry Smith        the MatCopy() directly on the two parts. If need be, we can provide a more
2010cb5b572fSBarry Smith        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2011cb5b572fSBarry Smith        then copying the submatrices */
20129566063dSJacob Faibussowitsch     PetscCall(MatCopy_Basic(A, B, str));
2013cb5b572fSBarry Smith   } else {
20149566063dSJacob Faibussowitsch     PetscCall(MatCopy(a->A, b->A, str));
20159566063dSJacob Faibussowitsch     PetscCall(MatCopy(a->B, b->B, str));
2016cb5b572fSBarry Smith   }
20179566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2018cb5b572fSBarry Smith   PetscFunctionReturn(0);
2019cb5b572fSBarry Smith }
2020cb5b572fSBarry Smith 
/* MatSetUp_MPIAIJ - Set-up routine: requests preallocation with PETSC_DEFAULT counts and no
   per-row arrays for both the diagonal and off-diagonal parts */
20219371c9d4SSatish Balay PetscErrorCode MatSetUp_MPIAIJ(Mat A) {
2022273d9f13SBarry Smith   PetscFunctionBegin;
20239566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
2024273d9f13SBarry Smith   PetscFunctionReturn(0);
2025273d9f13SBarry Smith }
2026273d9f13SBarry Smith 
2027001ddc4fSHong Zhang /*
2028001ddc4fSHong Zhang    Computes the number of nonzeros per row needed for preallocation when X and Y
2029001ddc4fSHong Zhang    have different nonzero structure.
2030001ddc4fSHong Zhang */
20319371c9d4SSatish Balay PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) {
2032001ddc4fSHong Zhang   PetscInt i, j, k, nzx, nzy;
203395b7e79eSJed Brown 
203495b7e79eSJed Brown   PetscFunctionBegin;
203595b7e79eSJed Brown   /* Set the number of nonzeros in the new matrix */
203695b7e79eSJed Brown   for (i = 0; i < m; i++) {
2037001ddc4fSHong Zhang     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2038001ddc4fSHong Zhang     nzx    = xi[i + 1] - xi[i];
2039001ddc4fSHong Zhang     nzy    = yi[i + 1] - yi[i];
204095b7e79eSJed Brown     nnz[i] = 0;
204195b7e79eSJed Brown     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2042001ddc4fSHong Zhang       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2043001ddc4fSHong Zhang       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
204495b7e79eSJed Brown       nnz[i]++;
204595b7e79eSJed Brown     }
204695b7e79eSJed Brown     for (; k < nzy; k++) nnz[i]++;
204795b7e79eSJed Brown   }
204895b7e79eSJed Brown   PetscFunctionReturn(0);
204995b7e79eSJed Brown }
205095b7e79eSJed Brown 
2051001ddc4fSHong Zhang /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
20529371c9d4SSatish Balay static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) {
2053001ddc4fSHong Zhang   PetscInt    m = Y->rmap->N;
2054001ddc4fSHong Zhang   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2055001ddc4fSHong Zhang   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2056001ddc4fSHong Zhang 
2057001ddc4fSHong Zhang   PetscFunctionBegin;
20589566063dSJacob Faibussowitsch   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2059001ddc4fSHong Zhang   PetscFunctionReturn(0);
2060001ddc4fSHong Zhang }
2061001ddc4fSHong Zhang 
20629371c9d4SSatish Balay PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) {
2063ac90fabeSBarry Smith   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2064ac90fabeSBarry Smith 
2065ac90fabeSBarry Smith   PetscFunctionBegin;
2066ac90fabeSBarry Smith   if (str == SAME_NONZERO_PATTERN) {
20679566063dSJacob Faibussowitsch     PetscCall(MatAXPY(yy->A, a, xx->A, str));
20689566063dSJacob Faibussowitsch     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2069ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
20709566063dSJacob Faibussowitsch     PetscCall(MatAXPY_Basic(Y, a, X, str));
2071ac90fabeSBarry Smith   } else {
20729f5f6813SShri Abhyankar     Mat       B;
20739f5f6813SShri Abhyankar     PetscInt *nnz_d, *nnz_o;
2074d9d719b4SStefano Zampini 
20759566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
20769566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
20779566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
20789566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
20799566063dSJacob Faibussowitsch     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
20809566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
20819566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
20829566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
20839566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
20849566063dSJacob Faibussowitsch     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
20859566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(Y, &B));
20869566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz_d));
20879566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz_o));
2088ac90fabeSBarry Smith   }
2089ac90fabeSBarry Smith   PetscFunctionReturn(0);
2090ac90fabeSBarry Smith }
2091ac90fabeSBarry Smith 
20922726fb6dSPierre Jolivet PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2093354c94deSBarry Smith 
20949371c9d4SSatish Balay PetscErrorCode MatConjugate_MPIAIJ(Mat mat) {
20955f80ce2aSJacob Faibussowitsch   PetscFunctionBegin;
20965f80ce2aSJacob Faibussowitsch   if (PetscDefined(USE_COMPLEX)) {
2097354c94deSBarry Smith     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2098354c94deSBarry Smith 
20999566063dSJacob Faibussowitsch     PetscCall(MatConjugate_SeqAIJ(aij->A));
21009566063dSJacob Faibussowitsch     PetscCall(MatConjugate_SeqAIJ(aij->B));
21015f80ce2aSJacob Faibussowitsch   }
2102354c94deSBarry Smith   PetscFunctionReturn(0);
2103354c94deSBarry Smith }
2104354c94deSBarry Smith 
21059371c9d4SSatish Balay PetscErrorCode MatRealPart_MPIAIJ(Mat A) {
210699cafbc1SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
210799cafbc1SBarry Smith 
210899cafbc1SBarry Smith   PetscFunctionBegin;
21099566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->A));
21109566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->B));
211199cafbc1SBarry Smith   PetscFunctionReturn(0);
211299cafbc1SBarry Smith }
211399cafbc1SBarry Smith 
21149371c9d4SSatish Balay PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) {
211599cafbc1SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
211699cafbc1SBarry Smith 
211799cafbc1SBarry Smith   PetscFunctionBegin;
21189566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->A));
21199566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->B));
212099cafbc1SBarry Smith   PetscFunctionReturn(0);
212199cafbc1SBarry Smith }
212299cafbc1SBarry Smith 
21239371c9d4SSatish Balay PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) {
2124c91732d9SHong Zhang   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2125475b8b61SHong Zhang   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2126475b8b61SHong Zhang   PetscScalar       *va, *vv;
2127475b8b61SHong Zhang   Vec                vB, vA;
2128475b8b61SHong Zhang   const PetscScalar *vb;
2129c91732d9SHong Zhang 
2130c91732d9SHong Zhang   PetscFunctionBegin;
21319566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
21329566063dSJacob Faibussowitsch   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2133475b8b61SHong Zhang 
21349566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(vA, &va));
2135c91732d9SHong Zhang   if (idx) {
2136475b8b61SHong Zhang     for (i = 0; i < m; i++) {
2137d0f46423SBarry Smith       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2138c91732d9SHong Zhang     }
2139c91732d9SHong Zhang   }
2140c91732d9SHong Zhang 
21419566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
21429566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &idxb));
21439566063dSJacob Faibussowitsch   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2144c91732d9SHong Zhang 
21459566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(v, &vv));
21469566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(vB, &vb));
2147475b8b61SHong Zhang   for (i = 0; i < m; i++) {
2148c91732d9SHong Zhang     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2149475b8b61SHong Zhang       vv[i] = vb[i];
2150c91732d9SHong Zhang       if (idx) idx[i] = a->garray[idxb[i]];
2151475b8b61SHong Zhang     } else {
2152475b8b61SHong Zhang       vv[i] = va[i];
21539371c9d4SSatish Balay       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2154c91732d9SHong Zhang     }
2155c91732d9SHong Zhang   }
21569566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(vA, &vv));
21579566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(vA, &va));
21589566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(vB, &vb));
21599566063dSJacob Faibussowitsch   PetscCall(PetscFree(idxb));
21609566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&vA));
21619566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&vB));
2162c91732d9SHong Zhang   PetscFunctionReturn(0);
2163c91732d9SHong Zhang }
2164c91732d9SHong Zhang 
/*
   MatGetRowMinAbs_MPIAIJ - For each locally owned row, stores in v the entry of smallest
   absolute value over the diagonal and off-diagonal blocks and, when idx is non-NULL,
   a column index for that entry.

   A   - the matrix
   v   - output vector, one value per local row
   idx - optional (may be NULL) output array of length m

   Three cases are handled:
   - the local column count equals the global one: the work is delegated to
     MatGetRowMinAbs() on the diagonal block, writing straight into v's array;
   - the local column count is zero: v is filled with 0.0 and idx with -1;
   - otherwise the diagonal-block result is combined with a scan of the off-diagonal
     block in which columns that are not stored count as implicit 0.0 entries.
*/
21659371c9d4SSatish Balay PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) {
2166f07e67edSHong Zhang   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2167f07e67edSHong Zhang   PetscInt           m = A->rmap->n, n = A->cmap->n;
2168f07e67edSHong Zhang   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2169f07e67edSHong Zhang   PetscInt          *cmap = mat->garray;
2170f07e67edSHong Zhang   PetscInt          *diagIdx, *offdiagIdx;
2171f07e67edSHong Zhang   Vec                diagV, offdiagV;
2172ce496241SStefano Zampini   PetscScalar       *a, *diagA, *offdiagA;
2173ce496241SStefano Zampini   const PetscScalar *ba, *bav;
2174f07e67edSHong Zhang   PetscInt           r, j, col, ncols, *bi, *bj;
2175f07e67edSHong Zhang   Mat                B = mat->B;
2176f07e67edSHong Zhang   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2177c87e5d42SMatthew Knepley 
2178c87e5d42SMatthew Knepley   PetscFunctionBegin;
2179f07e67edSHong Zhang   /* When a process holds entire A and other processes have no entry */
2180f07e67edSHong Zhang   if (A->cmap->N == n) {
/* wrap v's array in a sequential vector so the diagonal block writes its result directly into v */
21819566063dSJacob Faibussowitsch     PetscCall(VecGetArrayWrite(v, &diagA));
21829566063dSJacob Faibussowitsch     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
21839566063dSJacob Faibussowitsch     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
21849566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&diagV));
21859566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayWrite(v, &diagA));
2186f07e67edSHong Zhang     PetscFunctionReturn(0);
2187f07e67edSHong Zhang   } else if (n == 0) {
/* no local columns: report 0.0 with index -1 for every local row */
2188f07e67edSHong Zhang     if (m) {
21899566063dSJacob Faibussowitsch       PetscCall(VecGetArrayWrite(v, &a));
21909371c9d4SSatish Balay       for (r = 0; r < m; r++) {
21919371c9d4SSatish Balay         a[r] = 0.0;
21929371c9d4SSatish Balay         if (idx) idx[r] = -1;
21939371c9d4SSatish Balay       }
21949566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayWrite(v, &a));
2195f07e67edSHong Zhang     }
2196f07e67edSHong Zhang     PetscFunctionReturn(0);
2197f07e67edSHong Zhang   }
2198f07e67edSHong Zhang 
/* general case: minimum over the diagonal block first (diagIdx is relative to that block) */
21999566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
22009566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
22019566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
22029566063dSJacob Faibussowitsch   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2203f07e67edSHong Zhang 
2204f07e67edSHong Zhang   /* Get offdiagIdx[] for implicit 0.0 */
22059566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
/* ba and bj are cursors: they advance by ncols per row in the loop below, so they always point at the current row */
2206ce496241SStefano Zampini   ba = bav;
2207f07e67edSHong Zhang   bi = b->i;
2208f07e67edSHong Zhang   bj = b->j;
22099566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2210f07e67edSHong Zhang   for (r = 0; r < m; r++) {
2211f07e67edSHong Zhang     ncols = bi[r + 1] - bi[r];
2212f07e67edSHong Zhang     if (ncols == A->cmap->N - n) { /* Brow is dense */
22139371c9d4SSatish Balay       offdiagA[r]   = *ba;
22149371c9d4SSatish Balay       offdiagIdx[r] = cmap[0];
2215f07e67edSHong Zhang     } else { /* Brow is sparse: it has an implicit 0.0, so start the search from 0.0 */
2216f07e67edSHong Zhang       offdiagA[r] = 0.0;
2217f07e67edSHong Zhang 
2218f07e67edSHong Zhang       /* Find first hole in the cmap */
/* without a hole, position j holds global column j before cstart, and j + n from cstart on (the n local columns are skipped) */
2219f07e67edSHong Zhang       for (j = 0; j < ncols; j++) {
2220f07e67edSHong Zhang         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2221f07e67edSHong Zhang         if (col > j && j < cstart) {
2222f07e67edSHong Zhang           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2223f07e67edSHong Zhang           break;
2224f07e67edSHong Zhang         } else if (col > j + n && j >= cstart) {
2225f07e67edSHong Zhang           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2226f07e67edSHong Zhang           break;
2227f07e67edSHong Zhang         }
2228f07e67edSHong Zhang       }
22294e879edeSHong Zhang       if (j == ncols && ncols < A->cmap->N - n) {
2230f07e67edSHong Zhang         /* a hole is outside compressed Bcols */
2231f07e67edSHong Zhang         if (ncols == 0) {
/* empty row: the first off-diagonal column is 0, or cend when the local columns start at 0 */
2232f07e67edSHong Zhang           if (cstart) {
2233f07e67edSHong Zhang             offdiagIdx[r] = 0;
2234f07e67edSHong Zhang           } else offdiagIdx[r] = cend;
2235f07e67edSHong Zhang         } else { /* ncols > 0 */
2236f07e67edSHong Zhang           offdiagIdx[r] = cmap[ncols - 1] + 1;
/* the candidate landed on the first local column, so jump past the local column range */
2237f07e67edSHong Zhang           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2238f07e67edSHong Zhang         }
2239f07e67edSHong Zhang       }
2240f07e67edSHong Zhang     }
2241f07e67edSHong Zhang 
/* scan the stored entries of this row; a strictly smaller absolute value replaces the current candidate */
2242f07e67edSHong Zhang     for (j = 0; j < ncols; j++) {
22439371c9d4SSatish Balay       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
22449371c9d4SSatish Balay         offdiagA[r]   = *ba;
22459371c9d4SSatish Balay         offdiagIdx[r] = cmap[*bj];
22469371c9d4SSatish Balay       }
22479371c9d4SSatish Balay       ba++;
22489371c9d4SSatish Balay       bj++;
2249f07e67edSHong Zhang     }
2250f07e67edSHong Zhang   }
2251f07e67edSHong Zhang 
/* merge the two candidates per row; on equal absolute value the smaller column index is reported */
22529566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(v, &a));
22539566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2254f07e67edSHong Zhang   for (r = 0; r < m; ++r) {
2255f07e67edSHong Zhang     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2256f07e67edSHong Zhang       a[r] = diagA[r];
2257f07e67edSHong Zhang       if (idx) idx[r] = cstart + diagIdx[r];
2258f07e67edSHong Zhang     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2259f07e67edSHong Zhang       a[r] = diagA[r];
2260c87e5d42SMatthew Knepley       if (idx) {
2261f07e67edSHong Zhang         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2262f07e67edSHong Zhang           idx[r] = cstart + diagIdx[r];
2263f07e67edSHong Zhang         } else idx[r] = offdiagIdx[r];
2264f07e67edSHong Zhang       }
2265f07e67edSHong Zhang     } else {
2266f07e67edSHong Zhang       a[r] = offdiagA[r];
2267f07e67edSHong Zhang       if (idx) idx[r] = offdiagIdx[r];
2268c87e5d42SMatthew Knepley     }
2269c87e5d42SMatthew Knepley   }
22709566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
22719566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(v, &a));
22729566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
22739566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
22749566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&diagV));
22759566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&offdiagV));
22769566063dSJacob Faibussowitsch   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2277c87e5d42SMatthew Knepley   PetscFunctionReturn(0);
2278c87e5d42SMatthew Knepley }
2279c87e5d42SMatthew Knepley 
/*
   MatGetRowMin_MPIAIJ - For each locally owned row, stores in v the entry with the smallest
   real part over the diagonal and off-diagonal blocks and, when idx is non-NULL, a column
   index for that entry.

   A   - the matrix
   v   - output vector, one value per local row
   idx - optional (may be NULL) output array of length m

   Three cases are handled:
   - the local column count equals the global one: the work is delegated to
     MatGetRowMin() on the diagonal block, writing straight into v's array;
   - the local column count is zero: v is filled with PETSC_MAX_REAL and idx with -1;
   - otherwise the diagonal-block result is combined with a scan of the off-diagonal
     block in which columns that are not stored count as implicit 0.0 entries.
*/
22809371c9d4SSatish Balay PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) {
228103bc72f1SMatthew Knepley   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2282fa213d2fSHong Zhang   PetscInt           m = A->rmap->n, n = A->cmap->n;
2283fa213d2fSHong Zhang   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
228403bc72f1SMatthew Knepley   PetscInt          *cmap = mat->garray;
228503bc72f1SMatthew Knepley   PetscInt          *diagIdx, *offdiagIdx;
228603bc72f1SMatthew Knepley   Vec                diagV, offdiagV;
2287ce496241SStefano Zampini   PetscScalar       *a, *diagA, *offdiagA;
2288ce496241SStefano Zampini   const PetscScalar *ba, *bav;
2289fa213d2fSHong Zhang   PetscInt           r, j, col, ncols, *bi, *bj;
2290fa213d2fSHong Zhang   Mat                B = mat->B;
2291fa213d2fSHong Zhang   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
229203bc72f1SMatthew Knepley 
229303bc72f1SMatthew Knepley   PetscFunctionBegin;
2294fa213d2fSHong Zhang   /* When a process holds entire A and other processes have no entry */
2295fa213d2fSHong Zhang   if (A->cmap->N == n) {
/* wrap v's array in a sequential vector so the diagonal block writes its result directly into v */
22969566063dSJacob Faibussowitsch     PetscCall(VecGetArrayWrite(v, &diagA));
22979566063dSJacob Faibussowitsch     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
22989566063dSJacob Faibussowitsch     PetscCall(MatGetRowMin(mat->A, diagV, idx));
22999566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&diagV));
23009566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayWrite(v, &diagA));
2301fa213d2fSHong Zhang     PetscFunctionReturn(0);
2302fa213d2fSHong Zhang   } else if (n == 0) {
/* no local columns: report PETSC_MAX_REAL with index -1 for every local row */
2303fa213d2fSHong Zhang     if (m) {
23049566063dSJacob Faibussowitsch       PetscCall(VecGetArrayWrite(v, &a));
23059371c9d4SSatish Balay       for (r = 0; r < m; r++) {
23069371c9d4SSatish Balay         a[r] = PETSC_MAX_REAL;
23079371c9d4SSatish Balay         if (idx) idx[r] = -1;
23089371c9d4SSatish Balay       }
23099566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayWrite(v, &a));
2310fa213d2fSHong Zhang     }
2311fa213d2fSHong Zhang     PetscFunctionReturn(0);
2312fa213d2fSHong Zhang   }
2313fa213d2fSHong Zhang 
/* general case: minimum over the diagonal block first (diagIdx is relative to that block) */
23149566063dSJacob Faibussowitsch   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
23159566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
23169566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
23179566063dSJacob Faibussowitsch   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2318fa213d2fSHong Zhang 
2319fa213d2fSHong Zhang   /* Get offdiagIdx[] for implicit 0.0 */
23209566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
/* ba and bj are cursors: they advance by ncols per row in the loop below, so they always point at the current row */
2321ce496241SStefano Zampini   ba = bav;
2322fa213d2fSHong Zhang   bi = b->i;
2323fa213d2fSHong Zhang   bj = b->j;
23249566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2325fa213d2fSHong Zhang   for (r = 0; r < m; r++) {
2326fa213d2fSHong Zhang     ncols = bi[r + 1] - bi[r];
2327fa213d2fSHong Zhang     if (ncols == A->cmap->N - n) { /* Brow is dense */
23289371c9d4SSatish Balay       offdiagA[r]   = *ba;
23299371c9d4SSatish Balay       offdiagIdx[r] = cmap[0];
2330fa213d2fSHong Zhang     } else { /* Brow is sparse: it has an implicit 0.0, so the row minimum is 0.0 or lower */
2331fa213d2fSHong Zhang       offdiagA[r] = 0.0;
2332fa213d2fSHong Zhang 
2333fa213d2fSHong Zhang       /* Find first hole in the cmap */
/* without a hole, position j holds global column j before cstart, and j + n from cstart on (the n local columns are skipped) */
2334fa213d2fSHong Zhang       for (j = 0; j < ncols; j++) {
2335fa213d2fSHong Zhang         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2336fa213d2fSHong Zhang         if (col > j && j < cstart) {
2337fa213d2fSHong Zhang           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2338fa213d2fSHong Zhang           break;
2339fa213d2fSHong Zhang         } else if (col > j + n && j >= cstart) {
2340fa213d2fSHong Zhang           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2341fa213d2fSHong Zhang           break;
2342fa213d2fSHong Zhang         }
2343fa213d2fSHong Zhang       }
23444e879edeSHong Zhang       if (j == ncols && ncols < A->cmap->N - n) {
2345fa213d2fSHong Zhang         /* a hole is outside compressed Bcols */
2346fa213d2fSHong Zhang         if (ncols == 0) {
/* empty row: the first off-diagonal column is 0, or cend when the local columns start at 0 */
2347fa213d2fSHong Zhang           if (cstart) {
2348fa213d2fSHong Zhang             offdiagIdx[r] = 0;
2349fa213d2fSHong Zhang           } else offdiagIdx[r] = cend;
2350fa213d2fSHong Zhang         } else { /* ncols > 0 */
2351fa213d2fSHong Zhang           offdiagIdx[r] = cmap[ncols - 1] + 1;
/* the candidate landed on the first local column, so jump past the local column range */
2352fa213d2fSHong Zhang           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2353fa213d2fSHong Zhang         }
2354fa213d2fSHong Zhang       }
2355fa213d2fSHong Zhang     }
2356fa213d2fSHong Zhang 
/* scan the stored entries of this row; a strictly smaller real part replaces the current candidate */
2357fa213d2fSHong Zhang     for (j = 0; j < ncols; j++) {
23589371c9d4SSatish Balay       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
23599371c9d4SSatish Balay         offdiagA[r]   = *ba;
23609371c9d4SSatish Balay         offdiagIdx[r] = cmap[*bj];
23619371c9d4SSatish Balay       }
23629371c9d4SSatish Balay       ba++;
23639371c9d4SSatish Balay       bj++;
2364fa213d2fSHong Zhang     }
2365fa213d2fSHong Zhang   }
2366fa213d2fSHong Zhang 
/* merge the two candidates per row; on equal real part the smaller column index is reported */
23679566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(v, &a));
23689566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2369fa213d2fSHong Zhang   for (r = 0; r < m; ++r) {
2370fa213d2fSHong Zhang     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
237103bc72f1SMatthew Knepley       a[r] = diagA[r];
2372fa213d2fSHong Zhang       if (idx) idx[r] = cstart + diagIdx[r];
2373fa213d2fSHong Zhang     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2374fa213d2fSHong Zhang       a[r] = diagA[r];
2375fa213d2fSHong Zhang       if (idx) {
2376fa213d2fSHong Zhang         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
237703bc72f1SMatthew Knepley           idx[r] = cstart + diagIdx[r];
2378fa213d2fSHong Zhang         } else idx[r] = offdiagIdx[r];
2379fa213d2fSHong Zhang       }
238003bc72f1SMatthew Knepley     } else {
238103bc72f1SMatthew Knepley       a[r] = offdiagA[r];
2382fa213d2fSHong Zhang       if (idx) idx[r] = offdiagIdx[r];
238303bc72f1SMatthew Knepley     }
238403bc72f1SMatthew Knepley   }
23859566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
23869566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(v, &a));
23879566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
23889566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
23899566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&diagV));
23909566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&offdiagV));
23919566063dSJacob Faibussowitsch   PetscCall(PetscFree2(diagIdx, offdiagIdx));
239203bc72f1SMatthew Knepley   PetscFunctionReturn(0);
239303bc72f1SMatthew Knepley }
239403bc72f1SMatthew Knepley 
23959371c9d4SSatish Balay PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) {
2396c87e5d42SMatthew Knepley   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
23971a254869SHong Zhang   PetscInt           m = A->rmap->n, n = A->cmap->n;
23981a254869SHong Zhang   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2399c87e5d42SMatthew Knepley   PetscInt          *cmap = mat->garray;
2400c87e5d42SMatthew Knepley   PetscInt          *diagIdx, *offdiagIdx;
2401c87e5d42SMatthew Knepley   Vec                diagV, offdiagV;
2402ce496241SStefano Zampini   PetscScalar       *a, *diagA, *offdiagA;
2403ce496241SStefano Zampini   const PetscScalar *ba, *bav;
24041a254869SHong Zhang   PetscInt           r, j, col, ncols, *bi, *bj;
24051a254869SHong Zhang   Mat                B = mat->B;
24061a254869SHong Zhang   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2407c87e5d42SMatthew Knepley 
2408c87e5d42SMatthew Knepley   PetscFunctionBegin;
24091a254869SHong Zhang   /* When a process holds entire A and other processes have no entry */
24101a254869SHong Zhang   if (A->cmap->N == n) {
24119566063dSJacob Faibussowitsch     PetscCall(VecGetArrayWrite(v, &diagA));
24129566063dSJacob Faibussowitsch     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
24139566063dSJacob Faibussowitsch     PetscCall(MatGetRowMax(mat->A, diagV, idx));
24149566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&diagV));
24159566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayWrite(v, &diagA));
24161a254869SHong Zhang     PetscFunctionReturn(0);
24171a254869SHong Zhang   } else if (n == 0) {
24181a254869SHong Zhang     if (m) {
24199566063dSJacob Faibussowitsch       PetscCall(VecGetArrayWrite(v, &a));
24209371c9d4SSatish Balay       for (r = 0; r < m; r++) {
24219371c9d4SSatish Balay         a[r] = PETSC_MIN_REAL;
24229371c9d4SSatish Balay         if (idx) idx[r] = -1;
24239371c9d4SSatish Balay       }
24249566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayWrite(v, &a));
24251a254869SHong Zhang     }
24261a254869SHong Zhang     PetscFunctionReturn(0);
24271a254869SHong Zhang   }
24281a254869SHong Zhang 
24299566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
24309566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
24319566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
24329566063dSJacob Faibussowitsch   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
24331a254869SHong Zhang 
24341a254869SHong Zhang   /* Get offdiagIdx[] for implicit 0.0 */
24359566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2436ce496241SStefano Zampini   ba = bav;
24371a254869SHong Zhang   bi = b->i;
24381a254869SHong Zhang   bj = b->j;
24399566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
24401a254869SHong Zhang   for (r = 0; r < m; r++) {
24411a254869SHong Zhang     ncols = bi[r + 1] - bi[r];
24421a254869SHong Zhang     if (ncols == A->cmap->N - n) { /* Brow is dense */
24439371c9d4SSatish Balay       offdiagA[r]   = *ba;
24449371c9d4SSatish Balay       offdiagIdx[r] = cmap[0];
24451a254869SHong Zhang     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
24461a254869SHong Zhang       offdiagA[r] = 0.0;
24471a254869SHong Zhang 
24481a254869SHong Zhang       /* Find first hole in the cmap */
24491a254869SHong Zhang       for (j = 0; j < ncols; j++) {
24501a254869SHong Zhang         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
24511a254869SHong Zhang         if (col > j && j < cstart) {
24521a254869SHong Zhang           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
24531a254869SHong Zhang           break;
24541a254869SHong Zhang         } else if (col > j + n && j >= cstart) {
24551a254869SHong Zhang           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
24561a254869SHong Zhang           break;
24571a254869SHong Zhang         }
24581a254869SHong Zhang       }
24594e879edeSHong Zhang       if (j == ncols && ncols < A->cmap->N - n) {
24601a254869SHong Zhang         /* a hole is outside compressed Bcols */
24611a254869SHong Zhang         if (ncols == 0) {
24621a254869SHong Zhang           if (cstart) {
24631a254869SHong Zhang             offdiagIdx[r] = 0;
24641a254869SHong Zhang           } else offdiagIdx[r] = cend;
24651a254869SHong Zhang         } else { /* ncols > 0 */
24661a254869SHong Zhang           offdiagIdx[r] = cmap[ncols - 1] + 1;
24671a254869SHong Zhang           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
24681a254869SHong Zhang         }
24691a254869SHong Zhang       }
24701a254869SHong Zhang     }
24711a254869SHong Zhang 
24721a254869SHong Zhang     for (j = 0; j < ncols; j++) {
24739371c9d4SSatish Balay       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
24749371c9d4SSatish Balay         offdiagA[r]   = *ba;
24759371c9d4SSatish Balay         offdiagIdx[r] = cmap[*bj];
24769371c9d4SSatish Balay       }
24779371c9d4SSatish Balay       ba++;
24789371c9d4SSatish Balay       bj++;
24791a254869SHong Zhang     }
24801a254869SHong Zhang   }
24811a254869SHong Zhang 
24829566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(v, &a));
24839566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
24841a254869SHong Zhang   for (r = 0; r < m; ++r) {
24851a254869SHong Zhang     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2486c87e5d42SMatthew Knepley       a[r] = diagA[r];
24871a254869SHong Zhang       if (idx) idx[r] = cstart + diagIdx[r];
24881a254869SHong Zhang     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
24891a254869SHong Zhang       a[r] = diagA[r];
24901a254869SHong Zhang       if (idx) {
24911a254869SHong Zhang         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2492c87e5d42SMatthew Knepley           idx[r] = cstart + diagIdx[r];
24931a254869SHong Zhang         } else idx[r] = offdiagIdx[r];
24941a254869SHong Zhang       }
2495c87e5d42SMatthew Knepley     } else {
2496c87e5d42SMatthew Knepley       a[r] = offdiagA[r];
24971a254869SHong Zhang       if (idx) idx[r] = offdiagIdx[r];
2498c87e5d42SMatthew Knepley     }
2499c87e5d42SMatthew Knepley   }
25009566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
25019566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(v, &a));
25029566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
25039566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
25049566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&diagV));
25059566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&offdiagV));
25069566063dSJacob Faibussowitsch   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2507c87e5d42SMatthew Knepley   PetscFunctionReturn(0);
2508c87e5d42SMatthew Knepley }
2509c87e5d42SMatthew Knepley 
25109371c9d4SSatish Balay PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) {
2511f6d58c54SBarry Smith   Mat *dummy;
25125494a064SHong Zhang 
25135494a064SHong Zhang   PetscFunctionBegin;
25149566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2515f6d58c54SBarry Smith   *newmat = *dummy;
25169566063dSJacob Faibussowitsch   PetscCall(PetscFree(dummy));
25175494a064SHong Zhang   PetscFunctionReturn(0);
25185494a064SHong Zhang }
25195494a064SHong Zhang 
25209371c9d4SSatish Balay PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) {
2521bbead8a2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2522bbead8a2SBarry Smith 
2523bbead8a2SBarry Smith   PetscFunctionBegin;
25249566063dSJacob Faibussowitsch   PetscCall(MatInvertBlockDiagonal(a->A, values));
25257b6c816cSBarry Smith   A->factorerrortype = a->A->factorerrortype;
2526bbead8a2SBarry Smith   PetscFunctionReturn(0);
2527bbead8a2SBarry Smith }
2528bbead8a2SBarry Smith 
25299371c9d4SSatish Balay static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) {
253073a71a0fSBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
253173a71a0fSBarry Smith 
253273a71a0fSBarry Smith   PetscFunctionBegin;
253308401ef6SPierre Jolivet   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
25349566063dSJacob Faibussowitsch   PetscCall(MatSetRandom(aij->A, rctx));
2535679944adSJunchao Zhang   if (x->assembled) {
25369566063dSJacob Faibussowitsch     PetscCall(MatSetRandom(aij->B, rctx));
2537679944adSJunchao Zhang   } else {
25389566063dSJacob Faibussowitsch     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2539679944adSJunchao Zhang   }
25409566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
25419566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
254273a71a0fSBarry Smith   PetscFunctionReturn(0);
254373a71a0fSBarry Smith }
2544bbead8a2SBarry Smith 
25459371c9d4SSatish Balay PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) {
2546b1b1104fSBarry Smith   PetscFunctionBegin;
2547b1b1104fSBarry Smith   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2548b1b1104fSBarry Smith   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2549b1b1104fSBarry Smith   PetscFunctionReturn(0);
2550b1b1104fSBarry Smith }
2551b1b1104fSBarry Smith 
2552b1b1104fSBarry Smith /*@
2553f2afee66SBarry Smith    MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2554f2afee66SBarry Smith 
2555f2afee66SBarry Smith    Not collective
2556f2afee66SBarry Smith 
2557f2afee66SBarry Smith    Input Parameter:
2558f2afee66SBarry Smith .    A - the matrix
2559f2afee66SBarry Smith 
2560f2afee66SBarry Smith    Output Parameter:
2561f2afee66SBarry Smith .    nz - the number of nonzeros
2562f2afee66SBarry Smith 
2563f2afee66SBarry Smith  Level: advanced
2564f2afee66SBarry Smith 
256511a5261eSBarry Smith .seealso: `MATMPIAIJ`, `Mat`
2566f2afee66SBarry Smith @*/
25679371c9d4SSatish Balay PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) {
2568f2afee66SBarry Smith   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2569f2afee66SBarry Smith   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2570f2afee66SBarry Smith 
2571f2afee66SBarry Smith   PetscFunctionBegin;
2572f2afee66SBarry Smith   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2573f2afee66SBarry Smith   PetscFunctionReturn(0);
2574f2afee66SBarry Smith }
2575f2afee66SBarry Smith 
2576f2afee66SBarry Smith /*@
2577b1b1104fSBarry Smith    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2578b1b1104fSBarry Smith 
257911a5261eSBarry Smith    Collective on A
2580b1b1104fSBarry Smith 
2581b1b1104fSBarry Smith    Input Parameters:
2582b1b1104fSBarry Smith +    A - the matrix
258311a5261eSBarry Smith -    sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2584b1b1104fSBarry Smith 
258596a0c994SBarry Smith  Level: advanced
258696a0c994SBarry Smith 
2587b1b1104fSBarry Smith @*/
25889371c9d4SSatish Balay PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) {
2589b1b1104fSBarry Smith   PetscFunctionBegin;
2590cac4c232SBarry Smith   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2591b1b1104fSBarry Smith   PetscFunctionReturn(0);
2592b1b1104fSBarry Smith }
2593b1b1104fSBarry Smith 
25949371c9d4SSatish Balay PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) {
2595b1b1104fSBarry Smith   PetscBool sc = PETSC_FALSE, flg;
2596b1b1104fSBarry Smith 
2597b1b1104fSBarry Smith   PetscFunctionBegin;
2598d0609cedSBarry Smith   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2599b1b1104fSBarry Smith   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
26009566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
26011baa6e33SBarry Smith   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2602d0609cedSBarry Smith   PetscOptionsHeadEnd();
2603b1b1104fSBarry Smith   PetscFunctionReturn(0);
2604b1b1104fSBarry Smith }
2605b1b1104fSBarry Smith 
26069371c9d4SSatish Balay PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) {
26077d68702bSBarry Smith   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2608c5e4d11fSDmitry Karpeev   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
26097d68702bSBarry Smith 
26107d68702bSBarry Smith   PetscFunctionBegin;
2611c5e4d11fSDmitry Karpeev   if (!Y->preallocated) {
26129566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
26135519a089SJose E. Roman   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2614b83222d8SBarry Smith     PetscInt nonew = aij->nonew;
26159566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2616b83222d8SBarry Smith     aij->nonew = nonew;
26177d68702bSBarry Smith   }
26189566063dSJacob Faibussowitsch   PetscCall(MatShift_Basic(Y, a));
26197d68702bSBarry Smith   PetscFunctionReturn(0);
26207d68702bSBarry Smith }
26217d68702bSBarry Smith 
26229371c9d4SSatish Balay PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) {
26233b49f96aSBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
26243b49f96aSBarry Smith 
26253b49f96aSBarry Smith   PetscFunctionBegin;
262608401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
26279566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(a->A, missing, d));
26283b49f96aSBarry Smith   if (d) {
26293b49f96aSBarry Smith     PetscInt rstart;
26309566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
26313b49f96aSBarry Smith     *d += rstart;
26323b49f96aSBarry Smith   }
26333b49f96aSBarry Smith   PetscFunctionReturn(0);
26343b49f96aSBarry Smith }
26353b49f96aSBarry Smith 
26369371c9d4SSatish Balay PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) {
2637a8ee9fb5SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2638a8ee9fb5SBarry Smith 
2639a8ee9fb5SBarry Smith   PetscFunctionBegin;
26409566063dSJacob Faibussowitsch   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2641a8ee9fb5SBarry Smith   PetscFunctionReturn(0);
2642a8ee9fb5SBarry Smith }
26433b49f96aSBarry Smith 
/* -------------------------------------------------------------------*/
/*
  MatOps_Values - table of the MPIAIJ implementations of the matrix operations.

  The initializer is positional: each entry fills the next member of struct _MatOps, so entries must never be
  reordered, inserted, or removed without matching the struct declaration. The numbered comments give the
  slot index of the entry they precede. NULL marks a slot for which this table supplies no routine
  (presumably meaning the operation is unsupported or handled elsewhere - confirm against struct _MatOps).
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       MatFilter_AIJ,
                                       /*150*/ NULL};
279636ce4990SBarry Smith 
27972e8a6d31SBarry Smith /* ----------------------------------------------------------------------------------------*/
27982e8a6d31SBarry Smith 
27999371c9d4SSatish Balay PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) {
28002e8a6d31SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
28012e8a6d31SBarry Smith 
28022e8a6d31SBarry Smith   PetscFunctionBegin;
28039566063dSJacob Faibussowitsch   PetscCall(MatStoreValues(aij->A));
28049566063dSJacob Faibussowitsch   PetscCall(MatStoreValues(aij->B));
28052e8a6d31SBarry Smith   PetscFunctionReturn(0);
28062e8a6d31SBarry Smith }
28072e8a6d31SBarry Smith 
28089371c9d4SSatish Balay PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) {
28092e8a6d31SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
28102e8a6d31SBarry Smith 
28112e8a6d31SBarry Smith   PetscFunctionBegin;
28129566063dSJacob Faibussowitsch   PetscCall(MatRetrieveValues(aij->A));
28139566063dSJacob Faibussowitsch   PetscCall(MatRetrieveValues(aij->B));
28142e8a6d31SBarry Smith   PetscFunctionReturn(0);
28152e8a6d31SBarry Smith }
28168a729477SBarry Smith 
28179371c9d4SSatish Balay PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
2818a23d5eceSKris Buschelman   Mat_MPIAIJ *b;
28195d2a9ed1SStefano Zampini   PetscMPIInt size;
2820a23d5eceSKris Buschelman 
2821a23d5eceSKris Buschelman   PetscFunctionBegin;
28229566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
28239566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
2824a23d5eceSKris Buschelman   b = (Mat_MPIAIJ *)B->data;
2825899cda47SBarry Smith 
2826cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE)
28279566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&b->colmap));
2828cb7b82ddSBarry Smith #else
28299566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->colmap));
2830cb7b82ddSBarry Smith #endif
28319566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->garray));
28329566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->lvec));
28339566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->Mvctx));
2834cb7b82ddSBarry Smith 
2835cb7b82ddSBarry Smith   /* Because the B will have been resized we simply destroy it and create a new one each time */
28369566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
28379566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->B));
28389566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
28399566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
28409566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
28419566063dSJacob Faibussowitsch   PetscCall(MatSetType(b->B, MATSEQAIJ));
2842cb7b82ddSBarry Smith 
2843cb7b82ddSBarry Smith   if (!B->preallocated) {
28449566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
28459566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
28469566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
28479566063dSJacob Faibussowitsch     PetscCall(MatSetType(b->A, MATSEQAIJ));
2848526dfc15SBarry Smith   }
2849899cda47SBarry Smith 
28509566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
28519566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2852526dfc15SBarry Smith   B->preallocated  = PETSC_TRUE;
2853cb7b82ddSBarry Smith   B->was_assembled = PETSC_FALSE;
285415001458SStefano Zampini   B->assembled     = PETSC_FALSE;
2855a23d5eceSKris Buschelman   PetscFunctionReturn(0);
2856a23d5eceSKris Buschelman }
2857a23d5eceSKris Buschelman 
28589371c9d4SSatish Balay PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) {
2859846b4da1SFande Kong   Mat_MPIAIJ *b;
2860846b4da1SFande Kong 
2861846b4da1SFande Kong   PetscFunctionBegin;
2862846b4da1SFande Kong   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
28639566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
28649566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
2865846b4da1SFande Kong   b = (Mat_MPIAIJ *)B->data;
2866846b4da1SFande Kong 
2867846b4da1SFande Kong #if defined(PETSC_USE_CTABLE)
28689566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&b->colmap));
2869846b4da1SFande Kong #else
28709566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->colmap));
2871846b4da1SFande Kong #endif
28729566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->garray));
28739566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->lvec));
28749566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->Mvctx));
2875846b4da1SFande Kong 
28769566063dSJacob Faibussowitsch   PetscCall(MatResetPreallocation(b->A));
28779566063dSJacob Faibussowitsch   PetscCall(MatResetPreallocation(b->B));
2878846b4da1SFande Kong   B->preallocated  = PETSC_TRUE;
2879846b4da1SFande Kong   B->was_assembled = PETSC_FALSE;
2880846b4da1SFande Kong   B->assembled     = PETSC_FALSE;
2881846b4da1SFande Kong   PetscFunctionReturn(0);
2882846b4da1SFande Kong }
2883846b4da1SFande Kong 
28849371c9d4SSatish Balay PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) {
2885d6dfbf8fSBarry Smith   Mat         mat;
2886416022c9SBarry Smith   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2887d6dfbf8fSBarry Smith 
28883a40ed3dSBarry Smith   PetscFunctionBegin;
2889f4259b30SLisandro Dalcin   *newmat = NULL;
28909566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
28919566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
28929566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
28939566063dSJacob Faibussowitsch   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2894273d9f13SBarry Smith   a = (Mat_MPIAIJ *)mat->data;
2895e1b6402fSHong Zhang 
2896d5f3da31SBarry Smith   mat->factortype   = matin->factortype;
2897501880eeSStefano Zampini   mat->assembled    = matin->assembled;
2898e7641de0SSatish Balay   mat->insertmode   = NOT_SET_VALUES;
2899501880eeSStefano Zampini   mat->preallocated = matin->preallocated;
2900d6dfbf8fSBarry Smith 
290117699dbbSLois Curfman McInnes   a->size         = oldmat->size;
290217699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
2903e7641de0SSatish Balay   a->donotstash   = oldmat->donotstash;
2904e7641de0SSatish Balay   a->roworiented  = oldmat->roworiented;
2905501880eeSStefano Zampini   a->rowindices   = NULL;
2906501880eeSStefano Zampini   a->rowvalues    = NULL;
2907bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
2908d6dfbf8fSBarry Smith 
29099566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
29109566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2911899cda47SBarry Smith 
29122ee70a88SLois Curfman McInnes   if (oldmat->colmap) {
2913aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
29149566063dSJacob Faibussowitsch     PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
2915b1fc9764SSatish Balay #else
29169566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
29179566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
2918b1fc9764SSatish Balay #endif
2919501880eeSStefano Zampini   } else a->colmap = NULL;
29203f41c07dSBarry Smith   if (oldmat->garray) {
2921b1d57f15SBarry Smith     PetscInt len;
2922d0f46423SBarry Smith     len = oldmat->B->cmap->n;
29239566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(len + 1, &a->garray));
29249566063dSJacob Faibussowitsch     if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2925501880eeSStefano Zampini   } else a->garray = NULL;
2926d6dfbf8fSBarry Smith 
29270de76c62SStefano Zampini   /* It may happen MatDuplicate is called with a non-assembled matrix
29280de76c62SStefano Zampini      In fact, MatDuplicate only requires the matrix to be preallocated
29290de76c62SStefano Zampini      This may happen inside a DMCreateMatrix_Shell */
2930*4dfa11a4SJacob Faibussowitsch   if (oldmat->lvec) { PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); }
2931*4dfa11a4SJacob Faibussowitsch   if (oldmat->Mvctx) { PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); }
29329566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
29339566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
29349566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
29358a729477SBarry Smith   *newmat = mat;
29363a40ed3dSBarry Smith   PetscFunctionReturn(0);
29378a729477SBarry Smith }
2938416022c9SBarry Smith 
29399371c9d4SSatish Balay PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) {
294052f91c60SVaclav Hapla   PetscBool isbinary, ishdf5;
294152f91c60SVaclav Hapla 
294252f91c60SVaclav Hapla   PetscFunctionBegin;
294352f91c60SVaclav Hapla   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
294452f91c60SVaclav Hapla   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
2945c27b3999SVaclav Hapla   /* force binary viewer to load .info file if it has not yet done so */
29469566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
29479566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
29489566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
294952f91c60SVaclav Hapla   if (isbinary) {
29509566063dSJacob Faibussowitsch     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
295152f91c60SVaclav Hapla   } else if (ishdf5) {
295252f91c60SVaclav Hapla #if defined(PETSC_HAVE_HDF5)
29539566063dSJacob Faibussowitsch     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
295452f91c60SVaclav Hapla #else
295552f91c60SVaclav Hapla     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
295652f91c60SVaclav Hapla #endif
295752f91c60SVaclav Hapla   } else {
295898921bdaSJacob Faibussowitsch     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
295952f91c60SVaclav Hapla   }
296052f91c60SVaclav Hapla   PetscFunctionReturn(0);
296152f91c60SVaclav Hapla }
296252f91c60SVaclav Hapla 
29639371c9d4SSatish Balay PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) {
29643ea6fe3dSLisandro Dalcin   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
29653ea6fe3dSLisandro Dalcin   PetscInt    *rowidxs, *colidxs;
29663ea6fe3dSLisandro Dalcin   PetscScalar *matvals;
29678fb81238SShri Abhyankar 
29688fb81238SShri Abhyankar   PetscFunctionBegin;
29699566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
29708fb81238SShri Abhyankar 
29713ea6fe3dSLisandro Dalcin   /* read in matrix header */
29729566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
297308401ef6SPierre Jolivet   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
29749371c9d4SSatish Balay   M  = header[1];
29759371c9d4SSatish Balay   N  = header[2];
29769371c9d4SSatish Balay   nz = header[3];
297708401ef6SPierre Jolivet   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
297808401ef6SPierre Jolivet   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
297908401ef6SPierre Jolivet   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
298008ea439dSMark F. Adams 
29813ea6fe3dSLisandro Dalcin   /* set block sizes from the viewer's .info file */
29829566063dSJacob Faibussowitsch   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
29833ea6fe3dSLisandro Dalcin   /* set global sizes if not set already */
29843ea6fe3dSLisandro Dalcin   if (mat->rmap->N < 0) mat->rmap->N = M;
29853ea6fe3dSLisandro Dalcin   if (mat->cmap->N < 0) mat->cmap->N = N;
29869566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->rmap));
29879566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->cmap));
29888fb81238SShri Abhyankar 
29893ea6fe3dSLisandro Dalcin   /* check if the matrix sizes are correct */
29909566063dSJacob Faibussowitsch   PetscCall(MatGetSize(mat, &rows, &cols));
2991aed4548fSBarry Smith   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
29928fb81238SShri Abhyankar 
29933ea6fe3dSLisandro Dalcin   /* read in row lengths and build row indices */
29949566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, NULL));
29959566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &rowidxs));
29969566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
29979371c9d4SSatish Balay   rowidxs[0] = 0;
29989371c9d4SSatish Balay   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
29991c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
300008401ef6SPierre Jolivet   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
30013ea6fe3dSLisandro Dalcin   /* read in column indices and matrix values */
30029566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
30039566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
30049566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
30053ea6fe3dSLisandro Dalcin   /* store matrix indices and values */
30069566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
30079566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowidxs));
30089566063dSJacob Faibussowitsch   PetscCall(PetscFree2(colidxs, matvals));
30098fb81238SShri Abhyankar   PetscFunctionReturn(0);
30108fb81238SShri Abhyankar }
30118fb81238SShri Abhyankar 
30123782ecc7SHong Zhang /* Not scalable because of ISAllGather() unless getting all columns. */
30139371c9d4SSatish Balay PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) {
30144aa3045dSJed Brown   IS          iscol_local;
3015c5e4d11fSDmitry Karpeev   PetscBool   isstride;
3016c5e4d11fSDmitry Karpeev   PetscMPIInt lisstride = 0, gisstride;
30173782ecc7SHong Zhang 
30183782ecc7SHong Zhang   PetscFunctionBegin;
30193782ecc7SHong Zhang   /* check if we are grabbing all columns*/
30209566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
30213782ecc7SHong Zhang 
3022c5e4d11fSDmitry Karpeev   if (isstride) {
3023c5e4d11fSDmitry Karpeev     PetscInt start, len, mstart, mlen;
30249566063dSJacob Faibussowitsch     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
30259566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol, &len));
30269566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3027c5e4d11fSDmitry Karpeev     if (mstart == start && mlen - mstart == len) lisstride = 1;
3028c5e4d11fSDmitry Karpeev   }
30293782ecc7SHong Zhang 
30301c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3031c5e4d11fSDmitry Karpeev   if (gisstride) {
3032c5e4d11fSDmitry Karpeev     PetscInt N;
30339566063dSJacob Faibussowitsch     PetscCall(MatGetSize(mat, NULL, &N));
30349566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
30359566063dSJacob Faibussowitsch     PetscCall(ISSetIdentity(iscol_local));
30369566063dSJacob Faibussowitsch     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3037c5e4d11fSDmitry Karpeev   } else {
3038c5bfad50SMark F. Adams     PetscInt cbs;
30399566063dSJacob Faibussowitsch     PetscCall(ISGetBlockSize(iscol, &cbs));
30409566063dSJacob Faibussowitsch     PetscCall(ISAllGather(iscol, &iscol_local));
30419566063dSJacob Faibussowitsch     PetscCall(ISSetBlockSize(iscol_local, cbs));
3042b79d0421SJed Brown   }
30433782ecc7SHong Zhang 
30443782ecc7SHong Zhang   *isseq = iscol_local;
30453782ecc7SHong Zhang   PetscFunctionReturn(0);
3046c5e4d11fSDmitry Karpeev }
30478d2139bdSHong Zhang 
3048ddfdf956SHong Zhang /*
30499c988bcaSHong Zhang  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
30509c988bcaSHong Zhang  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3051ddfdf956SHong Zhang 
3052ddfdf956SHong Zhang  Input Parameters:
3053ddfdf956SHong Zhang    mat - matrix
30549c988bcaSHong Zhang    isrow - parallel row index set; its local indices are a subset of local columns of mat,
30559c988bcaSHong Zhang            i.e., mat->rstart <= isrow[i] < mat->rend
3056ddfdf956SHong Zhang    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3057ddfdf956SHong Zhang            i.e., mat->cstart <= iscol[i] < mat->cend
3058ddfdf956SHong Zhang  Output Parameter:
30599c988bcaSHong Zhang    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
30609c988bcaSHong Zhang    iscol_o - sequential column index set for retrieving mat->B
30619c988bcaSHong Zhang    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3062ddfdf956SHong Zhang  */
30639371c9d4SSatish Balay PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) {
3064040216a4SHong Zhang   Vec             x, cmap;
3065040216a4SHong Zhang   const PetscInt *is_idx;
3066040216a4SHong Zhang   PetscScalar    *xarray, *cmaparray;
30679c988bcaSHong Zhang   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3068040216a4SHong Zhang   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3069040216a4SHong Zhang   Mat             B    = a->B;
3070040216a4SHong Zhang   Vec             lvec = a->lvec, lcmap;
3071a31a438cSHong Zhang   PetscInt        i, cstart, cend, Bn = B->cmap->N;
30728b3fa1f7SHong Zhang   MPI_Comm        comm;
30733a8d973cSHong Zhang   VecScatter      Mvctx = a->Mvctx;
30743782ecc7SHong Zhang 
30753782ecc7SHong Zhang   PetscFunctionBegin;
30769566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
30779566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol, &ncols));
30788b3fa1f7SHong Zhang 
3079ddfdf956SHong Zhang   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
30809566063dSJacob Faibussowitsch   PetscCall(MatCreateVecs(mat, &x, NULL));
30819566063dSJacob Faibussowitsch   PetscCall(VecSet(x, -1.0));
30829566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(x, &cmap));
30839566063dSJacob Faibussowitsch   PetscCall(VecSet(cmap, -1.0));
30840a351717SHong Zhang 
30859c988bcaSHong Zhang   /* Get start indices */
30869566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3087ddfdf956SHong Zhang   isstart -= ncols;
30889566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3089040216a4SHong Zhang 
30909566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iscol, &is_idx));
30919566063dSJacob Faibussowitsch   PetscCall(VecGetArray(x, &xarray));
30929566063dSJacob Faibussowitsch   PetscCall(VecGetArray(cmap, &cmaparray));
30939566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ncols, &idx));
3094ddfdf956SHong Zhang   for (i = 0; i < ncols; i++) {
30958b3fa1f7SHong Zhang     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3096ddfdf956SHong Zhang     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
30979c988bcaSHong Zhang     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
30988b3fa1f7SHong Zhang   }
30999566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(x, &xarray));
31009566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(cmap, &cmaparray));
31019566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &is_idx));
31028b3fa1f7SHong Zhang 
31039c988bcaSHong Zhang   /* Get iscol_d */
31049566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
31059566063dSJacob Faibussowitsch   PetscCall(ISGetBlockSize(iscol, &i));
31069566063dSJacob Faibussowitsch   PetscCall(ISSetBlockSize(*iscol_d, i));
3107feb78a15SHong Zhang 
31089c988bcaSHong Zhang   /* Get isrow_d */
31099566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(isrow, &m));
3110feb78a15SHong Zhang   rstart = mat->rmap->rstart;
31119566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &idx));
31129566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &is_idx));
31139c988bcaSHong Zhang   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
31149566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &is_idx));
3115feb78a15SHong Zhang 
31169566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
31179566063dSJacob Faibussowitsch   PetscCall(ISGetBlockSize(isrow, &i));
31189566063dSJacob Faibussowitsch   PetscCall(ISSetBlockSize(*isrow_d, i));
3119feb78a15SHong Zhang 
31209c988bcaSHong Zhang   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
31219566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
31229566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3123ddfdf956SHong Zhang 
31249566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(lvec, &lcmap));
312507250d77SHong Zhang 
31269566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
31279566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
312864efcef9SHong Zhang 
31299c988bcaSHong Zhang   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3130ddfdf956SHong Zhang   /* off-process column indices */
31319c988bcaSHong Zhang   count = 0;
31329566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(Bn, &idx));
31339566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(Bn, &cmap1));
3134feb78a15SHong Zhang 
31359566063dSJacob Faibussowitsch   PetscCall(VecGetArray(lvec, &xarray));
31369566063dSJacob Faibussowitsch   PetscCall(VecGetArray(lcmap, &cmaparray));
31378b3fa1f7SHong Zhang   for (i = 0; i < Bn; i++) {
3138f73421bfSHong Zhang     if (PetscRealPart(xarray[i]) > -1.0) {
31399c988bcaSHong Zhang       idx[count]   = i;                                     /* local column index in off-diagonal part B */
31401c645242SHong Zhang       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
31411c645242SHong Zhang       count++;
31428b3fa1f7SHong Zhang     }
31438b3fa1f7SHong Zhang   }
31449566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(lvec, &xarray));
31459566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(lcmap, &cmaparray));
314607250d77SHong Zhang 
31479566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3148b6d9b4e0SHong Zhang   /* cannot ensure iscol_o has same blocksize as iscol! */
3149b6d9b4e0SHong Zhang 
31509566063dSJacob Faibussowitsch   PetscCall(PetscFree(idx));
31519c988bcaSHong Zhang   *garray = cmap1;
31529c988bcaSHong Zhang 
31539566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&x));
31549566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&cmap));
31559566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&lcmap));
3156040216a4SHong Zhang   PetscFunctionReturn(0);
3157040216a4SHong Zhang }
3158040216a4SHong Zhang 
3159b20e2604SHong Zhang /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
31609371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) {
3161b20e2604SHong Zhang   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
31621fd43edeSHong Zhang   Mat         M = NULL;
31633b00a383SHong Zhang   MPI_Comm    comm;
3164b20e2604SHong Zhang   IS          iscol_d, isrow_d, iscol_o;
31653b00a383SHong Zhang   Mat         Asub = NULL, Bsub = NULL;
3166b20e2604SHong Zhang   PetscInt    n;
31673b00a383SHong Zhang 
31683b00a383SHong Zhang   PetscFunctionBegin;
31699566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
31703b00a383SHong Zhang 
31713b00a383SHong Zhang   if (call == MAT_REUSE_MATRIX) {
3172b20e2604SHong Zhang     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
31739566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
317428b400f6SJacob Faibussowitsch     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
31753b00a383SHong Zhang 
31769566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
317728b400f6SJacob Faibussowitsch     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
31783b00a383SHong Zhang 
31799566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
318028b400f6SJacob Faibussowitsch     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
31813b00a383SHong Zhang 
3182b20e2604SHong Zhang     /* Update diagonal and off-diagonal portions of submat */
3183b20e2604SHong Zhang     asub = (Mat_MPIAIJ *)(*submat)->data;
31849566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
31859566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol_o, &n));
318648a46eb9SPierre Jolivet     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
31879566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
31889566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
31893b00a383SHong Zhang 
31903b00a383SHong Zhang   } else { /* call == MAT_INITIAL_MATRIX) */
31919c988bcaSHong Zhang     const PetscInt *garray;
3192b20e2604SHong Zhang     PetscInt        BsubN;
31933b00a383SHong Zhang 
3194b20e2604SHong Zhang     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
31959566063dSJacob Faibussowitsch     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
31963b00a383SHong Zhang 
3197b20e2604SHong Zhang     /* Create local submatrices Asub and Bsub */
31989566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
31999566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
32003b00a383SHong Zhang 
32019c988bcaSHong Zhang     /* Create submatrix M */
32029566063dSJacob Faibussowitsch     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
32033b00a383SHong Zhang 
3204b20e2604SHong Zhang     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3205b20e2604SHong Zhang     asub = (Mat_MPIAIJ *)M->data;
32067cfce09cSHong Zhang 
32079566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3208b20e2604SHong Zhang     n = asub->B->cmap->N;
3209b20e2604SHong Zhang     if (BsubN > n) {
3210c4762a1bSJed Brown       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
32117cfce09cSHong Zhang       const PetscInt *idx;
32129c988bcaSHong Zhang       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
32139566063dSJacob Faibussowitsch       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
32147cfce09cSHong Zhang 
32159566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(n, &idx_new));
32167cfce09cSHong Zhang       j = 0;
32179566063dSJacob Faibussowitsch       PetscCall(ISGetIndices(iscol_o, &idx));
3218b20e2604SHong Zhang       for (i = 0; i < n; i++) {
32197cfce09cSHong Zhang         if (j >= BsubN) break;
32209c988bcaSHong Zhang         while (subgarray[i] > garray[j]) j++;
32217cfce09cSHong Zhang 
32229c988bcaSHong Zhang         if (subgarray[i] == garray[j]) {
32237cfce09cSHong Zhang           idx_new[i] = idx[j++];
322498921bdaSJacob Faibussowitsch         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
32257cfce09cSHong Zhang       }
32269566063dSJacob Faibussowitsch       PetscCall(ISRestoreIndices(iscol_o, &idx));
32277cfce09cSHong Zhang 
32289566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&iscol_o));
32299566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
32307cfce09cSHong Zhang 
3231b20e2604SHong Zhang     } else if (BsubN < n) {
323298921bdaSJacob Faibussowitsch       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3233b20e2604SHong Zhang     }
32347cfce09cSHong Zhang 
32359566063dSJacob Faibussowitsch     PetscCall(PetscFree(garray));
3236b20e2604SHong Zhang     *submat = M;
32373b00a383SHong Zhang 
3238e489de8fSHong Zhang     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
32399566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
32409566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&isrow_d));
32413b00a383SHong Zhang 
32429566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
32439566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol_d));
32443b00a383SHong Zhang 
32459566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
32469566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol_o));
32473b00a383SHong Zhang   }
32483b00a383SHong Zhang   PetscFunctionReturn(0);
32493b00a383SHong Zhang }
32503b00a383SHong Zhang 
32519371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) {
32521358a193SHong Zhang   IS        iscol_local = NULL, isrow_d;
32533782ecc7SHong Zhang   PetscInt  csize;
325418e627e3SHong Zhang   PetscInt  n, i, j, start, end;
32554a3daf6eSHong Zhang   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
32563782ecc7SHong Zhang   MPI_Comm  comm;
32573782ecc7SHong Zhang 
32583782ecc7SHong Zhang   PetscFunctionBegin;
3259bcae8d28SHong Zhang   /* If isrow has same processor distribution as mat,
3260a31a438cSHong Zhang      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
32618f69fa7bSHong Zhang   if (call == MAT_REUSE_MATRIX) {
32629566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3263d5761cdaSHong Zhang     if (isrow_d) {
3264d5761cdaSHong Zhang       sameRowDist  = PETSC_TRUE;
3265d5761cdaSHong Zhang       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3266d5761cdaSHong Zhang     } else {
32679566063dSJacob Faibussowitsch       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3268d5761cdaSHong Zhang       if (iscol_local) {
3269d5761cdaSHong Zhang         sameRowDist  = PETSC_TRUE;
3270d5761cdaSHong Zhang         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3271d5761cdaSHong Zhang       }
3272d5761cdaSHong Zhang     }
32738f69fa7bSHong Zhang   } else {
3274e489de8fSHong Zhang     /* Check if isrow has same processor distribution as mat */
327518e627e3SHong Zhang     sameDist[0] = PETSC_FALSE;
32769566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(isrow, &n));
32773782ecc7SHong Zhang     if (!n) {
327818e627e3SHong Zhang       sameDist[0] = PETSC_TRUE;
32793782ecc7SHong Zhang     } else {
32809566063dSJacob Faibussowitsch       PetscCall(ISGetMinMax(isrow, &i, &j));
32819566063dSJacob Faibussowitsch       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3282ad540459SPierre Jolivet       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
32838f69fa7bSHong Zhang     }
32843782ecc7SHong Zhang 
3285e489de8fSHong Zhang     /* Check if iscol has same processor distribution as mat */
328618e627e3SHong Zhang     sameDist[1] = PETSC_FALSE;
32879566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol, &n));
328818e627e3SHong Zhang     if (!n) {
328918e627e3SHong Zhang       sameDist[1] = PETSC_TRUE;
329018e627e3SHong Zhang     } else {
32919566063dSJacob Faibussowitsch       PetscCall(ISGetMinMax(iscol, &i, &j));
32929566063dSJacob Faibussowitsch       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
329318e627e3SHong Zhang       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
329418e627e3SHong Zhang     }
329518e627e3SHong Zhang 
32969566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
32971c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
329818e627e3SHong Zhang     sameRowDist = tsameDist[0];
329918e627e3SHong Zhang   }
330018e627e3SHong Zhang 
330118e627e3SHong Zhang   if (sameRowDist) {
3302b20e2604SHong Zhang     if (tsameDist[1]) { /* sameRowDist & sameColDist */
33033b00a383SHong Zhang       /* isrow and iscol have same processor distribution as mat */
33049566063dSJacob Faibussowitsch       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
33051358a193SHong Zhang       PetscFunctionReturn(0);
3306b20e2604SHong Zhang     } else { /* sameRowDist */
33073b00a383SHong Zhang       /* isrow has same processor distribution as mat */
33081358a193SHong Zhang       if (call == MAT_INITIAL_MATRIX) {
33091358a193SHong Zhang         PetscBool sorted;
33109566063dSJacob Faibussowitsch         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
33119566063dSJacob Faibussowitsch         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
33129566063dSJacob Faibussowitsch         PetscCall(ISGetSize(iscol, &i));
331308401ef6SPierre Jolivet         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
33141358a193SHong Zhang 
33159566063dSJacob Faibussowitsch         PetscCall(ISSorted(iscol_local, &sorted));
33161358a193SHong Zhang         if (sorted) {
33171358a193SHong Zhang           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
33189566063dSJacob Faibussowitsch           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
33193782ecc7SHong Zhang           PetscFunctionReturn(0);
33203782ecc7SHong Zhang         }
33211358a193SHong Zhang       } else { /* call == MAT_REUSE_MATRIX */
332248c0d076SHong Zhang         IS iscol_sub;
33239566063dSJacob Faibussowitsch         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
332448c0d076SHong Zhang         if (iscol_sub) {
33259566063dSJacob Faibussowitsch           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
332648c0d076SHong Zhang           PetscFunctionReturn(0);
332748c0d076SHong Zhang         }
33281358a193SHong Zhang       }
33291358a193SHong Zhang     }
33301358a193SHong Zhang   }
33313782ecc7SHong Zhang 
3332bcae8d28SHong Zhang   /* General case: iscol -> iscol_local which has global size of iscol */
33333782ecc7SHong Zhang   if (call == MAT_REUSE_MATRIX) {
33349566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
333528b400f6SJacob Faibussowitsch     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
33363782ecc7SHong Zhang   } else {
333748a46eb9SPierre Jolivet     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
33381358a193SHong Zhang   }
33393782ecc7SHong Zhang 
33409566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol, &csize));
33419566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
33428f69fa7bSHong Zhang 
3343b79d0421SJed Brown   if (call == MAT_INITIAL_MATRIX) {
33449566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
33459566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol_local));
3346b79d0421SJed Brown   }
33474aa3045dSJed Brown   PetscFunctionReturn(0);
33484aa3045dSJed Brown }
33494aa3045dSJed Brown 
3350feb78a15SHong Zhang /*@C
335111a5261eSBarry Smith      MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3352feb78a15SHong Zhang          and "off-diagonal" part of the matrix in CSR format.
3353feb78a15SHong Zhang 
3354d083f849SBarry Smith    Collective
3355feb78a15SHong Zhang 
3356feb78a15SHong Zhang    Input Parameters:
3357feb78a15SHong Zhang +  comm - MPI communicator
3358feb78a15SHong Zhang .  A - "diagonal" portion of matrix
3359b20e2604SHong Zhang .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3360feb78a15SHong Zhang -  garray - global index of B columns
3361feb78a15SHong Zhang 
3362feb78a15SHong Zhang    Output Parameter:
3363d5761cdaSHong Zhang .   mat - the matrix, with input A as its local diagonal matrix
3364feb78a15SHong Zhang    Level: advanced
3365feb78a15SHong Zhang 
3366feb78a15SHong Zhang    Notes:
336711a5261eSBarry Smith    See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
336811a5261eSBarry Smith 
3369d5761cdaSHong Zhang    A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3370feb78a15SHong Zhang 
337111a5261eSBarry Smith .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3372feb78a15SHong Zhang @*/
33739371c9d4SSatish Balay PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) {
3374feb78a15SHong Zhang   Mat_MPIAIJ        *maij;
3375e489de8fSHong Zhang   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3376a5348796SHong Zhang   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3377ce496241SStefano Zampini   const PetscScalar *oa;
3378e489de8fSHong Zhang   Mat                Bnew;
3379feb78a15SHong Zhang   PetscInt           m, n, N;
33804ab4d6f4SRichard Tran Mills   MatType            mpi_mat_type;
3381feb78a15SHong Zhang 
3382feb78a15SHong Zhang   PetscFunctionBegin;
33839566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
33849566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, &m, &n));
338508401ef6SPierre Jolivet   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
338608401ef6SPierre Jolivet   PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3387b6d9b4e0SHong Zhang   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
338808401ef6SPierre Jolivet   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3389feb78a15SHong Zhang 
3390e489de8fSHong Zhang   /* Get global columns of mat */
33911c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3392feb78a15SHong Zhang 
33939566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
33944ab4d6f4SRichard Tran Mills   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
33954ab4d6f4SRichard Tran Mills   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
33964ab4d6f4SRichard Tran Mills   PetscCall(MatSetType(*mat, mpi_mat_type));
33974ab4d6f4SRichard Tran Mills 
33989566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3399feb78a15SHong Zhang   maij = (Mat_MPIAIJ *)(*mat)->data;
3400feb78a15SHong Zhang 
3401feb78a15SHong Zhang   (*mat)->preallocated = PETSC_TRUE;
3402feb78a15SHong Zhang 
34039566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp((*mat)->rmap));
34049566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3405feb78a15SHong Zhang 
3406e489de8fSHong Zhang   /* Set A as diagonal portion of *mat */
3407feb78a15SHong Zhang   maij->A = A;
3408feb78a15SHong Zhang 
3409a5348796SHong Zhang   nz = oi[m];
3410a5348796SHong Zhang   for (i = 0; i < nz; i++) {
3411a5348796SHong Zhang     col   = oj[i];
3412a5348796SHong Zhang     oj[i] = garray[col];
3413feb78a15SHong Zhang   }
3414feb78a15SHong Zhang 
3415e489de8fSHong Zhang   /* Set Bnew as off-diagonal portion of *mat */
34169566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
34179566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
34189566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3419e489de8fSHong Zhang   bnew        = (Mat_SeqAIJ *)Bnew->data;
3420e489de8fSHong Zhang   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3421e489de8fSHong Zhang   maij->B     = Bnew;
3422d5761cdaSHong Zhang 
342308401ef6SPierre Jolivet   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3424d5761cdaSHong Zhang 
3425e489de8fSHong Zhang   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3426d5761cdaSHong Zhang   b->free_a       = PETSC_FALSE;
3427d5761cdaSHong Zhang   b->free_ij      = PETSC_FALSE;
34289566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
3429d5761cdaSHong Zhang 
3430e489de8fSHong Zhang   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3431e489de8fSHong Zhang   bnew->free_a       = PETSC_TRUE;
3432e489de8fSHong Zhang   bnew->free_ij      = PETSC_TRUE;
3433feb78a15SHong Zhang 
3434a5348796SHong Zhang   /* condense columns of maij->B */
34359566063dSJacob Faibussowitsch   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
34369566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
34379566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
34389566063dSJacob Faibussowitsch   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
34399566063dSJacob Faibussowitsch   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3440feb78a15SHong Zhang   PetscFunctionReturn(0);
3441feb78a15SHong Zhang }
3442feb78a15SHong Zhang 
3443ef514586SHong Zhang extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
34444aa3045dSJed Brown 
34459371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) {
344698b658c4SHong Zhang   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
344785f27616SHong Zhang   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
344898b658c4SHong Zhang   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
34491fd43edeSHong Zhang   Mat             M, Msub, B = a->B;
345098b658c4SHong Zhang   MatScalar      *aa;
345100e6dbe6SBarry Smith   Mat_SeqAIJ     *aij;
3452a31a438cSHong Zhang   PetscInt       *garray = a->garray, *colsub, Ncols;
345398b658c4SHong Zhang   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
345498b658c4SHong Zhang   IS              iscol_sub, iscmap;
345598b658c4SHong Zhang   const PetscInt *is_idx, *cmap;
345618e627e3SHong Zhang   PetscBool       allcolumns = PETSC_FALSE;
3457a31a438cSHong Zhang   MPI_Comm        comm;
34587e2c5f70SBarry Smith 
3459a0ff6018SBarry Smith   PetscFunctionBegin;
34609566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3461d5761cdaSHong Zhang   if (call == MAT_REUSE_MATRIX) {
34629566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
346328b400f6SJacob Faibussowitsch     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
34649566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol_sub, &count));
3465d5761cdaSHong Zhang 
34669566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
346728b400f6SJacob Faibussowitsch     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3468d5761cdaSHong Zhang 
34699566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
347028b400f6SJacob Faibussowitsch     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3471d5761cdaSHong Zhang 
34729566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3473d5761cdaSHong Zhang 
3474d5761cdaSHong Zhang   } else { /* call == MAT_INITIAL_MATRIX) */
34753b00a383SHong Zhang     PetscBool flg;
34763b00a383SHong Zhang 
34779566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol, &n));
34789566063dSJacob Faibussowitsch     PetscCall(ISGetSize(iscol, &Ncols));
3479bcae8d28SHong Zhang 
34803b00a383SHong Zhang     /* (1) iscol -> nonscalable iscol_local */
3481366a327dSHong Zhang     /* Check for special case: each processor gets entire matrix columns */
34829566063dSJacob Faibussowitsch     PetscCall(ISIdentity(iscol_local, &flg));
3483366a327dSHong Zhang     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
34841c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3485366a327dSHong Zhang     if (allcolumns) {
3486366a327dSHong Zhang       iscol_sub = iscol_local;
34879566063dSJacob Faibussowitsch       PetscCall(PetscObjectReference((PetscObject)iscol_local));
34889566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3489366a327dSHong Zhang 
34903b00a383SHong Zhang     } else {
34911358a193SHong Zhang       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3492244c7f15SHong Zhang       PetscInt *idx, *cmap1, k;
34939566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(Ncols, &idx));
34949566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(Ncols, &cmap1));
34959566063dSJacob Faibussowitsch       PetscCall(ISGetIndices(iscol_local, &is_idx));
34968d2139bdSHong Zhang       count = 0;
3497a31a438cSHong Zhang       k     = 0;
3498a31a438cSHong Zhang       for (i = 0; i < Ncols; i++) {
3499a31a438cSHong Zhang         j = is_idx[i];
3500a31a438cSHong Zhang         if (j >= cstart && j < cend) {
3501a31a438cSHong Zhang           /* diagonal part of mat */
35028d2139bdSHong Zhang           idx[count]     = j;
3503366a327dSHong Zhang           cmap1[count++] = i; /* column index in submat */
35044a3daf6eSHong Zhang         } else if (Bn) {
3505a31a438cSHong Zhang           /* off-diagonal part of mat */
3506a31a438cSHong Zhang           if (j == garray[k]) {
35078d2139bdSHong Zhang             idx[count]     = j;
3508a31a438cSHong Zhang             cmap1[count++] = i; /* column index in submat */
3509a31a438cSHong Zhang           } else if (j > garray[k]) {
3510a31a438cSHong Zhang             while (j > garray[k] && k < Bn - 1) k++;
3511a31a438cSHong Zhang             if (j == garray[k]) {
3512a31a438cSHong Zhang               idx[count]     = j;
3513a31a438cSHong Zhang               cmap1[count++] = i; /* column index in submat */
35148d2139bdSHong Zhang             }
35158d2139bdSHong Zhang           }
35168d2139bdSHong Zhang         }
35178d2139bdSHong Zhang       }
35189566063dSJacob Faibussowitsch       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
35198d2139bdSHong Zhang 
35209566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
35219566063dSJacob Faibussowitsch       PetscCall(ISGetBlockSize(iscol, &cbs));
35229566063dSJacob Faibussowitsch       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3523b6d9b4e0SHong Zhang 
35249566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3525a31a438cSHong Zhang     }
35268b3fa1f7SHong Zhang 
35273b00a383SHong Zhang     /* (3) Create sequential Msub */
35289566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3529d5761cdaSHong Zhang   }
35308d2139bdSHong Zhang 
35319566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol_sub, &count));
353298b658c4SHong Zhang   aij = (Mat_SeqAIJ *)(Msub)->data;
353398b658c4SHong Zhang   ii  = aij->i;
35349566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iscmap, &cmap));
3535a0ff6018SBarry Smith 
3536a0ff6018SBarry Smith   /*
3537a0ff6018SBarry Smith       m - number of local rows
3538a31a438cSHong Zhang       Ncols - number of columns (same on all processors)
3539a0ff6018SBarry Smith       rstart - first row in new global matrix generated
3540a0ff6018SBarry Smith   */
35419566063dSJacob Faibussowitsch   PetscCall(MatGetSize(Msub, &m, NULL));
354298b658c4SHong Zhang 
35433b00a383SHong Zhang   if (call == MAT_INITIAL_MATRIX) {
35443b00a383SHong Zhang     /* (4) Create parallel newmat */
354598b658c4SHong Zhang     PetscMPIInt rank, size;
3546bcae8d28SHong Zhang     PetscInt    csize;
354798b658c4SHong Zhang 
35489566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(comm, &size));
35499566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &rank));
355000e6dbe6SBarry Smith 
3551a0ff6018SBarry Smith     /*
355200e6dbe6SBarry Smith         Determine the number of non-zeros in the diagonal and off-diagonal
355300e6dbe6SBarry Smith         portions of the matrix in order to do correct preallocation
3554a0ff6018SBarry Smith     */
355500e6dbe6SBarry Smith 
355600e6dbe6SBarry Smith     /* first get start and end of "diagonal" columns */
35579566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol, &csize));
35586a6a5d1dSBarry Smith     if (csize == PETSC_DECIDE) {
35599566063dSJacob Faibussowitsch       PetscCall(ISGetSize(isrow, &mglobal));
3560a31a438cSHong Zhang       if (mglobal == Ncols) { /* square matrix */
3561e2c4fddaSBarry Smith         nlocal = m;
35626a6a5d1dSBarry Smith       } else {
3563a31a438cSHong Zhang         nlocal = Ncols / size + ((Ncols % size) > rank);
3564ab50ec6bSBarry Smith       }
3565ab50ec6bSBarry Smith     } else {
35666a6a5d1dSBarry Smith       nlocal = csize;
35676a6a5d1dSBarry Smith     }
35689566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
356900e6dbe6SBarry Smith     rstart = rend - nlocal;
3570aed4548fSBarry Smith     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
357100e6dbe6SBarry Smith 
357200e6dbe6SBarry Smith     /* next, compute all the lengths */
357398b658c4SHong Zhang     jj = aij->j;
35749566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
357500e6dbe6SBarry Smith     olens = dlens + m;
357600e6dbe6SBarry Smith     for (i = 0; i < m; i++) {
357700e6dbe6SBarry Smith       jend = ii[i + 1] - ii[i];
357800e6dbe6SBarry Smith       olen = 0;
357900e6dbe6SBarry Smith       dlen = 0;
358000e6dbe6SBarry Smith       for (j = 0; j < jend; j++) {
358115b2185cSHong Zhang         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
358200e6dbe6SBarry Smith         else dlen++;
358300e6dbe6SBarry Smith         jj++;
358400e6dbe6SBarry Smith       }
358500e6dbe6SBarry Smith       olens[i] = olen;
358600e6dbe6SBarry Smith       dlens[i] = dlen;
358700e6dbe6SBarry Smith     }
3588b6d9b4e0SHong Zhang 
35899566063dSJacob Faibussowitsch     PetscCall(ISGetBlockSize(isrow, &bs));
35909566063dSJacob Faibussowitsch     PetscCall(ISGetBlockSize(iscol, &cbs));
359198b658c4SHong Zhang 
35929566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &M));
35939566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
35949566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(M, bs, cbs));
35959566063dSJacob Faibussowitsch     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
35969566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
35979566063dSJacob Faibussowitsch     PetscCall(PetscFree(dlens));
3598d5761cdaSHong Zhang 
3599d5761cdaSHong Zhang   } else { /* call == MAT_REUSE_MATRIX */
3600a0ff6018SBarry Smith     M = *newmat;
36019566063dSJacob Faibussowitsch     PetscCall(MatGetLocalSize(M, &i, NULL));
360208401ef6SPierre Jolivet     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
36039566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(M));
3604c48de900SBarry Smith     /*
3605c48de900SBarry Smith          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3606c48de900SBarry Smith        rather than the slower MatSetValues().
3607c48de900SBarry Smith     */
3608c48de900SBarry Smith     M->was_assembled = PETSC_TRUE;
3609c48de900SBarry Smith     M->assembled     = PETSC_FALSE;
3610a0ff6018SBarry Smith   }
3611548ecf4dSHong Zhang 
36123b00a383SHong Zhang   /* (5) Set values of Msub to *newmat */
36139566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(count, &colsub));
36149566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
361598b658c4SHong Zhang 
361698b658c4SHong Zhang   jj = aij->j;
36179566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3618a0ff6018SBarry Smith   for (i = 0; i < m; i++) {
3619a0ff6018SBarry Smith     row = rstart + i;
362000e6dbe6SBarry Smith     nz  = ii[i + 1] - ii[i];
362115b2185cSHong Zhang     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
36229566063dSJacob Faibussowitsch     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
36239371c9d4SSatish Balay     jj += nz;
36249371c9d4SSatish Balay     aa += nz;
3625a0ff6018SBarry Smith   }
36269566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
36279566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscmap, &cmap));
3628a0ff6018SBarry Smith 
36299566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
36309566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3631fee21e36SBarry Smith 
36329566063dSJacob Faibussowitsch   PetscCall(PetscFree(colsub));
363398b658c4SHong Zhang 
363498b658c4SHong Zhang   /* save Msub, iscol_sub and iscmap used in processor for next request */
3635fee21e36SBarry Smith   if (call == MAT_INITIAL_MATRIX) {
36363b00a383SHong Zhang     *newmat = M;
36379566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
36389566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&Msub));
363998b658c4SHong Zhang 
36409566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
36419566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol_sub));
364298b658c4SHong Zhang 
36439566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
36449566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscmap));
3645bcae8d28SHong Zhang 
3646bcae8d28SHong Zhang     if (iscol_local) {
36479566063dSJacob Faibussowitsch       PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
36489566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&iscol_local));
3649bcae8d28SHong Zhang     }
365098b658c4SHong Zhang   }
3651a0ff6018SBarry Smith   PetscFunctionReturn(0);
3652a0ff6018SBarry Smith }
3653273d9f13SBarry Smith 
3654df40acb1SHong Zhang /*
3655df40acb1SHong Zhang     Not great since it makes two copies of the submatrix, first an SeqAIJ
3656df40acb1SHong Zhang   in local and then by concatenating the local matrices the end result.
3657df40acb1SHong Zhang   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3658df40acb1SHong Zhang 
365911a5261eSBarry Smith   This requires a sequential iscol with all indices.
3660df40acb1SHong Zhang */
36619371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) {
3662df40acb1SHong Zhang   PetscMPIInt rank, size;
3663df40acb1SHong Zhang   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3664df40acb1SHong Zhang   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3665df40acb1SHong Zhang   Mat         M, Mreuse;
366698b658c4SHong Zhang   MatScalar  *aa, *vwork;
3667df40acb1SHong Zhang   MPI_Comm    comm;
3668df40acb1SHong Zhang   Mat_SeqAIJ *aij;
36690b27a90eSHong Zhang   PetscBool   colflag, allcolumns = PETSC_FALSE;
3670df40acb1SHong Zhang 
3671df40acb1SHong Zhang   PetscFunctionBegin;
36729566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
36739566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
36749566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
3675df40acb1SHong Zhang 
36760b27a90eSHong Zhang   /* Check for special case: each processor gets entire matrix columns */
36779566063dSJacob Faibussowitsch   PetscCall(ISIdentity(iscol, &colflag));
36789566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol, &n));
36790b27a90eSHong Zhang   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
36801c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
36810b27a90eSHong Zhang 
3682df40acb1SHong Zhang   if (call == MAT_REUSE_MATRIX) {
36839566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
368428b400f6SJacob Faibussowitsch     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
36859566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3686df40acb1SHong Zhang   } else {
36879566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3688df40acb1SHong Zhang   }
3689df40acb1SHong Zhang 
3690df40acb1SHong Zhang   /*
3691df40acb1SHong Zhang       m - number of local rows
3692df40acb1SHong Zhang       n - number of columns (same on all processors)
3693df40acb1SHong Zhang       rstart - first row in new global matrix generated
3694df40acb1SHong Zhang   */
36959566063dSJacob Faibussowitsch   PetscCall(MatGetSize(Mreuse, &m, &n));
36969566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3697df40acb1SHong Zhang   if (call == MAT_INITIAL_MATRIX) {
3698df40acb1SHong Zhang     aij = (Mat_SeqAIJ *)(Mreuse)->data;
3699df40acb1SHong Zhang     ii  = aij->i;
3700df40acb1SHong Zhang     jj  = aij->j;
3701df40acb1SHong Zhang 
3702df40acb1SHong Zhang     /*
3703df40acb1SHong Zhang         Determine the number of non-zeros in the diagonal and off-diagonal
3704df40acb1SHong Zhang         portions of the matrix in order to do correct preallocation
3705df40acb1SHong Zhang     */
3706df40acb1SHong Zhang 
3707df40acb1SHong Zhang     /* first get start and end of "diagonal" columns */
3708df40acb1SHong Zhang     if (csize == PETSC_DECIDE) {
37099566063dSJacob Faibussowitsch       PetscCall(ISGetSize(isrow, &mglobal));
3710df40acb1SHong Zhang       if (mglobal == n) { /* square matrix */
3711df40acb1SHong Zhang         nlocal = m;
3712df40acb1SHong Zhang       } else {
3713df40acb1SHong Zhang         nlocal = n / size + ((n % size) > rank);
3714df40acb1SHong Zhang       }
3715df40acb1SHong Zhang     } else {
3716df40acb1SHong Zhang       nlocal = csize;
3717df40acb1SHong Zhang     }
37189566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3719df40acb1SHong Zhang     rstart = rend - nlocal;
3720aed4548fSBarry Smith     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3721df40acb1SHong Zhang 
3722df40acb1SHong Zhang     /* next, compute all the lengths */
37239566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3724df40acb1SHong Zhang     olens = dlens + m;
3725df40acb1SHong Zhang     for (i = 0; i < m; i++) {
3726df40acb1SHong Zhang       jend = ii[i + 1] - ii[i];
3727df40acb1SHong Zhang       olen = 0;
3728df40acb1SHong Zhang       dlen = 0;
3729df40acb1SHong Zhang       for (j = 0; j < jend; j++) {
3730df40acb1SHong Zhang         if (*jj < rstart || *jj >= rend) olen++;
3731df40acb1SHong Zhang         else dlen++;
3732df40acb1SHong Zhang         jj++;
3733df40acb1SHong Zhang       }
3734df40acb1SHong Zhang       olens[i] = olen;
3735df40acb1SHong Zhang       dlens[i] = dlen;
3736df40acb1SHong Zhang     }
37379566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &M));
37389566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
37399566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(M, bs, cbs));
37409566063dSJacob Faibussowitsch     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
37419566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
37429566063dSJacob Faibussowitsch     PetscCall(PetscFree(dlens));
3743df40acb1SHong Zhang   } else {
3744df40acb1SHong Zhang     PetscInt ml, nl;
3745df40acb1SHong Zhang 
3746df40acb1SHong Zhang     M = *newmat;
37479566063dSJacob Faibussowitsch     PetscCall(MatGetLocalSize(M, &ml, &nl));
374808401ef6SPierre Jolivet     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
37499566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(M));
3750df40acb1SHong Zhang     /*
3751df40acb1SHong Zhang          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3752df40acb1SHong Zhang        rather than the slower MatSetValues().
3753df40acb1SHong Zhang     */
3754df40acb1SHong Zhang     M->was_assembled = PETSC_TRUE;
3755df40acb1SHong Zhang     M->assembled     = PETSC_FALSE;
3756df40acb1SHong Zhang   }
37579566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3758df40acb1SHong Zhang   aij = (Mat_SeqAIJ *)(Mreuse)->data;
3759df40acb1SHong Zhang   ii  = aij->i;
3760df40acb1SHong Zhang   jj  = aij->j;
37612e5835c6SStefano Zampini 
37622e5835c6SStefano Zampini   /* trigger copy to CPU if needed */
37639566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3764df40acb1SHong Zhang   for (i = 0; i < m; i++) {
3765df40acb1SHong Zhang     row   = rstart + i;
3766df40acb1SHong Zhang     nz    = ii[i + 1] - ii[i];
37679371c9d4SSatish Balay     cwork = jj;
37689371c9d4SSatish Balay     jj += nz;
37699371c9d4SSatish Balay     vwork = aa;
37709371c9d4SSatish Balay     aa += nz;
37719566063dSJacob Faibussowitsch     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3772df40acb1SHong Zhang   }
37739566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3774df40acb1SHong Zhang 
37759566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
37769566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3777df40acb1SHong Zhang   *newmat = M;
3778df40acb1SHong Zhang 
3779df40acb1SHong Zhang   /* save submatrix used in processor for next request */
3780df40acb1SHong Zhang   if (call == MAT_INITIAL_MATRIX) {
37819566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
37829566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&Mreuse));
3783df40acb1SHong Zhang   }
3784df40acb1SHong Zhang   PetscFunctionReturn(0);
3785df40acb1SHong Zhang }
3786df40acb1SHong Zhang 
37879371c9d4SSatish Balay PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) {
37886a3d2595SBarry Smith   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3789899cda47SBarry Smith   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3790ccd8e176SBarry Smith   const PetscInt *JJ;
3791eeb24464SBarry Smith   PetscBool       nooffprocentries;
37926a3d2595SBarry Smith   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3793ccd8e176SBarry Smith 
3794ccd8e176SBarry Smith   PetscFunctionBegin;
3795aed4548fSBarry Smith   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3796899cda47SBarry Smith 
37979566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
37989566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
3799d0f46423SBarry Smith   m      = B->rmap->n;
3800d0f46423SBarry Smith   cstart = B->cmap->rstart;
3801d0f46423SBarry Smith   cend   = B->cmap->rend;
3802d0f46423SBarry Smith   rstart = B->rmap->rstart;
3803899cda47SBarry Smith 
38049566063dSJacob Faibussowitsch   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3805ccd8e176SBarry Smith 
380676bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
38078f8f2f0dSBarry Smith     for (i = 0; i < m; i++) {
3808ecc77c7aSBarry Smith       nnz = Ii[i + 1] - Ii[i];
3809ecc77c7aSBarry Smith       JJ  = J + Ii[i];
381008401ef6SPierre Jolivet       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
381108401ef6SPierre Jolivet       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
381208401ef6SPierre Jolivet       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3813ecc77c7aSBarry Smith     }
381476bd3646SJed Brown   }
3815ecc77c7aSBarry Smith 
38168f8f2f0dSBarry Smith   for (i = 0; i < m; i++) {
3817b7940d39SSatish Balay     nnz     = Ii[i + 1] - Ii[i];
3818b7940d39SSatish Balay     JJ      = J + Ii[i];
3819ccd8e176SBarry Smith     nnz_max = PetscMax(nnz_max, nnz);
3820ccd8e176SBarry Smith     d       = 0;
38210daa03b5SJed Brown     for (j = 0; j < nnz; j++) {
38220daa03b5SJed Brown       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3823ccd8e176SBarry Smith     }
3824ccd8e176SBarry Smith     d_nnz[i] = d;
3825ccd8e176SBarry Smith     o_nnz[i] = nnz - d;
3826ccd8e176SBarry Smith   }
38279566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
38289566063dSJacob Faibussowitsch   PetscCall(PetscFree2(d_nnz, o_nnz));
3829ccd8e176SBarry Smith 
38308f8f2f0dSBarry Smith   for (i = 0; i < m; i++) {
3831ccd8e176SBarry Smith     ii = i + rstart;
38329566063dSJacob Faibussowitsch     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
3833ccd8e176SBarry Smith   }
3834eeb24464SBarry Smith   nooffprocentries    = B->nooffprocentries;
3835eeb24464SBarry Smith   B->nooffprocentries = PETSC_TRUE;
38369566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
38379566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3838eeb24464SBarry Smith   B->nooffprocentries = nooffprocentries;
3839ccd8e176SBarry Smith 
38406a3d2595SBarry Smith   /* count number of entries below block diagonal */
38416a3d2595SBarry Smith   PetscCall(PetscFree(Aij->ld));
38426a3d2595SBarry Smith   PetscCall(PetscCalloc1(m, &ld));
38436a3d2595SBarry Smith   Aij->ld = ld;
38446a3d2595SBarry Smith   for (i = 0; i < m; i++) {
38456a3d2595SBarry Smith     nnz = Ii[i + 1] - Ii[i];
38466a3d2595SBarry Smith     j   = 0;
3847ad540459SPierre Jolivet     while (j < nnz && J[j] < cstart) j++;
38486a3d2595SBarry Smith     ld[i] = j;
38496a3d2595SBarry Smith     J += nnz;
38506a3d2595SBarry Smith   }
38516a3d2595SBarry Smith 
38529566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3853ccd8e176SBarry Smith   PetscFunctionReturn(0);
3854ccd8e176SBarry Smith }
3855ccd8e176SBarry Smith 
38561eea217eSSatish Balay /*@
385711a5261eSBarry Smith    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3858ccd8e176SBarry Smith    (the default parallel PETSc format).
3859ccd8e176SBarry Smith 
3860d083f849SBarry Smith    Collective
3861ccd8e176SBarry Smith 
3862ccd8e176SBarry Smith    Input Parameters:
3863a1661176SMatthew Knepley +  B - the matrix
3864ccd8e176SBarry Smith .  i - the indices into j for the start of each local row (starts with zero)
38650daa03b5SJed Brown .  j - the column indices for each local row (starts with zero)
3866ccd8e176SBarry Smith -  v - optional values in the matrix
3867ccd8e176SBarry Smith 
3868ccd8e176SBarry Smith    Level: developer
3869ccd8e176SBarry Smith 
387012251496SSatish Balay    Notes:
3871c1c1d628SHong Zhang        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3872c1c1d628SHong Zhang      thus you CANNOT change the matrix entries by changing the values of v[] after you have
387311a5261eSBarry Smith      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
387412251496SSatish Balay 
387512251496SSatish Balay        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
387612251496SSatish Balay 
387712251496SSatish Balay        The format which is used for the sparse matrix input, is equivalent to a
387812251496SSatish Balay     row-major ordering, i.e., for the following matrix, the input data expected is
3879c5e4d11fSDmitry Karpeev     as shown
388012251496SSatish Balay 
3881c5e4d11fSDmitry Karpeev $        1 0 0
3882c5e4d11fSDmitry Karpeev $        2 0 3     P0
3883c5e4d11fSDmitry Karpeev $       -------
3884c5e4d11fSDmitry Karpeev $        4 5 6     P1
3885c5e4d11fSDmitry Karpeev $
3886c5e4d11fSDmitry Karpeev $     Process0 [P0]: rows_owned=[0,1]
3887c5e4d11fSDmitry Karpeev $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3888c5e4d11fSDmitry Karpeev $        j =  {0,0,2}  [size = 3]
3889c5e4d11fSDmitry Karpeev $        v =  {1,2,3}  [size = 3]
3890c5e4d11fSDmitry Karpeev $
3891c5e4d11fSDmitry Karpeev $     Process1 [P1]: rows_owned=[2]
3892c5e4d11fSDmitry Karpeev $        i =  {0,3}    [size = nrow+1  = 1+1]
3893c5e4d11fSDmitry Karpeev $        j =  {0,1,2}  [size = 3]
3894c5e4d11fSDmitry Karpeev $        v =  {4,5,6}  [size = 3]
389512251496SSatish Balay 
389611a5261eSBarry Smith .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
3897db781477SPatrick Sanan           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3898ccd8e176SBarry Smith @*/
38999371c9d4SSatish Balay PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) {
3900ccd8e176SBarry Smith   PetscFunctionBegin;
3901cac4c232SBarry Smith   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
3902ccd8e176SBarry Smith   PetscFunctionReturn(0);
3903ccd8e176SBarry Smith }
3904ccd8e176SBarry Smith 
3905273d9f13SBarry Smith /*@C
390611a5261eSBarry Smith    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
3907273d9f13SBarry Smith    (the default parallel PETSc format).  For good matrix assembly performance
3908273d9f13SBarry Smith    the user should preallocate the matrix storage by setting the parameters
3909273d9f13SBarry Smith    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3910273d9f13SBarry Smith    performance can be increased by more than a factor of 50.
3911273d9f13SBarry Smith 
3912d083f849SBarry Smith    Collective
3913273d9f13SBarry Smith 
3914273d9f13SBarry Smith    Input Parameters:
39151c4f3114SJed Brown +  B - the matrix
3916273d9f13SBarry Smith .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3917273d9f13SBarry Smith            (same value is used for all local rows)
3918273d9f13SBarry Smith .  d_nnz - array containing the number of nonzeros in the various rows of the
3919273d9f13SBarry Smith            DIAGONAL portion of the local submatrix (possibly different for each row)
392011a5261eSBarry Smith            or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure.
3921273d9f13SBarry Smith            The size of this array is equal to the number of local rows, i.e 'm'.
39223287b5eaSJed Brown            For matrices that will be factored, you must leave room for (and set)
39233287b5eaSJed Brown            the diagonal entry even if it is zero.
3924273d9f13SBarry Smith .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3925273d9f13SBarry Smith            submatrix (same value is used for all local rows).
3926273d9f13SBarry Smith -  o_nnz - array containing the number of nonzeros in the various rows of the
3927273d9f13SBarry Smith            OFF-DIAGONAL portion of the local submatrix (possibly different for
392811a5261eSBarry Smith            each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
3929273d9f13SBarry Smith            structure. The size of this array is equal to the number
3930273d9f13SBarry Smith            of local rows, i.e 'm'.
3931273d9f13SBarry Smith 
393249a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
393349a6f317SBarry Smith 
393411a5261eSBarry Smith    The `MATAIJ` format, also called compressed row storage (CSR), is fully compatible with standard Fortran 77
39350598bfebSBarry Smith    storage.  The stored row and column indices begin with zero.
3936651615e1SBarry Smith    See [Sparse Matrices](sec_matsparse) for details.
3937273d9f13SBarry Smith 
3938273d9f13SBarry Smith    The parallel matrix is partitioned such that the first m0 rows belong to
3939273d9f13SBarry Smith    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3940273d9f13SBarry Smith    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3941273d9f13SBarry Smith 
3942273d9f13SBarry Smith    The DIAGONAL portion of the local submatrix of a processor can be defined
3943a05b864aSJed Brown    as the submatrix which is obtained by extracting the part corresponding to
3944a05b864aSJed Brown    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3945a05b864aSJed Brown    first row that belongs to the processor, r2 is the last row belonging to
3946a05b864aSJed Brown    this processor, and c1-c2 is the range of indices of the local part of a
3947a05b864aSJed Brown    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3948a05b864aSJed Brown    common case of a square matrix, the row and column ranges are the same and
3949a05b864aSJed Brown    the DIAGONAL part is also square. The remaining portion of the local
3950a05b864aSJed Brown    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3951273d9f13SBarry Smith 
3952273d9f13SBarry Smith    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3953273d9f13SBarry Smith 
3954aa95bbe8SBarry Smith    You can call MatGetInfo() to get information on how effective the preallocation was;
3955aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3956aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
3957aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
3958aa95bbe8SBarry Smith 
3959273d9f13SBarry Smith    Example usage:
3960273d9f13SBarry Smith 
3961273d9f13SBarry Smith    Consider the following 8x8 matrix with 34 non-zero values, that is
3962273d9f13SBarry Smith    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3963273d9f13SBarry Smith    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3964273d9f13SBarry Smith    as follows:
3965273d9f13SBarry Smith 
3966273d9f13SBarry Smith .vb
3967273d9f13SBarry Smith             1  2  0  |  0  3  0  |  0  4
3968273d9f13SBarry Smith     Proc0   0  5  6  |  7  0  0  |  8  0
3969273d9f13SBarry Smith             9  0 10  | 11  0  0  | 12  0
3970273d9f13SBarry Smith     -------------------------------------
3971273d9f13SBarry Smith            13  0 14  | 15 16 17  |  0  0
3972273d9f13SBarry Smith     Proc1   0 18  0  | 19 20 21  |  0  0
3973273d9f13SBarry Smith             0  0  0  | 22 23  0  | 24  0
3974273d9f13SBarry Smith     -------------------------------------
3975273d9f13SBarry Smith     Proc2  25 26 27  |  0  0 28  | 29  0
3976273d9f13SBarry Smith            30  0  0  | 31 32 33  |  0 34
3977273d9f13SBarry Smith .ve
3978273d9f13SBarry Smith 
3979273d9f13SBarry Smith    This can be represented as a collection of submatrices as:
3980273d9f13SBarry Smith 
3981273d9f13SBarry Smith .vb
3982273d9f13SBarry Smith       A B C
3983273d9f13SBarry Smith       D E F
3984273d9f13SBarry Smith       G H I
3985273d9f13SBarry Smith .ve
3986273d9f13SBarry Smith 
3987273d9f13SBarry Smith    Where the submatrices A,B,C are owned by proc0, D,E,F are
3988273d9f13SBarry Smith    owned by proc1, G,H,I are owned by proc2.
3989273d9f13SBarry Smith 
3990273d9f13SBarry Smith    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3991273d9f13SBarry Smith    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3992273d9f13SBarry Smith    The 'M','N' parameters are 8,8, and have the same values on all procs.
3993273d9f13SBarry Smith 
3994273d9f13SBarry Smith    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3995273d9f13SBarry Smith    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3996273d9f13SBarry Smith    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3997273d9f13SBarry Smith    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
399811a5261eSBarry Smith    part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
399911a5261eSBarry Smith    matrix, and [DF] as another `MATSEQAIJ` matrix.
4000273d9f13SBarry Smith 
4001273d9f13SBarry Smith    When d_nz, o_nz parameters are specified, d_nz storage elements are
4002273d9f13SBarry Smith    allocated for every row of the local diagonal submatrix, and o_nz
4003273d9f13SBarry Smith    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4004273d9f13SBarry Smith    One way to choose d_nz and o_nz is to use the max nonzeros per local
4005273d9f13SBarry Smith    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4006273d9f13SBarry Smith    In this case, the values of d_nz,o_nz are:
4007273d9f13SBarry Smith .vb
4008273d9f13SBarry Smith      proc0 : d_nz = 2, o_nz = 2
4009273d9f13SBarry Smith      proc1 : d_nz = 3, o_nz = 2
4010273d9f13SBarry Smith      proc2 : d_nz = 1, o_nz = 4
4011273d9f13SBarry Smith .ve
4012273d9f13SBarry Smith    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4013273d9f13SBarry Smith    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4014273d9f13SBarry Smith    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4015273d9f13SBarry Smith    34 values.
4016273d9f13SBarry Smith 
4017273d9f13SBarry Smith    When d_nnz, o_nnz parameters are specified, the storage is specified
4018a5b23f4aSJose E. Roman    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4019273d9f13SBarry Smith    In the above case the values for d_nnz,o_nnz are:
4020273d9f13SBarry Smith .vb
4021273d9f13SBarry Smith      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4022273d9f13SBarry Smith      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4023273d9f13SBarry Smith      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4024273d9f13SBarry Smith .ve
4025273d9f13SBarry Smith    Here the space allocated is sum of all the above values i.e 34, and
4026273d9f13SBarry Smith    hence pre-allocation is perfect.
4027273d9f13SBarry Smith 
4028273d9f13SBarry Smith    Level: intermediate
4029273d9f13SBarry Smith 
4030651615e1SBarry Smith .seealso: [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4031db781477SPatrick Sanan           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4032273d9f13SBarry Smith @*/
40339371c9d4SSatish Balay PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
4034273d9f13SBarry Smith   PetscFunctionBegin;
40356ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
40366ba663aaSJed Brown   PetscValidType(B, 1);
4037cac4c232SBarry Smith   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4038273d9f13SBarry Smith   PetscFunctionReturn(0);
4039273d9f13SBarry Smith }
4040273d9f13SBarry Smith 
404158d36128SBarry Smith /*@
404211a5261eSBarry Smith      MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
40438f8f2f0dSBarry Smith          CSR format for the local rows.
40442fb0ec9aSBarry Smith 
4045d083f849SBarry Smith    Collective
40462fb0ec9aSBarry Smith 
40472fb0ec9aSBarry Smith    Input Parameters:
40482fb0ec9aSBarry Smith +  comm - MPI communicator
404911a5261eSBarry Smith .  m - number of local rows (Cannot be `PETSC_DECIDE`)
40502fb0ec9aSBarry Smith .  n - This value should be the same as the local size used in creating the
405111a5261eSBarry Smith        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
40522fb0ec9aSBarry Smith        calculated if N is given) For square matrices n is almost always m.
405311a5261eSBarry Smith .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
405411a5261eSBarry Smith .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4055483a2f95SBarry Smith .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
40562fb0ec9aSBarry Smith .   j - column indices
4057f1f2ae84SBarry Smith -   a - optional matrix values
40582fb0ec9aSBarry Smith 
40592fb0ec9aSBarry Smith    Output Parameter:
40602fb0ec9aSBarry Smith .   mat - the matrix
406103bfb495SBarry Smith 
40622fb0ec9aSBarry Smith    Level: intermediate
40632fb0ec9aSBarry Smith 
40642fb0ec9aSBarry Smith    Notes:
40652fb0ec9aSBarry Smith        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
40662fb0ec9aSBarry Smith      thus you CANNOT change the matrix entries by changing the values of a[] after you have
40678d7a6e47SBarry Smith      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
40682fb0ec9aSBarry Smith 
406912251496SSatish Balay        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
407012251496SSatish Balay 
407112251496SSatish Balay        The format which is used for the sparse matrix input, is equivalent to a
407212251496SSatish Balay     row-major ordering, i.e., for the following matrix, the input data expected is
4073c5e4d11fSDmitry Karpeev     as shown
407412251496SSatish Balay 
40758f8f2f0dSBarry Smith        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
40768f8f2f0dSBarry Smith 
4077c5e4d11fSDmitry Karpeev $        1 0 0
4078c5e4d11fSDmitry Karpeev $        2 0 3     P0
4079c5e4d11fSDmitry Karpeev $       -------
4080c5e4d11fSDmitry Karpeev $        4 5 6     P1
4081c5e4d11fSDmitry Karpeev $
4082c5e4d11fSDmitry Karpeev $     Process0 [P0]: rows_owned=[0,1]
4083c5e4d11fSDmitry Karpeev $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4084c5e4d11fSDmitry Karpeev $        j =  {0,0,2}  [size = 3]
4085c5e4d11fSDmitry Karpeev $        v =  {1,2,3}  [size = 3]
4086c5e4d11fSDmitry Karpeev $
4087c5e4d11fSDmitry Karpeev $     Process1 [P1]: rows_owned=[2]
4088c5e4d11fSDmitry Karpeev $        i =  {0,3}    [size = nrow+1  = 1+1]
4089c5e4d11fSDmitry Karpeev $        j =  {0,1,2}  [size = 3]
4090c5e4d11fSDmitry Karpeev $        v =  {4,5,6}  [size = 3]
40912fb0ec9aSBarry Smith 
409211a5261eSBarry Smith .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4093db781477SPatrick Sanan           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
40942fb0ec9aSBarry Smith @*/
40959371c9d4SSatish Balay PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) {
40962fb0ec9aSBarry Smith   PetscFunctionBegin;
409708401ef6SPierre Jolivet   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
409808401ef6SPierre Jolivet   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
40999566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
41009566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, M, N));
41019566063dSJacob Faibussowitsch   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
41029566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat, MATMPIAIJ));
41039566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
41042fb0ec9aSBarry Smith   PetscFunctionReturn(0);
41052fb0ec9aSBarry Smith }
41062fb0ec9aSBarry Smith 
41078f8f2f0dSBarry Smith /*@
410811a5261eSBarry Smith      MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
410911a5261eSBarry Smith          CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed to `MatCreateMPIAIJWithArrays()`
41106a3d2595SBarry Smith 
41116a3d2595SBarry Smith      Deprecated: Use `MatUpdateMPIAIJWithArray()`
41128f8f2f0dSBarry Smith 
41138f8f2f0dSBarry Smith    Collective
41148f8f2f0dSBarry Smith 
41158f8f2f0dSBarry Smith    Input Parameters:
41168f8f2f0dSBarry Smith +  mat - the matrix
411711a5261eSBarry Smith .  m - number of local rows (Cannot be `PETSC_DECIDE`)
41188f8f2f0dSBarry Smith .  n - This value should be the same as the local size used in creating the
411911a5261eSBarry Smith        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
41208f8f2f0dSBarry Smith        calculated if N is given) For square matrices n is almost always m.
412111a5261eSBarry Smith .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
412211a5261eSBarry Smith .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
41238f8f2f0dSBarry Smith .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
41248f8f2f0dSBarry Smith .  J - column indices
41258f8f2f0dSBarry Smith -  v - matrix values
41268f8f2f0dSBarry Smith 
41278f8f2f0dSBarry Smith    Level: intermediate
41288f8f2f0dSBarry Smith 
412911a5261eSBarry Smith .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
41306a3d2595SBarry Smith           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
41318f8f2f0dSBarry Smith @*/
41329371c9d4SSatish Balay PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) {
41336a3d2595SBarry Smith   PetscInt        nnz, i;
41348f8f2f0dSBarry Smith   PetscBool       nooffprocentries;
41358f8f2f0dSBarry Smith   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4136fff043a9SJunchao Zhang   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4137fff043a9SJunchao Zhang   PetscScalar    *ad, *ao;
41388f8f2f0dSBarry Smith   PetscInt        ldi, Iii, md;
41396a3d2595SBarry Smith   const PetscInt *Adi = Ad->i;
41406a3d2595SBarry Smith   PetscInt       *ld  = Aij->ld;
41418f8f2f0dSBarry Smith 
41428f8f2f0dSBarry Smith   PetscFunctionBegin;
4143aed4548fSBarry Smith   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
414408401ef6SPierre Jolivet   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
414508401ef6SPierre Jolivet   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
414608401ef6SPierre Jolivet   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
41478f8f2f0dSBarry Smith 
41489566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
41499566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
41508f8f2f0dSBarry Smith 
41518f8f2f0dSBarry Smith   for (i = 0; i < m; i++) {
41528f8f2f0dSBarry Smith     nnz = Ii[i + 1] - Ii[i];
41538f8f2f0dSBarry Smith     Iii = Ii[i];
41548f8f2f0dSBarry Smith     ldi = ld[i];
41558f8f2f0dSBarry Smith     md  = Adi[i + 1] - Adi[i];
41569566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
41579566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
41589566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
41598f8f2f0dSBarry Smith     ad += md;
41608f8f2f0dSBarry Smith     ao += nnz - md;
41618f8f2f0dSBarry Smith   }
41628f8f2f0dSBarry Smith   nooffprocentries      = mat->nooffprocentries;
41638f8f2f0dSBarry Smith   mat->nooffprocentries = PETSC_TRUE;
41649566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
41659566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
41669566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
41679566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
41689566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
41699566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
41709566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
41718f8f2f0dSBarry Smith   mat->nooffprocentries = nooffprocentries;
41728f8f2f0dSBarry Smith   PetscFunctionReturn(0);
41738f8f2f0dSBarry Smith }
41748f8f2f0dSBarry Smith 
41756a3d2595SBarry Smith /*@
417611a5261eSBarry Smith      MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
41776a3d2595SBarry Smith 
41786a3d2595SBarry Smith    Collective
41796a3d2595SBarry Smith 
41806a3d2595SBarry Smith    Input Parameters:
41816a3d2595SBarry Smith +  mat - the matrix
41826a3d2595SBarry Smith -  v - matrix values, stored by row
41836a3d2595SBarry Smith 
41846a3d2595SBarry Smith    Level: intermediate
41856a3d2595SBarry Smith 
418611a5261eSBarry Smith    Note:
41876a3d2595SBarry Smith    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
41886a3d2595SBarry Smith 
41896a3d2595SBarry Smith .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
41906a3d2595SBarry Smith           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
41916a3d2595SBarry Smith @*/
41929371c9d4SSatish Balay PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) {
41936a3d2595SBarry Smith   PetscInt        nnz, i, m;
41946a3d2595SBarry Smith   PetscBool       nooffprocentries;
41956a3d2595SBarry Smith   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
41966a3d2595SBarry Smith   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
41976a3d2595SBarry Smith   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
41986a3d2595SBarry Smith   PetscScalar    *ad, *ao;
41996a3d2595SBarry Smith   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
42006a3d2595SBarry Smith   PetscInt        ldi, Iii, md;
42016a3d2595SBarry Smith   PetscInt       *ld = Aij->ld;
42026a3d2595SBarry Smith 
42036a3d2595SBarry Smith   PetscFunctionBegin;
42046a3d2595SBarry Smith   m = mat->rmap->n;
42056a3d2595SBarry Smith 
42066a3d2595SBarry Smith   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
42076a3d2595SBarry Smith   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
42086a3d2595SBarry Smith   Iii = 0;
42096a3d2595SBarry Smith   for (i = 0; i < m; i++) {
42106a3d2595SBarry Smith     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
42116a3d2595SBarry Smith     ldi = ld[i];
42126a3d2595SBarry Smith     md  = Adi[i + 1] - Adi[i];
42136a3d2595SBarry Smith     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
42146a3d2595SBarry Smith     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
42156a3d2595SBarry Smith     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
42166a3d2595SBarry Smith     ad += md;
42176a3d2595SBarry Smith     ao += nnz - md;
42186a3d2595SBarry Smith     Iii += nnz;
42196a3d2595SBarry Smith   }
42206a3d2595SBarry Smith   nooffprocentries      = mat->nooffprocentries;
42216a3d2595SBarry Smith   mat->nooffprocentries = PETSC_TRUE;
42226a3d2595SBarry Smith   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
42236a3d2595SBarry Smith   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
42246a3d2595SBarry Smith   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
42256a3d2595SBarry Smith   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
42266a3d2595SBarry Smith   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
42276a3d2595SBarry Smith   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
42286a3d2595SBarry Smith   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
42296a3d2595SBarry Smith   mat->nooffprocentries = nooffprocentries;
42306a3d2595SBarry Smith   PetscFunctionReturn(0);
42316a3d2595SBarry Smith }
42326a3d2595SBarry Smith 
4233273d9f13SBarry Smith /*@C
423411a5261eSBarry Smith    MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4235273d9f13SBarry Smith    (the default parallel PETSc format).  For good matrix assembly performance
4236273d9f13SBarry Smith    the user should preallocate the matrix storage by setting the parameters
4237273d9f13SBarry Smith    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4238273d9f13SBarry Smith    performance can be increased by more than a factor of 50.
4239273d9f13SBarry Smith 
4240d083f849SBarry Smith    Collective
4241273d9f13SBarry Smith 
4242273d9f13SBarry Smith    Input Parameters:
4243273d9f13SBarry Smith +  comm - MPI communicator
424411a5261eSBarry Smith .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4245273d9f13SBarry Smith            This value should be the same as the local size used in creating the
4246273d9f13SBarry Smith            y vector for the matrix-vector product y = Ax.
4247273d9f13SBarry Smith .  n - This value should be the same as the local size used in creating the
4248273d9f13SBarry Smith        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4249273d9f13SBarry Smith        calculated if N is given) For square matrices n is almost always m.
425011a5261eSBarry Smith .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
425111a5261eSBarry Smith .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4252273d9f13SBarry Smith .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4253273d9f13SBarry Smith            (same value is used for all local rows)
4254273d9f13SBarry Smith .  d_nnz - array containing the number of nonzeros in the various rows of the
4255273d9f13SBarry Smith            DIAGONAL portion of the local submatrix (possibly different for each row)
42560298fd71SBarry Smith            or NULL, if d_nz is used to specify the nonzero structure.
4257273d9f13SBarry Smith            The size of this array is equal to the number of local rows, i.e 'm'.
4258273d9f13SBarry Smith .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4259273d9f13SBarry Smith            submatrix (same value is used for all local rows).
4260273d9f13SBarry Smith -  o_nnz - array containing the number of nonzeros in the various rows of the
4261273d9f13SBarry Smith            OFF-DIAGONAL portion of the local submatrix (possibly different for
42620298fd71SBarry Smith            each row) or NULL, if o_nz is used to specify the nonzero
4263273d9f13SBarry Smith            structure. The size of this array is equal to the number
4264273d9f13SBarry Smith            of local rows, i.e 'm'.
4265273d9f13SBarry Smith 
4266273d9f13SBarry Smith    Output Parameter:
4267273d9f13SBarry Smith .  A - the matrix
4268273d9f13SBarry Smith 
426911a5261eSBarry Smith    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4270f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
427111a5261eSBarry Smith    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4272175b88e8SBarry Smith 
4273273d9f13SBarry Smith    Notes:
427449a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
427549a6f317SBarry Smith 
4276273d9f13SBarry Smith    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4277273d9f13SBarry Smith    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4278273d9f13SBarry Smith    storage requirements for this matrix.
4279273d9f13SBarry Smith 
   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.
4283273d9f13SBarry Smith 
4284273d9f13SBarry Smith    The user MUST specify either the local or global matrix dimensions
4285273d9f13SBarry Smith    (possibly both).
4286273d9f13SBarry Smith 
428733a7c187SSatish Balay    The parallel matrix is partitioned across processors such that the
428833a7c187SSatish Balay    first m0 rows belong to process 0, the next m1 rows belong to
428933a7c187SSatish Balay    process 1, the next m2 rows belong to process 2 etc.. where
429033a7c187SSatish Balay    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
429133a7c187SSatish Balay    values corresponding to [m x N] submatrix.
4292273d9f13SBarry Smith 
429333a7c187SSatish Balay    The columns are logically partitioned with the n0 columns belonging
429433a7c187SSatish Balay    to 0th partition, the next n1 columns belonging to the next
4295df3898eeSBarry Smith    partition etc.. where n0,n1,n2... are the input parameter 'n'.
429633a7c187SSatish Balay 
429733a7c187SSatish Balay    The DIAGONAL portion of the local submatrix on any given processor
429833a7c187SSatish Balay    is the submatrix corresponding to the rows and columns m,n
429933a7c187SSatish Balay    corresponding to the given processor. i.e diagonal matrix on
430033a7c187SSatish Balay    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
430133a7c187SSatish Balay    etc. The remaining portion of the local submatrix [m x (N-n)]
430233a7c187SSatish Balay    constitute the OFF-DIAGONAL portion. The example below better
430333a7c187SSatish Balay    illustrates this concept.
430433a7c187SSatish Balay 
430533a7c187SSatish Balay    For a square global matrix we define each processor's diagonal portion
430633a7c187SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
430733a7c187SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
430833a7c187SSatish Balay    local matrix (a rectangular submatrix).
4309273d9f13SBarry Smith 
4310273d9f13SBarry Smith    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4311273d9f13SBarry Smith 
   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A, m,n,M,N);
     MatMPIAIJSetPreallocation(A,...);
.ve
4323f1058c0fSBarry Smith 
4324273d9f13SBarry Smith    By default, this format uses inodes (identical nodes) when possible.
4325273d9f13SBarry Smith    We search for consecutive rows with the same nonzero structure, thereby
4326273d9f13SBarry Smith    reusing matrix information to achieve increased efficiency.
4327273d9f13SBarry Smith 
4328273d9f13SBarry Smith    Options Database Keys:
4329923f20ffSKris Buschelman +  -mat_no_inode  - Do not use inodes
43302f3b2168SJunchao Zhang .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
433111a5261eSBarry Smith -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
433211a5261eSBarry Smith         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
433311a5261eSBarry Smith         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
433447b2e64bSBarry Smith 
4335273d9f13SBarry Smith    Example usage:
4336273d9f13SBarry Smith 
4337273d9f13SBarry Smith    Consider the following 8x8 matrix with 34 non-zero values, that is
4338273d9f13SBarry Smith    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4339273d9f13SBarry Smith    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4340efc377ccSKarl Rupp    as follows
4341273d9f13SBarry Smith 
4342273d9f13SBarry Smith .vb
4343273d9f13SBarry Smith             1  2  0  |  0  3  0  |  0  4
4344273d9f13SBarry Smith     Proc0   0  5  6  |  7  0  0  |  8  0
4345273d9f13SBarry Smith             9  0 10  | 11  0  0  | 12  0
4346273d9f13SBarry Smith     -------------------------------------
4347273d9f13SBarry Smith            13  0 14  | 15 16 17  |  0  0
4348273d9f13SBarry Smith     Proc1   0 18  0  | 19 20 21  |  0  0
4349273d9f13SBarry Smith             0  0  0  | 22 23  0  | 24  0
4350273d9f13SBarry Smith     -------------------------------------
4351273d9f13SBarry Smith     Proc2  25 26 27  |  0  0 28  | 29  0
4352273d9f13SBarry Smith            30  0  0  | 31 32 33  |  0 34
4353273d9f13SBarry Smith .ve
4354273d9f13SBarry Smith 
4355da57b5cdSKarl Rupp    This can be represented as a collection of submatrices as
4356273d9f13SBarry Smith 
4357273d9f13SBarry Smith .vb
4358273d9f13SBarry Smith       A B C
4359273d9f13SBarry Smith       D E F
4360273d9f13SBarry Smith       G H I
4361273d9f13SBarry Smith .ve
4362273d9f13SBarry Smith 
4363273d9f13SBarry Smith    Where the submatrices A,B,C are owned by proc0, D,E,F are
4364273d9f13SBarry Smith    owned by proc1, G,H,I are owned by proc2.
4365273d9f13SBarry Smith 
4366273d9f13SBarry Smith    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4367273d9f13SBarry Smith    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4368273d9f13SBarry Smith    The 'M','N' parameters are 8,8, and have the same values on all procs.
4369273d9f13SBarry Smith 
4370273d9f13SBarry Smith    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4371273d9f13SBarry Smith    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4372273d9f13SBarry Smith    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4376273d9f13SBarry Smith 
4377273d9f13SBarry Smith    When d_nz, o_nz parameters are specified, d_nz storage elements are
4378273d9f13SBarry Smith    allocated for every row of the local diagonal submatrix, and o_nz
4379273d9f13SBarry Smith    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.
4392273d9f13SBarry Smith 
4393273d9f13SBarry Smith    When d_nnz, o_nnz parameters are specified, the storage is specified
4394a5b23f4aSJose E. Roman    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4395da57b5cdSKarl Rupp    In the above case the values for d_nnz,o_nnz are
4396273d9f13SBarry Smith .vb
4397273d9f13SBarry Smith      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4398273d9f13SBarry Smith      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4399273d9f13SBarry Smith      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4400273d9f13SBarry Smith .ve
4401273d9f13SBarry Smith    Here the space allocated is sum of all the above values i.e 34, and
4402273d9f13SBarry Smith    hence pre-allocation is perfect.
4403273d9f13SBarry Smith 
4404273d9f13SBarry Smith    Level: intermediate
4405273d9f13SBarry Smith 
440660161072SBarry Smith .seealso: [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4407db781477SPatrick Sanan           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4408273d9f13SBarry Smith @*/
44099371c9d4SSatish Balay PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) {
4410b1d57f15SBarry Smith   PetscMPIInt size;
4411273d9f13SBarry Smith 
4412273d9f13SBarry Smith   PetscFunctionBegin;
44139566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
44149566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
44159566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
4416273d9f13SBarry Smith   if (size > 1) {
44179566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATMPIAIJ));
44189566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4419273d9f13SBarry Smith   } else {
44209566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATSEQAIJ));
44219566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4422273d9f13SBarry Smith   }
4423273d9f13SBarry Smith   PetscFunctionReturn(0);
4424273d9f13SBarry Smith }
4425195d93cdSBarry Smith 
4426127ca0efSMatthew Knepley /*@C
4427127ca0efSMatthew Knepley   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4428127ca0efSMatthew Knepley 
4429127ca0efSMatthew Knepley   Not collective
4430127ca0efSMatthew Knepley 
4431127ca0efSMatthew Knepley   Input Parameter:
443211a5261eSBarry Smith . A - The `MATMPIAIJ` matrix
4433127ca0efSMatthew Knepley 
4434127ca0efSMatthew Knepley   Output Parameters:
443511a5261eSBarry Smith + Ad - The local diagonal block as a `MATSEQAIJ` matrix
443611a5261eSBarry Smith . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
4437127ca0efSMatthew Knepley - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4438127ca0efSMatthew Knepley 
443911a5261eSBarry Smith   Note:
  The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
  the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
  local column numbers to global column numbers in the original matrix.
4444127ca0efSMatthew Knepley 
4445127ca0efSMatthew Knepley   Level: intermediate
4446127ca0efSMatthew Knepley 
.seealso: `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4448127ca0efSMatthew Knepley @*/
44499371c9d4SSatish Balay PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) {
4450195d93cdSBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
445104cf37c7SBarry Smith   PetscBool   flg;
4452b1d57f15SBarry Smith 
4453195d93cdSBarry Smith   PetscFunctionBegin;
44549566063dSJacob Faibussowitsch   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
445528b400f6SJacob Faibussowitsch   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
445621e72a00SBarry Smith   if (Ad) *Ad = a->A;
445721e72a00SBarry Smith   if (Ao) *Ao = a->B;
445821e72a00SBarry Smith   if (colmap) *colmap = a->garray;
4459195d93cdSBarry Smith   PetscFunctionReturn(0);
4460195d93cdSBarry Smith }
4461a2243be0SBarry Smith 
44629371c9d4SSatish Balay PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
4463110bb6e1SHong Zhang   PetscInt     m, N, i, rstart, nnz, Ii;
44649b8102ccSHong Zhang   PetscInt    *indx;
4465110bb6e1SHong Zhang   PetscScalar *values;
4466421ddf4dSJunchao Zhang   MatType      rootType;
44679b8102ccSHong Zhang 
44689b8102ccSHong Zhang   PetscFunctionBegin;
44699566063dSJacob Faibussowitsch   PetscCall(MatGetSize(inmat, &m, &N));
4470110bb6e1SHong Zhang   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4471110bb6e1SHong Zhang     PetscInt *dnz, *onz, sum, bs, cbs;
4472110bb6e1SHong Zhang 
447348a46eb9SPierre Jolivet     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4474a22543b6SHong Zhang     /* Check sum(n) = N */
44751c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
447608401ef6SPierre Jolivet     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4477a22543b6SHong Zhang 
44789566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
44799b8102ccSHong Zhang     rstart -= m;
44809b8102ccSHong Zhang 
4481d0609cedSBarry Smith     MatPreallocateBegin(comm, m, n, dnz, onz);
44829b8102ccSHong Zhang     for (i = 0; i < m; i++) {
44839566063dSJacob Faibussowitsch       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
44849566063dSJacob Faibussowitsch       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
44859566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
44869b8102ccSHong Zhang     }
44879b8102ccSHong Zhang 
44889566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, outmat));
44899566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
44909566063dSJacob Faibussowitsch     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
44919566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
44929566063dSJacob Faibussowitsch     PetscCall(MatGetRootType_Private(inmat, &rootType));
44939566063dSJacob Faibussowitsch     PetscCall(MatSetType(*outmat, rootType));
44949566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
44959566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4496d0609cedSBarry Smith     MatPreallocateEnd(dnz, onz);
44979566063dSJacob Faibussowitsch     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
44989b8102ccSHong Zhang   }
44999b8102ccSHong Zhang 
4500110bb6e1SHong Zhang   /* numeric phase */
45019566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
45029b8102ccSHong Zhang   for (i = 0; i < m; i++) {
45039566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
45049b8102ccSHong Zhang     Ii = i + rstart;
45059566063dSJacob Faibussowitsch     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
45069566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
45079b8102ccSHong Zhang   }
45089566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
45099566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4510c5d6d63eSBarry Smith   PetscFunctionReturn(0);
4511c5d6d63eSBarry Smith }
4512c5d6d63eSBarry Smith 
45139371c9d4SSatish Balay PetscErrorCode MatFileSplit(Mat A, char *outfile) {
451432dcc486SBarry Smith   PetscMPIInt        rank;
4515b1d57f15SBarry Smith   PetscInt           m, N, i, rstart, nnz;
4516de4209c5SBarry Smith   size_t             len;
4517b1d57f15SBarry Smith   const PetscInt    *indx;
4518c5d6d63eSBarry Smith   PetscViewer        out;
4519c5d6d63eSBarry Smith   char              *name;
4520c5d6d63eSBarry Smith   Mat                B;
4521b3cc6726SBarry Smith   const PetscScalar *values;
4522c5d6d63eSBarry Smith 
4523c5d6d63eSBarry Smith   PetscFunctionBegin;
45249566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(A, &m, NULL));
45259566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, NULL, &N));
4526f204ca49SKris Buschelman   /* Should this be the type of the diagonal block of A? */
45279566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
45289566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, m, N, m, N));
45299566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(B, A, A));
45309566063dSJacob Faibussowitsch   PetscCall(MatSetType(B, MATSEQAIJ));
45319566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
45329566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
4533c5d6d63eSBarry Smith   for (i = 0; i < m; i++) {
45349566063dSJacob Faibussowitsch     PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values));
45359566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES));
45369566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values));
4537c5d6d63eSBarry Smith   }
45389566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
45399566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
4540c5d6d63eSBarry Smith 
45419566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
45429566063dSJacob Faibussowitsch   PetscCall(PetscStrlen(outfile, &len));
45439566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(len + 6, &name));
45449566063dSJacob Faibussowitsch   PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank));
45459566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out));
45469566063dSJacob Faibussowitsch   PetscCall(PetscFree(name));
45479566063dSJacob Faibussowitsch   PetscCall(MatView(B, out));
45489566063dSJacob Faibussowitsch   PetscCall(PetscViewerDestroy(&out));
45499566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
4550c5d6d63eSBarry Smith   PetscFunctionReturn(0);
4551c5d6d63eSBarry Smith }
4552e5f2cdd8SHong Zhang 
45539371c9d4SSatish Balay static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) {
45546718818eSStefano Zampini   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
455551a7d1a8SHong Zhang 
455651a7d1a8SHong Zhang   PetscFunctionBegin;
45576718818eSStefano Zampini   if (!merge) PetscFunctionReturn(0);
45589566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->id_r));
45599566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->len_s));
45609566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->len_r));
45619566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->bi));
45629566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->bj));
45639566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->buf_ri[0]));
45649566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->buf_ri));
45659566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->buf_rj[0]));
45669566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->buf_rj));
45679566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->coi));
45689566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->coj));
45699566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->owners_co));
45709566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&merge->rowmap));
45719566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge));
457251a7d1a8SHong Zhang   PetscFunctionReturn(0);
457351a7d1a8SHong Zhang }
457451a7d1a8SHong Zhang 
4575c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
4576c6db04a5SJed Brown #include <petscbt.h>
45774ebed01fSBarry Smith 
45789371c9d4SSatish Balay PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) {
4579ce94432eSBarry Smith   MPI_Comm             comm;
458055d1abb9SHong Zhang   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4581b1d57f15SBarry Smith   PetscMPIInt          size, rank, taga, *len_s;
4582a2ea699eSBarry Smith   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
4583b1d57f15SBarry Smith   PetscInt             proc, m;
4584b1d57f15SBarry Smith   PetscInt           **buf_ri, **buf_rj;
4585b1d57f15SBarry Smith   PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4586b1d57f15SBarry Smith   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
458755d1abb9SHong Zhang   MPI_Request         *s_waits, *r_waits;
458855d1abb9SHong Zhang   MPI_Status          *status;
4589fff043a9SJunchao Zhang   const MatScalar     *aa, *a_a;
4590dd6ea824SBarry Smith   MatScalar          **abuf_r, *ba_i;
459155d1abb9SHong Zhang   Mat_Merge_SeqsToMPI *merge;
4592776b82aeSLisandro Dalcin   PetscContainer       container;
459355d1abb9SHong Zhang 
459455d1abb9SHong Zhang   PetscFunctionBegin;
45959566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
45969566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
45973c2c1871SHong Zhang 
45989566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
45999566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
460055d1abb9SHong Zhang 
46019566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
460228b400f6SJacob Faibussowitsch   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
46039566063dSJacob Faibussowitsch   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
46049566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4605fff043a9SJunchao Zhang   aa = a_a;
4606bf0cc555SLisandro Dalcin 
460755d1abb9SHong Zhang   bi     = merge->bi;
460855d1abb9SHong Zhang   bj     = merge->bj;
460955d1abb9SHong Zhang   buf_ri = merge->buf_ri;
461055d1abb9SHong Zhang   buf_rj = merge->buf_rj;
461155d1abb9SHong Zhang 
46129566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size, &status));
46137a2fc3feSBarry Smith   owners = merge->rowmap->range;
461455d1abb9SHong Zhang   len_s  = merge->len_s;
461555d1abb9SHong Zhang 
461655d1abb9SHong Zhang   /* send and recv matrix values */
461755d1abb9SHong Zhang   /*-----------------------------*/
46189566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
46199566063dSJacob Faibussowitsch   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
462055d1abb9SHong Zhang 
46219566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
462255d1abb9SHong Zhang   for (proc = 0, k = 0; proc < size; proc++) {
462355d1abb9SHong Zhang     if (!len_s[proc]) continue;
462455d1abb9SHong Zhang     i = owners[proc];
46259566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
462655d1abb9SHong Zhang     k++;
462755d1abb9SHong Zhang   }
462855d1abb9SHong Zhang 
46299566063dSJacob Faibussowitsch   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
46309566063dSJacob Faibussowitsch   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
46319566063dSJacob Faibussowitsch   PetscCall(PetscFree(status));
463255d1abb9SHong Zhang 
46339566063dSJacob Faibussowitsch   PetscCall(PetscFree(s_waits));
46349566063dSJacob Faibussowitsch   PetscCall(PetscFree(r_waits));
463555d1abb9SHong Zhang 
463655d1abb9SHong Zhang   /* insert mat values of mpimat */
463755d1abb9SHong Zhang   /*----------------------------*/
46389566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(N, &ba_i));
46399566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
464055d1abb9SHong Zhang 
464155d1abb9SHong Zhang   for (k = 0; k < merge->nrecv; k++) {
464255d1abb9SHong Zhang     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
464355d1abb9SHong Zhang     nrows       = *(buf_ri_k[k]);
464455d1abb9SHong Zhang     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4645a5b23f4aSJose E. Roman     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
464655d1abb9SHong Zhang   }
464755d1abb9SHong Zhang 
464855d1abb9SHong Zhang   /* set values of ba */
46497a2fc3feSBarry Smith   m = merge->rowmap->n;
465055d1abb9SHong Zhang   for (i = 0; i < m; i++) {
465155d1abb9SHong Zhang     arow = owners[rank] + i;
465255d1abb9SHong Zhang     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
465355d1abb9SHong Zhang     bnzi = bi[i + 1] - bi[i];
46549566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(ba_i, bnzi));
465555d1abb9SHong Zhang 
465655d1abb9SHong Zhang     /* add local non-zero vals of this proc's seqmat into ba */
465755d1abb9SHong Zhang     anzi   = ai[arow + 1] - ai[arow];
465855d1abb9SHong Zhang     aj     = a->j + ai[arow];
4659fff043a9SJunchao Zhang     aa     = a_a + ai[arow];
466055d1abb9SHong Zhang     nextaj = 0;
466155d1abb9SHong Zhang     for (j = 0; nextaj < anzi; j++) {
466255d1abb9SHong Zhang       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
466355d1abb9SHong Zhang         ba_i[j] += aa[nextaj++];
466455d1abb9SHong Zhang       }
466555d1abb9SHong Zhang     }
466655d1abb9SHong Zhang 
466755d1abb9SHong Zhang     /* add received vals into ba */
466855d1abb9SHong Zhang     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
466955d1abb9SHong Zhang       /* i-th row */
467055d1abb9SHong Zhang       if (i == *nextrow[k]) {
467155d1abb9SHong Zhang         anzi   = *(nextai[k] + 1) - *nextai[k];
467255d1abb9SHong Zhang         aj     = buf_rj[k] + *(nextai[k]);
467355d1abb9SHong Zhang         aa     = abuf_r[k] + *(nextai[k]);
467455d1abb9SHong Zhang         nextaj = 0;
467555d1abb9SHong Zhang         for (j = 0; nextaj < anzi; j++) {
467655d1abb9SHong Zhang           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
467755d1abb9SHong Zhang             ba_i[j] += aa[nextaj++];
467855d1abb9SHong Zhang           }
467955d1abb9SHong Zhang         }
46809371c9d4SSatish Balay         nextrow[k]++;
46819371c9d4SSatish Balay         nextai[k]++;
468255d1abb9SHong Zhang       }
468355d1abb9SHong Zhang     }
46849566063dSJacob Faibussowitsch     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
468555d1abb9SHong Zhang   }
46869566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
46879566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
46889566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
468955d1abb9SHong Zhang 
46909566063dSJacob Faibussowitsch   PetscCall(PetscFree(abuf_r[0]));
46919566063dSJacob Faibussowitsch   PetscCall(PetscFree(abuf_r));
46929566063dSJacob Faibussowitsch   PetscCall(PetscFree(ba_i));
46939566063dSJacob Faibussowitsch   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
46949566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
469555d1abb9SHong Zhang   PetscFunctionReturn(0);
469655d1abb9SHong Zhang }
469738f152feSBarry Smith 
46989371c9d4SSatish Balay PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) {
469955a3bba9SHong Zhang   Mat                  B_mpi;
4700c2234fe3SHong Zhang   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4701b1d57f15SBarry Smith   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4702b1d57f15SBarry Smith   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4703d0f46423SBarry Smith   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4704a2f3521dSMark F. Adams   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4705c599c493SJunchao Zhang   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4706b1d57f15SBarry Smith   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
470755d1abb9SHong Zhang   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
470858cb9c82SHong Zhang   MPI_Status          *status;
47090298fd71SBarry Smith   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4710be0fcf8dSHong Zhang   PetscBT              lnkbt;
471151a7d1a8SHong Zhang   Mat_Merge_SeqsToMPI *merge;
4712776b82aeSLisandro Dalcin   PetscContainer       container;
471302c68681SHong Zhang 
4714e5f2cdd8SHong Zhang   PetscFunctionBegin;
47159566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
47163c2c1871SHong Zhang 
471738f152feSBarry Smith   /* make sure it is a PETSc comm */
47189566063dSJacob Faibussowitsch   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
47199566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
47209566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
472155d1abb9SHong Zhang 
47229566063dSJacob Faibussowitsch   PetscCall(PetscNew(&merge));
47239566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size, &status));
4724e5f2cdd8SHong Zhang 
47256abd8857SHong Zhang   /* determine row ownership */
4726f08fae4eSHong Zhang   /*---------------------------------------------------------*/
47279566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
47289566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
47299566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
47309566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
47319566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(merge->rowmap));
47329566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size, &len_si));
47339566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size, &merge->len_s));
473455d1abb9SHong Zhang 
47357a2fc3feSBarry Smith   m      = merge->rowmap->n;
47367a2fc3feSBarry Smith   owners = merge->rowmap->range;
47376abd8857SHong Zhang 
47386abd8857SHong Zhang   /* determine the number of messages to send, their lengths */
47396abd8857SHong Zhang   /*---------------------------------------------------------*/
47403e06a4e6SHong Zhang   len_s = merge->len_s;
474151a7d1a8SHong Zhang 
47422257cef7SHong Zhang   len          = 0; /* length of buf_si[] */
4743c2234fe3SHong Zhang   merge->nsend = 0;
4744409913e3SHong Zhang   for (proc = 0; proc < size; proc++) {
47452257cef7SHong Zhang     len_si[proc] = 0;
47463e06a4e6SHong Zhang     if (proc == rank) {
47476abd8857SHong Zhang       len_s[proc] = 0;
47483e06a4e6SHong Zhang     } else {
474902c68681SHong Zhang       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
47503e06a4e6SHong Zhang       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
47513e06a4e6SHong Zhang     }
47523e06a4e6SHong Zhang     if (len_s[proc]) {
4753c2234fe3SHong Zhang       merge->nsend++;
47542257cef7SHong Zhang       nrows = 0;
47552257cef7SHong Zhang       for (i = owners[proc]; i < owners[proc + 1]; i++) {
47562257cef7SHong Zhang         if (ai[i + 1] > ai[i]) nrows++;
47572257cef7SHong Zhang       }
47582257cef7SHong Zhang       len_si[proc] = 2 * (nrows + 1);
47592257cef7SHong Zhang       len += len_si[proc];
4760409913e3SHong Zhang     }
476158cb9c82SHong Zhang   }
4762409913e3SHong Zhang 
47632257cef7SHong Zhang   /* determine the number and length of messages to receive for ij-structure */
47642257cef7SHong Zhang   /*-------------------------------------------------------------------------*/
47659566063dSJacob Faibussowitsch   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
47669566063dSJacob Faibussowitsch   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4767671beff6SHong Zhang 
47683e06a4e6SHong Zhang   /* post the Irecv of j-structure */
47693e06a4e6SHong Zhang   /*-------------------------------*/
47709566063dSJacob Faibussowitsch   PetscCall(PetscCommGetNewTag(comm, &tagj));
47719566063dSJacob Faibussowitsch   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
477202c68681SHong Zhang 
47733e06a4e6SHong Zhang   /* post the Isend of j-structure */
4774affca5deSHong Zhang   /*--------------------------------*/
47759566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
47763e06a4e6SHong Zhang 
47772257cef7SHong Zhang   for (proc = 0, k = 0; proc < size; proc++) {
4778409913e3SHong Zhang     if (!len_s[proc]) continue;
477902c68681SHong Zhang     i = owners[proc];
47809566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
478151a7d1a8SHong Zhang     k++;
478251a7d1a8SHong Zhang   }
478351a7d1a8SHong Zhang 
47843e06a4e6SHong Zhang   /* receives and sends of j-structure are complete */
47853e06a4e6SHong Zhang   /*------------------------------------------------*/
47869566063dSJacob Faibussowitsch   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
47879566063dSJacob Faibussowitsch   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
478802c68681SHong Zhang 
478902c68681SHong Zhang   /* send and recv i-structure */
479002c68681SHong Zhang   /*---------------------------*/
47919566063dSJacob Faibussowitsch   PetscCall(PetscCommGetNewTag(comm, &tagi));
47929566063dSJacob Faibussowitsch   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
479302c68681SHong Zhang 
47949566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(len + 1, &buf_s));
47953e06a4e6SHong Zhang   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
47962257cef7SHong Zhang   for (proc = 0, k = 0; proc < size; proc++) {
479702c68681SHong Zhang     if (!len_s[proc]) continue;
47983e06a4e6SHong Zhang     /* form outgoing message for i-structure:
47993e06a4e6SHong Zhang          buf_si[0]:                 nrows to be sent
48003e06a4e6SHong Zhang                [1:nrows]:           row index (global)
48013e06a4e6SHong Zhang                [nrows+1:2*nrows+1]: i-structure index
48023e06a4e6SHong Zhang     */
48033e06a4e6SHong Zhang     /*-------------------------------------------*/
48042257cef7SHong Zhang     nrows       = len_si[proc] / 2 - 1;
48053e06a4e6SHong Zhang     buf_si_i    = buf_si + nrows + 1;
48063e06a4e6SHong Zhang     buf_si[0]   = nrows;
48073e06a4e6SHong Zhang     buf_si_i[0] = 0;
48083e06a4e6SHong Zhang     nrows       = 0;
48093e06a4e6SHong Zhang     for (i = owners[proc]; i < owners[proc + 1]; i++) {
48103e06a4e6SHong Zhang       anzi = ai[i + 1] - ai[i];
48113e06a4e6SHong Zhang       if (anzi) {
48123e06a4e6SHong Zhang         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
48133e06a4e6SHong Zhang         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
48143e06a4e6SHong Zhang         nrows++;
48153e06a4e6SHong Zhang       }
48163e06a4e6SHong Zhang     }
48179566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
481802c68681SHong Zhang     k++;
48192257cef7SHong Zhang     buf_si += len_si[proc];
482002c68681SHong Zhang   }
48212257cef7SHong Zhang 
48229566063dSJacob Faibussowitsch   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
48239566063dSJacob Faibussowitsch   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
482402c68681SHong Zhang 
48259566063dSJacob Faibussowitsch   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
482648a46eb9SPierre Jolivet   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
48273e06a4e6SHong Zhang 
48289566063dSJacob Faibussowitsch   PetscCall(PetscFree(len_si));
48299566063dSJacob Faibussowitsch   PetscCall(PetscFree(len_ri));
48309566063dSJacob Faibussowitsch   PetscCall(PetscFree(rj_waits));
48319566063dSJacob Faibussowitsch   PetscCall(PetscFree2(si_waits, sj_waits));
48329566063dSJacob Faibussowitsch   PetscCall(PetscFree(ri_waits));
48339566063dSJacob Faibussowitsch   PetscCall(PetscFree(buf_s));
48349566063dSJacob Faibussowitsch   PetscCall(PetscFree(status));
483558cb9c82SHong Zhang 
4836bcc1bcd5SHong Zhang   /* compute a local seq matrix in each processor */
4837bcc1bcd5SHong Zhang   /*----------------------------------------------*/
483858cb9c82SHong Zhang   /* allocate bi array and free space for accumulating nonzero column info */
48399566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &bi));
484058cb9c82SHong Zhang   bi[0] = 0;
484158cb9c82SHong Zhang 
4842be0fcf8dSHong Zhang   /* create and initialize a linked list */
4843be0fcf8dSHong Zhang   nlnk = N + 1;
48449566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
484558cb9c82SHong Zhang 
4846bcc1bcd5SHong Zhang   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4847bcc1bcd5SHong Zhang   len = ai[owners[rank + 1]] - ai[owners[rank]];
48489566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
48492205254eSKarl Rupp 
485058cb9c82SHong Zhang   current_space = free_space;
485158cb9c82SHong Zhang 
4852bcc1bcd5SHong Zhang   /* determine symbolic info for each local row */
48539566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
48541d79065fSBarry Smith 
48553e06a4e6SHong Zhang   for (k = 0; k < merge->nrecv; k++) {
48562257cef7SHong Zhang     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
48573e06a4e6SHong Zhang     nrows       = *buf_ri_k[k];
48583e06a4e6SHong Zhang     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4859a5b23f4aSJose E. Roman     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
48603e06a4e6SHong Zhang   }
48612257cef7SHong Zhang 
4862d0609cedSBarry Smith   MatPreallocateBegin(comm, m, n, dnz, onz);
4863bcc1bcd5SHong Zhang   len = 0;
486458cb9c82SHong Zhang   for (i = 0; i < m; i++) {
486558cb9c82SHong Zhang     bnzi = 0;
486658cb9c82SHong Zhang     /* add local non-zero cols of this proc's seqmat into lnk */
486758cb9c82SHong Zhang     arow = owners[rank] + i;
486858cb9c82SHong Zhang     anzi = ai[arow + 1] - ai[arow];
486958cb9c82SHong Zhang     aj   = a->j + ai[arow];
48709566063dSJacob Faibussowitsch     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
487158cb9c82SHong Zhang     bnzi += nlnk;
487258cb9c82SHong Zhang     /* add received col data into lnk */
487351a7d1a8SHong Zhang     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
487455d1abb9SHong Zhang       if (i == *nextrow[k]) {            /* i-th row */
48753e06a4e6SHong Zhang         anzi = *(nextai[k] + 1) - *nextai[k];
48763e06a4e6SHong Zhang         aj   = buf_rj[k] + *nextai[k];
48779566063dSJacob Faibussowitsch         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
48783e06a4e6SHong Zhang         bnzi += nlnk;
48799371c9d4SSatish Balay         nextrow[k]++;
48809371c9d4SSatish Balay         nextai[k]++;
48813e06a4e6SHong Zhang       }
488258cb9c82SHong Zhang     }
4883bcc1bcd5SHong Zhang     if (len < bnzi) len = bnzi; /* =max(bnzi) */
488458cb9c82SHong Zhang 
488558cb9c82SHong Zhang     /* if free space is not available, make more free space */
488648a46eb9SPierre Jolivet     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
488758cb9c82SHong Zhang     /* copy data into free space, then initialize lnk */
48889566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
48899566063dSJacob Faibussowitsch     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
4890bcc1bcd5SHong Zhang 
489158cb9c82SHong Zhang     current_space->array += bnzi;
489258cb9c82SHong Zhang     current_space->local_used += bnzi;
489358cb9c82SHong Zhang     current_space->local_remaining -= bnzi;
489458cb9c82SHong Zhang 
489558cb9c82SHong Zhang     bi[i + 1] = bi[i] + bnzi;
489658cb9c82SHong Zhang   }
4897bcc1bcd5SHong Zhang 
48989566063dSJacob Faibussowitsch   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4899bcc1bcd5SHong Zhang 
49009566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
49019566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
49029566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
4903409913e3SHong Zhang 
4904bcc1bcd5SHong Zhang   /* create symbolic parallel matrix B_mpi */
4905bcc1bcd5SHong Zhang   /*---------------------------------------*/
49069566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
49079566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, &B_mpi));
490854b84b50SHong Zhang   if (n == PETSC_DECIDE) {
49099566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
491054b84b50SHong Zhang   } else {
49119566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
491254b84b50SHong Zhang   }
49139566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
49149566063dSJacob Faibussowitsch   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
49159566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
4916d0609cedSBarry Smith   MatPreallocateEnd(dnz, onz);
49179566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
491858cb9c82SHong Zhang 
491990431a8fSHong Zhang   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
49206abd8857SHong Zhang   B_mpi->assembled = PETSC_FALSE;
4921affca5deSHong Zhang   merge->bi        = bi;
4922affca5deSHong Zhang   merge->bj        = bj;
492302c68681SHong Zhang   merge->buf_ri    = buf_ri;
492402c68681SHong Zhang   merge->buf_rj    = buf_rj;
49250298fd71SBarry Smith   merge->coi       = NULL;
49260298fd71SBarry Smith   merge->coj       = NULL;
49270298fd71SBarry Smith   merge->owners_co = NULL;
4928affca5deSHong Zhang 
49299566063dSJacob Faibussowitsch   PetscCall(PetscCommDestroy(&comm));
4930bf0cc555SLisandro Dalcin 
4931affca5deSHong Zhang   /* attach the supporting struct to B_mpi for reuse */
49329566063dSJacob Faibussowitsch   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
49339566063dSJacob Faibussowitsch   PetscCall(PetscContainerSetPointer(container, merge));
49349566063dSJacob Faibussowitsch   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
49359566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
49369566063dSJacob Faibussowitsch   PetscCall(PetscContainerDestroy(&container));
4937affca5deSHong Zhang   *mpimat = B_mpi;
493838f152feSBarry Smith 
49399566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
4940e5f2cdd8SHong Zhang   PetscFunctionReturn(0);
4941e5f2cdd8SHong Zhang }
494225616d81SHong Zhang 
4943d4036a1aSHong Zhang /*@C
494411a5261eSBarry Smith       MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
4945d4036a1aSHong Zhang                  matrices from each processor
4946d4036a1aSHong Zhang 
4947d083f849SBarry Smith     Collective
4948d4036a1aSHong Zhang 
4949d4036a1aSHong Zhang    Input Parameters:
4950d4036a1aSHong Zhang +    comm - the communicators the parallel matrix will live on
4951d4036a1aSHong Zhang .    seqmat - the input sequential matrices
495211a5261eSBarry Smith .    m - number of local rows (or `PETSC_DECIDE`)
495311a5261eSBarry Smith .    n - number of local columns (or `PETSC_DECIDE`)
495411a5261eSBarry Smith -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
4955d4036a1aSHong Zhang 
4956d4036a1aSHong Zhang    Output Parameter:
4957d4036a1aSHong Zhang .    mpimat - the parallel matrix generated
4958d4036a1aSHong Zhang 
4959d4036a1aSHong Zhang     Level: advanced
4960d4036a1aSHong Zhang 
496111a5261eSBarry Smith    Note:
4962d4036a1aSHong Zhang      The dimensions of the sequential matrix in each processor MUST be the same.
4963d4036a1aSHong Zhang      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
496411a5261eSBarry Smith      destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
4965d4036a1aSHong Zhang @*/
49669371c9d4SSatish Balay PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) {
49677e63b356SHong Zhang   PetscMPIInt size;
496855d1abb9SHong Zhang 
496955d1abb9SHong Zhang   PetscFunctionBegin;
49709566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
49717e63b356SHong Zhang   if (size == 1) {
49729566063dSJacob Faibussowitsch     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
49737e63b356SHong Zhang     if (scall == MAT_INITIAL_MATRIX) {
49749566063dSJacob Faibussowitsch       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
49757e63b356SHong Zhang     } else {
49769566063dSJacob Faibussowitsch       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
49777e63b356SHong Zhang     }
49789566063dSJacob Faibussowitsch     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
49797e63b356SHong Zhang     PetscFunctionReturn(0);
49807e63b356SHong Zhang   }
49819566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
498248a46eb9SPierre Jolivet   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
49839566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
49849566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
498555d1abb9SHong Zhang   PetscFunctionReturn(0);
498655d1abb9SHong Zhang }
49874ebed01fSBarry Smith 
4988bc08b0f1SBarry Smith /*@
498911a5261eSBarry Smith      MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
499011a5261eSBarry Smith           mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
499111a5261eSBarry Smith           with `MatGetSize()`
49928a9c020eSBarry Smith 
49938a9c020eSBarry Smith     Not Collective
49948a9c020eSBarry Smith 
49958a9c020eSBarry Smith    Input Parameters:
49968a9c020eSBarry Smith +    A - the matrix
499711a5261eSBarry Smith -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
49988a9c020eSBarry Smith 
49998a9c020eSBarry Smith    Output Parameter:
50008a9c020eSBarry Smith .    A_loc - the local sequential matrix generated
50018a9c020eSBarry Smith 
50028a9c020eSBarry Smith     Level: developer
50038a9c020eSBarry Smith 
50048a9c020eSBarry Smith    Notes:
500511a5261eSBarry Smith      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
50068a9c020eSBarry Smith 
500711a5261eSBarry Smith      Destroy the matrix with `MatDestroy()`
50088a9c020eSBarry Smith 
500911a5261eSBarry Smith .seealso: `MatMPIAIJGetLocalMat()`
50108a9c020eSBarry Smith @*/
50119371c9d4SSatish Balay PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) {
50128a9c020eSBarry Smith   PetscBool mpi;
50138a9c020eSBarry Smith 
50148a9c020eSBarry Smith   PetscFunctionBegin;
50158a9c020eSBarry Smith   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
50168a9c020eSBarry Smith   if (mpi) {
50178a9c020eSBarry Smith     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
50188a9c020eSBarry Smith   } else {
50198a9c020eSBarry Smith     *A_loc = A;
50208a9c020eSBarry Smith     PetscCall(PetscObjectReference((PetscObject)*A_loc));
50218a9c020eSBarry Smith   }
50228a9c020eSBarry Smith   PetscFunctionReturn(0);
50238a9c020eSBarry Smith }
50248a9c020eSBarry Smith 
50258a9c020eSBarry Smith /*@
502611a5261eSBarry Smith      MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
502711a5261eSBarry Smith           mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
502811a5261eSBarry Smith           with `MatGetSize()`
502925616d81SHong Zhang 
503032fba14fSHong Zhang     Not Collective
503125616d81SHong Zhang 
503225616d81SHong Zhang    Input Parameters:
503325616d81SHong Zhang +    A - the matrix
503411a5261eSBarry Smith -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
503525616d81SHong Zhang 
503625616d81SHong Zhang    Output Parameter:
503725616d81SHong Zhang .    A_loc - the local sequential matrix generated
503825616d81SHong Zhang 
503925616d81SHong Zhang     Level: developer
504025616d81SHong Zhang 
504177c65a98SStefano Zampini    Notes:
504211a5261eSBarry Smith      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
50438a9c020eSBarry Smith 
504411a5261eSBarry Smith      When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A.
504511a5261eSBarry Smith      If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called.
504611a5261eSBarry Smith      This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely
504777c65a98SStefano Zampini      modify the values of the returned A_loc.
504877c65a98SStefano Zampini 
504911a5261eSBarry Smith .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
505025616d81SHong Zhang @*/
50519371c9d4SSatish Balay PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) {
505201b7ae99SHong Zhang   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5053b78526a6SJose E. Roman   Mat_SeqAIJ        *mat, *a, *b;
5054b78526a6SJose E. Roman   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5055ce496241SStefano Zampini   const PetscScalar *aa, *ba, *aav, *bav;
5056ce496241SStefano Zampini   PetscScalar       *ca, *cam;
505777c65a98SStefano Zampini   PetscMPIInt        size;
5058d0f46423SBarry Smith   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
50595a7d977cSHong Zhang   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
50608661ff28SBarry Smith   PetscBool          match;
506125616d81SHong Zhang 
506225616d81SHong Zhang   PetscFunctionBegin;
50639566063dSJacob Faibussowitsch   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
506428b400f6SJacob Faibussowitsch   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
50659566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
506677c65a98SStefano Zampini   if (size == 1) {
506777c65a98SStefano Zampini     if (scall == MAT_INITIAL_MATRIX) {
50689566063dSJacob Faibussowitsch       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
506977c65a98SStefano Zampini       *A_loc = mpimat->A;
507077c65a98SStefano Zampini     } else if (scall == MAT_REUSE_MATRIX) {
50719566063dSJacob Faibussowitsch       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
507277c65a98SStefano Zampini     }
507377c65a98SStefano Zampini     PetscFunctionReturn(0);
507477c65a98SStefano Zampini   }
507570a9ba44SHong Zhang 
50769566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5077b78526a6SJose E. Roman   a  = (Mat_SeqAIJ *)(mpimat->A)->data;
5078b78526a6SJose E. Roman   b  = (Mat_SeqAIJ *)(mpimat->B)->data;
50799371c9d4SSatish Balay   ai = a->i;
50809371c9d4SSatish Balay   aj = a->j;
50819371c9d4SSatish Balay   bi = b->i;
50829371c9d4SSatish Balay   bj = b->j;
50839566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
50849566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5085ce496241SStefano Zampini   aa = aav;
5086ce496241SStefano Zampini   ba = bav;
508701b7ae99SHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
50889566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(1 + am, &ci));
5089dea91ad1SHong Zhang     ci[0] = 0;
5090ad540459SPierre Jolivet     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
50919566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(1 + ci[am], &cj));
50929566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5093dea91ad1SHong Zhang     k = 0;
509401b7ae99SHong Zhang     for (i = 0; i < am; i++) {
50955a7d977cSHong Zhang       ncols_o = bi[i + 1] - bi[i];
50965a7d977cSHong Zhang       ncols_d = ai[i + 1] - ai[i];
509701b7ae99SHong Zhang       /* off-diagonal portion of A */
50985a7d977cSHong Zhang       for (jo = 0; jo < ncols_o; jo++) {
50995a7d977cSHong Zhang         col = cmap[*bj];
51005a7d977cSHong Zhang         if (col >= cstart) break;
51019371c9d4SSatish Balay         cj[k] = col;
51029371c9d4SSatish Balay         bj++;
51035a7d977cSHong Zhang         ca[k++] = *ba++;
51045a7d977cSHong Zhang       }
51055a7d977cSHong Zhang       /* diagonal portion of A */
51065a7d977cSHong Zhang       for (j = 0; j < ncols_d; j++) {
51075a7d977cSHong Zhang         cj[k]   = cstart + *aj++;
51085a7d977cSHong Zhang         ca[k++] = *aa++;
51095a7d977cSHong Zhang       }
51105a7d977cSHong Zhang       /* off-diagonal portion of A */
51115a7d977cSHong Zhang       for (j = jo; j < ncols_o; j++) {
51125a7d977cSHong Zhang         cj[k]   = cmap[*bj++];
51135a7d977cSHong Zhang         ca[k++] = *ba++;
51145a7d977cSHong Zhang       }
511525616d81SHong Zhang     }
5116dea91ad1SHong Zhang     /* put together the new matrix */
51179566063dSJacob Faibussowitsch     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5118dea91ad1SHong Zhang     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5119dea91ad1SHong Zhang     /* Since these are PETSc arrays, change flags to free them as necessary. */
5120dea91ad1SHong Zhang     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5121e6b907acSBarry Smith     mat->free_a  = PETSC_TRUE;
5122e6b907acSBarry Smith     mat->free_ij = PETSC_TRUE;
5123dea91ad1SHong Zhang     mat->nonew   = 0;
51245a7d977cSHong Zhang   } else if (scall == MAT_REUSE_MATRIX) {
51255a7d977cSHong Zhang     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5126fff043a9SJunchao Zhang     ci  = mat->i;
5127fff043a9SJunchao Zhang     cj  = mat->j;
51289566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
51295a7d977cSHong Zhang     for (i = 0; i < am; i++) {
51305a7d977cSHong Zhang       /* off-diagonal portion of A */
51315a7d977cSHong Zhang       ncols_o = bi[i + 1] - bi[i];
51325a7d977cSHong Zhang       for (jo = 0; jo < ncols_o; jo++) {
51335a7d977cSHong Zhang         col = cmap[*bj];
51345a7d977cSHong Zhang         if (col >= cstart) break;
51359371c9d4SSatish Balay         *cam++ = *ba++;
51369371c9d4SSatish Balay         bj++;
51375a7d977cSHong Zhang       }
51385a7d977cSHong Zhang       /* diagonal portion of A */
5139ecc9b87dSHong Zhang       ncols_d = ai[i + 1] - ai[i];
5140a77337e4SBarry Smith       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
51415a7d977cSHong Zhang       /* off-diagonal portion of A */
5142f33d1a9aSHong Zhang       for (j = jo; j < ncols_o; j++) {
51439371c9d4SSatish Balay         *cam++ = *ba++;
51449371c9d4SSatish Balay         bj++;
5145f33d1a9aSHong Zhang       }
51465a7d977cSHong Zhang     }
51479566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
514898921bdaSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
51499566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
51509566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
51519566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
515225616d81SHong Zhang   PetscFunctionReturn(0);
515325616d81SHong Zhang }
515425616d81SHong Zhang 
5155ed502f03SStefano Zampini /*@
515611a5261eSBarry Smith      MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5157ed502f03SStefano Zampini           mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5158ed502f03SStefano Zampini 
5159ed502f03SStefano Zampini     Not Collective
5160ed502f03SStefano Zampini 
5161ed502f03SStefano Zampini    Input Parameters:
5162ed502f03SStefano Zampini +    A - the matrix
516311a5261eSBarry Smith -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5164ed502f03SStefano Zampini 
5165d8d19677SJose E. Roman    Output Parameters:
516611a5261eSBarry Smith +    glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5167ed502f03SStefano Zampini -    A_loc - the local sequential matrix generated
5168ed502f03SStefano Zampini 
5169ed502f03SStefano Zampini     Level: developer
5170ed502f03SStefano Zampini 
517111a5261eSBarry Smith    Note:
517211a5261eSBarry Smith      This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the off diagonal part (in its local ordering)
5173ed502f03SStefano Zampini 
517411a5261eSBarry Smith .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5175ed502f03SStefano Zampini @*/
51769371c9d4SSatish Balay PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) {
5177ed502f03SStefano Zampini   Mat             Ao, Ad;
5178ed502f03SStefano Zampini   const PetscInt *cmap;
5179ed502f03SStefano Zampini   PetscMPIInt     size;
5180ed502f03SStefano Zampini   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5181ed502f03SStefano Zampini 
5182ed502f03SStefano Zampini   PetscFunctionBegin;
51839566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
51849566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5185ed502f03SStefano Zampini   if (size == 1) {
5186ed502f03SStefano Zampini     if (scall == MAT_INITIAL_MATRIX) {
51879566063dSJacob Faibussowitsch       PetscCall(PetscObjectReference((PetscObject)Ad));
5188ed502f03SStefano Zampini       *A_loc = Ad;
5189ed502f03SStefano Zampini     } else if (scall == MAT_REUSE_MATRIX) {
51909566063dSJacob Faibussowitsch       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5191ed502f03SStefano Zampini     }
51929566063dSJacob Faibussowitsch     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5193ed502f03SStefano Zampini     PetscFunctionReturn(0);
5194ed502f03SStefano Zampini   }
51959566063dSJacob Faibussowitsch   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
51969566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5197ed502f03SStefano Zampini   if (f) {
51989566063dSJacob Faibussowitsch     PetscCall((*f)(A, scall, glob, A_loc));
5199ed502f03SStefano Zampini   } else {
5200ed502f03SStefano Zampini     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5201ed502f03SStefano Zampini     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5202ed502f03SStefano Zampini     Mat_SeqAIJ        *c;
5203ed502f03SStefano Zampini     PetscInt          *ai = a->i, *aj = a->j;
5204ed502f03SStefano Zampini     PetscInt          *bi = b->i, *bj = b->j;
5205ed502f03SStefano Zampini     PetscInt          *ci, *cj;
5206ed502f03SStefano Zampini     const PetscScalar *aa, *ba;
5207ed502f03SStefano Zampini     PetscScalar       *ca;
5208ed502f03SStefano Zampini     PetscInt           i, j, am, dn, on;
5209ed502f03SStefano Zampini 
52109566063dSJacob Faibussowitsch     PetscCall(MatGetLocalSize(Ad, &am, &dn));
52119566063dSJacob Faibussowitsch     PetscCall(MatGetLocalSize(Ao, NULL, &on));
52129566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
52139566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5214ed502f03SStefano Zampini     if (scall == MAT_INITIAL_MATRIX) {
5215ed502f03SStefano Zampini       PetscInt k;
52169566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(1 + am, &ci));
52179566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
52189566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5219ed502f03SStefano Zampini       ci[0] = 0;
5220ed502f03SStefano Zampini       for (i = 0, k = 0; i < am; i++) {
5221ed502f03SStefano Zampini         const PetscInt ncols_o = bi[i + 1] - bi[i];
5222ed502f03SStefano Zampini         const PetscInt ncols_d = ai[i + 1] - ai[i];
5223ed502f03SStefano Zampini         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5224ed502f03SStefano Zampini         /* diagonal portion of A */
5225ed502f03SStefano Zampini         for (j = 0; j < ncols_d; j++, k++) {
5226ed502f03SStefano Zampini           cj[k] = *aj++;
5227ed502f03SStefano Zampini           ca[k] = *aa++;
5228ed502f03SStefano Zampini         }
5229ed502f03SStefano Zampini         /* off-diagonal portion of A */
5230ed502f03SStefano Zampini         for (j = 0; j < ncols_o; j++, k++) {
5231ed502f03SStefano Zampini           cj[k] = dn + *bj++;
5232ed502f03SStefano Zampini           ca[k] = *ba++;
5233ed502f03SStefano Zampini         }
5234ed502f03SStefano Zampini       }
5235ed502f03SStefano Zampini       /* put together the new matrix */
52369566063dSJacob Faibussowitsch       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5237ed502f03SStefano Zampini       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5238ed502f03SStefano Zampini       /* Since these are PETSc arrays, change flags to free them as necessary. */
5239ed502f03SStefano Zampini       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5240ed502f03SStefano Zampini       c->free_a  = PETSC_TRUE;
5241ed502f03SStefano Zampini       c->free_ij = PETSC_TRUE;
5242ed502f03SStefano Zampini       c->nonew   = 0;
52439566063dSJacob Faibussowitsch       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5244ed502f03SStefano Zampini     } else if (scall == MAT_REUSE_MATRIX) {
52459566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5246ed502f03SStefano Zampini       for (i = 0; i < am; i++) {
5247ed502f03SStefano Zampini         const PetscInt ncols_d = ai[i + 1] - ai[i];
5248ed502f03SStefano Zampini         const PetscInt ncols_o = bi[i + 1] - bi[i];
5249ed502f03SStefano Zampini         /* diagonal portion of A */
5250ed502f03SStefano Zampini         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5251ed502f03SStefano Zampini         /* off-diagonal portion of A */
5252ed502f03SStefano Zampini         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5253ed502f03SStefano Zampini       }
52549566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
525598921bdaSJacob Faibussowitsch     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
52569566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
52579566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5258ed502f03SStefano Zampini     if (glob) {
5259ed502f03SStefano Zampini       PetscInt cst, *gidx;
5260ed502f03SStefano Zampini 
52619566063dSJacob Faibussowitsch       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
52629566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(dn + on, &gidx));
5263ed502f03SStefano Zampini       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5264ed502f03SStefano Zampini       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
52659566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5266ed502f03SStefano Zampini     }
5267ed502f03SStefano Zampini   }
52689566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5269ed502f03SStefano Zampini   PetscFunctionReturn(0);
5270ed502f03SStefano Zampini }
5271ed502f03SStefano Zampini 
527232fba14fSHong Zhang /*@C
527311a5261eSBarry Smith      MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
527432fba14fSHong Zhang 
527532fba14fSHong Zhang     Not Collective
527632fba14fSHong Zhang 
527732fba14fSHong Zhang    Input Parameters:
527832fba14fSHong Zhang +    A - the matrix
527911a5261eSBarry Smith .    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
52800298fd71SBarry Smith -    row, col - index sets of rows and columns to extract (or NULL)
528132fba14fSHong Zhang 
528232fba14fSHong Zhang    Output Parameter:
528332fba14fSHong Zhang .    A_loc - the local sequential matrix generated
528432fba14fSHong Zhang 
528532fba14fSHong Zhang     Level: developer
528632fba14fSHong Zhang 
528711a5261eSBarry Smith .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
528832fba14fSHong Zhang @*/
52899371c9d4SSatish Balay PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) {
529032fba14fSHong Zhang   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
529132fba14fSHong Zhang   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
529232fba14fSHong Zhang   IS          isrowa, iscola;
529332fba14fSHong Zhang   Mat        *aloc;
52944a2b5492SBarry Smith   PetscBool   match;
529532fba14fSHong Zhang 
529632fba14fSHong Zhang   PetscFunctionBegin;
52979566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
529828b400f6SJacob Faibussowitsch   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
52999566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
530032fba14fSHong Zhang   if (!row) {
53019371c9d4SSatish Balay     start = A->rmap->rstart;
53029371c9d4SSatish Balay     end   = A->rmap->rend;
53039566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
530432fba14fSHong Zhang   } else {
530532fba14fSHong Zhang     isrowa = *row;
530632fba14fSHong Zhang   }
530732fba14fSHong Zhang   if (!col) {
5308d0f46423SBarry Smith     start = A->cmap->rstart;
530932fba14fSHong Zhang     cmap  = a->garray;
5310d0f46423SBarry Smith     nzA   = a->A->cmap->n;
5311d0f46423SBarry Smith     nzB   = a->B->cmap->n;
53129566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nzA + nzB, &idx));
531332fba14fSHong Zhang     ncols = 0;
531432fba14fSHong Zhang     for (i = 0; i < nzB; i++) {
531532fba14fSHong Zhang       if (cmap[i] < start) idx[ncols++] = cmap[i];
531632fba14fSHong Zhang       else break;
531732fba14fSHong Zhang     }
531832fba14fSHong Zhang     imark = i;
531932fba14fSHong Zhang     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
532032fba14fSHong Zhang     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
53219566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
532232fba14fSHong Zhang   } else {
532332fba14fSHong Zhang     iscola = *col;
532432fba14fSHong Zhang   }
532532fba14fSHong Zhang   if (scall != MAT_INITIAL_MATRIX) {
53269566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(1, &aloc));
532732fba14fSHong Zhang     aloc[0] = *A_loc;
532832fba14fSHong Zhang   }
53299566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5330109e0772SStefano Zampini   if (!col) { /* attach global id of condensed columns */
53319566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5332109e0772SStefano Zampini   }
533332fba14fSHong Zhang   *A_loc = aloc[0];
53349566063dSJacob Faibussowitsch   PetscCall(PetscFree(aloc));
533548a46eb9SPierre Jolivet   if (!row) PetscCall(ISDestroy(&isrowa));
533648a46eb9SPierre Jolivet   if (!col) PetscCall(ISDestroy(&iscola));
53379566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
533832fba14fSHong Zhang   PetscFunctionReturn(0);
533932fba14fSHong Zhang }
534032fba14fSHong Zhang 
53415c65b9ecSFande Kong /*
53425c65b9ecSFande Kong  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
53435c65b9ecSFande Kong  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
53445c65b9ecSFande Kong  * on a global size.
53455c65b9ecSFande Kong  * */
53469371c9d4SSatish Balay PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) {
53475c65b9ecSFande Kong   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
53485c65b9ecSFande Kong   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
5349131c27b5Sprj-   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5350131c27b5Sprj-   PetscMPIInt            owner;
53515c65b9ecSFande Kong   PetscSFNode           *iremote, *oiremote;
53525c65b9ecSFande Kong   const PetscInt        *lrowindices;
53535c65b9ecSFande Kong   PetscSF                sf, osf;
53545c65b9ecSFande Kong   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
53555c65b9ecSFande Kong   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
53565c65b9ecSFande Kong   MPI_Comm               comm;
53575c65b9ecSFande Kong   ISLocalToGlobalMapping mapping;
5358fff043a9SJunchao Zhang   const PetscScalar     *pd_a, *po_a;
53595c65b9ecSFande Kong 
53605c65b9ecSFande Kong   PetscFunctionBegin;
53619566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
53625c65b9ecSFande Kong   /* plocalsize is the number of roots
53635c65b9ecSFande Kong    * nrows is the number of leaves
53645c65b9ecSFande Kong    * */
53659566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
53669566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(rows, &nrows));
53679566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(nrows, &iremote));
53689566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(rows, &lrowindices));
53695c65b9ecSFande Kong   for (i = 0; i < nrows; i++) {
53705c65b9ecSFande Kong     /* Find a remote index and an owner for a row
53715c65b9ecSFande Kong      * The row could be local or remote
53725c65b9ecSFande Kong      * */
537334bcad68SFande Kong     owner = 0;
537434bcad68SFande Kong     lidx  = 0;
53759566063dSJacob Faibussowitsch     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
53765c65b9ecSFande Kong     iremote[i].index = lidx;
53775c65b9ecSFande Kong     iremote[i].rank  = owner;
53785c65b9ecSFande Kong   }
53795c65b9ecSFande Kong   /* Create SF to communicate how many nonzero columns for each row */
53809566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(comm, &sf));
53815c65b9ecSFande Kong   /* SF will figure out the number of nonzero colunms for each row, and their
53825c65b9ecSFande Kong    * offsets
53835c65b9ecSFande Kong    * */
53849566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
53859566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(sf));
53869566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
5387bc8e477aSFande Kong 
53889566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
53899566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
53909566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(nrows, &pnnz));
53915c65b9ecSFande Kong   roffsets[0] = 0;
53925c65b9ecSFande Kong   roffsets[1] = 0;
53935c65b9ecSFande Kong   for (i = 0; i < plocalsize; i++) {
53945c65b9ecSFande Kong     /* diag */
53955c65b9ecSFande Kong     nrcols[i * 2 + 0]         = pd->i[i + 1] - pd->i[i];
53965c65b9ecSFande Kong     /* off diag */
53975c65b9ecSFande Kong     nrcols[i * 2 + 1]         = po->i[i + 1] - po->i[i];
53985c65b9ecSFande Kong     /* compute offsets so that we relative location for each row */
53995c65b9ecSFande Kong     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
54005c65b9ecSFande Kong     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
54015c65b9ecSFande Kong   }
54029566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
54039566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
54045c65b9ecSFande Kong   /* 'r' means root, and 'l' means leaf */
54059566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
54069566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
54079566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
54089566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
54099566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf));
54109566063dSJacob Faibussowitsch   PetscCall(PetscFree(roffsets));
54119566063dSJacob Faibussowitsch   PetscCall(PetscFree(nrcols));
54125c65b9ecSFande Kong   dntotalcols = 0;
54135c65b9ecSFande Kong   ontotalcols = 0;
5414bc8e477aSFande Kong   ncol        = 0;
54155c65b9ecSFande Kong   for (i = 0; i < nrows; i++) {
54165c65b9ecSFande Kong     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5417bc8e477aSFande Kong     ncol    = PetscMax(pnnz[i], ncol);
54185c65b9ecSFande Kong     /* diag */
54195c65b9ecSFande Kong     dntotalcols += nlcols[i * 2 + 0];
54205c65b9ecSFande Kong     /* off diag */
54215c65b9ecSFande Kong     ontotalcols += nlcols[i * 2 + 1];
54225c65b9ecSFande Kong   }
54235c65b9ecSFande Kong   /* We do not need to figure the right number of columns
54245c65b9ecSFande Kong    * since all the calculations will be done by going through the raw data
54255c65b9ecSFande Kong    * */
54269566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
54279566063dSJacob Faibussowitsch   PetscCall(MatSetUp(*P_oth));
54289566063dSJacob Faibussowitsch   PetscCall(PetscFree(pnnz));
54295c65b9ecSFande Kong   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
54305c65b9ecSFande Kong   /* diag */
54319566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(dntotalcols, &iremote));
54325c65b9ecSFande Kong   /* off diag */
54339566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
54345c65b9ecSFande Kong   /* diag */
54359566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
54365c65b9ecSFande Kong   /* off diag */
54379566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
54385c65b9ecSFande Kong   dntotalcols = 0;
54395c65b9ecSFande Kong   ontotalcols = 0;
54405c65b9ecSFande Kong   ntotalcols  = 0;
54415c65b9ecSFande Kong   for (i = 0; i < nrows; i++) {
544234bcad68SFande Kong     owner = 0;
54439566063dSJacob Faibussowitsch     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
54445c65b9ecSFande Kong     /* Set iremote for diag matrix */
54455c65b9ecSFande Kong     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
54465c65b9ecSFande Kong       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
54475c65b9ecSFande Kong       iremote[dntotalcols].rank  = owner;
54485c65b9ecSFande Kong       /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
54495c65b9ecSFande Kong       ilocal[dntotalcols++]      = ntotalcols++;
54505c65b9ecSFande Kong     }
54515c65b9ecSFande Kong     /* off diag */
54525c65b9ecSFande Kong     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
54535c65b9ecSFande Kong       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
54545c65b9ecSFande Kong       oiremote[ontotalcols].rank  = owner;
54555c65b9ecSFande Kong       oilocal[ontotalcols++]      = ntotalcols++;
54565c65b9ecSFande Kong     }
54575c65b9ecSFande Kong   }
54589566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(rows, &lrowindices));
54599566063dSJacob Faibussowitsch   PetscCall(PetscFree(loffsets));
54609566063dSJacob Faibussowitsch   PetscCall(PetscFree(nlcols));
54619566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(comm, &sf));
54625c65b9ecSFande Kong   /* P serves as roots and P_oth is leaves
54635c65b9ecSFande Kong    * Diag matrix
54645c65b9ecSFande Kong    * */
54659566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
54669566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(sf));
54679566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
54685c65b9ecSFande Kong 
54699566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(comm, &osf));
54705c65b9ecSFande Kong   /* Off diag */
54719566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
54729566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(osf));
54739566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(osf));
54749566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
54759566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
54765c65b9ecSFande Kong   /* We operate on the matrix internal data for saving memory */
54779566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
54789566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
54799566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
54805c65b9ecSFande Kong   /* Convert to global indices for diag matrix */
54815c65b9ecSFande Kong   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
54829566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
54835c65b9ecSFande Kong   /* We want P_oth store global indices */
54849566063dSJacob Faibussowitsch   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
54855c65b9ecSFande Kong   /* Use memory scalable approach */
54869566063dSJacob Faibussowitsch   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
54879566063dSJacob Faibussowitsch   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
54889566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
54899566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
54905c65b9ecSFande Kong   /* Convert back to local indices */
54915c65b9ecSFande Kong   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
54929566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
54935c65b9ecSFande Kong   nout = 0;
54949566063dSJacob Faibussowitsch   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
549508401ef6SPierre Jolivet   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
54969566063dSJacob Faibussowitsch   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
54975c65b9ecSFande Kong   /* Exchange values */
54989566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
54999566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
55009566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
55019566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
55025c65b9ecSFande Kong   /* Stop PETSc from shrinking memory */
55035c65b9ecSFande Kong   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
55049566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
55059566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
55065c65b9ecSFande Kong   /* Attach PetscSF objects to P_oth so that we can reuse it later */
55079566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
55089566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
55099566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf));
55109566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&osf));
55115c65b9ecSFande Kong   PetscFunctionReturn(0);
55125c65b9ecSFande Kong }
55135c65b9ecSFande Kong 
55145c65b9ecSFande Kong /*
55155c65b9ecSFande Kong  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
55165c65b9ecSFande Kong  * This supports MPIAIJ and MAIJ
55175c65b9ecSFande Kong  * */
55189371c9d4SSatish Balay PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) {
55195c65b9ecSFande Kong   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5520bc8e477aSFande Kong   Mat_SeqAIJ *p_oth;
5521bc8e477aSFande Kong   IS          rows, map;
5522bc8e477aSFande Kong   PetscHMapI  hamp;
5523bc8e477aSFande Kong   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
55245c65b9ecSFande Kong   MPI_Comm    comm;
55255c65b9ecSFande Kong   PetscSF     sf, osf;
5526bc8e477aSFande Kong   PetscBool   has;
55275c65b9ecSFande Kong 
55285c65b9ecSFande Kong   PetscFunctionBegin;
55299566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
55309566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
55315c65b9ecSFande Kong   /* If it is the first time, create an index set of off-diag nonzero columns of A,
55325c65b9ecSFande Kong    *  and then create a submatrix (that often is an overlapping matrix)
55335c65b9ecSFande Kong    * */
55345c65b9ecSFande Kong   if (reuse == MAT_INITIAL_MATRIX) {
55355c65b9ecSFande Kong     /* Use a hash table to figure out unique keys */
55369566063dSJacob Faibussowitsch     PetscCall(PetscHMapICreate(&hamp));
55379566063dSJacob Faibussowitsch     PetscCall(PetscHMapIResize(hamp, a->B->cmap->n));
55389566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5539bc8e477aSFande Kong     count = 0;
5540bc8e477aSFande Kong     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5541bc8e477aSFande Kong     for (i = 0; i < a->B->cmap->n; i++) {
5542bc8e477aSFande Kong       key = a->garray[i] / dof;
55439566063dSJacob Faibussowitsch       PetscCall(PetscHMapIHas(hamp, key, &has));
5544bc8e477aSFande Kong       if (!has) {
5545bc8e477aSFande Kong         mapping[i] = count;
55469566063dSJacob Faibussowitsch         PetscCall(PetscHMapISet(hamp, key, count++));
5547bc8e477aSFande Kong       } else {
5548bc8e477aSFande Kong         /* Current 'i' has the same value the previous step */
5549bc8e477aSFande Kong         mapping[i] = count - 1;
55505c65b9ecSFande Kong       }
5551bc8e477aSFande Kong     }
55529566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
55539566063dSJacob Faibussowitsch     PetscCall(PetscHMapIGetSize(hamp, &htsize));
555408401ef6SPierre Jolivet     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ", htsize, count);
55559566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(htsize, &rowindices));
55565c65b9ecSFande Kong     off = 0;
55579566063dSJacob Faibussowitsch     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
55589566063dSJacob Faibussowitsch     PetscCall(PetscHMapIDestroy(&hamp));
55599566063dSJacob Faibussowitsch     PetscCall(PetscSortInt(htsize, rowindices));
55609566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
55615c65b9ecSFande Kong     /* In case, the matrix was already created but users want to recreate the matrix */
55629566063dSJacob Faibussowitsch     PetscCall(MatDestroy(P_oth));
55639566063dSJacob Faibussowitsch     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
55649566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
55659566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&map));
55669566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&rows));
55675c65b9ecSFande Kong   } else if (reuse == MAT_REUSE_MATRIX) {
55685c65b9ecSFande Kong     /* If matrix was already created, we simply update values using SF objects
55695c65b9ecSFande Kong      * that as attached to the matrix ealier.
5570fff043a9SJunchao Zhang      */
5571fff043a9SJunchao Zhang     const PetscScalar *pd_a, *po_a;
5572fff043a9SJunchao Zhang 
55739566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
55749566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
557508401ef6SPierre Jolivet     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
55765c65b9ecSFande Kong     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
55775c65b9ecSFande Kong     /* Update values in place */
55789566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
55799566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
55809566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
55819566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
55829566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
55839566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
55849566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
55859566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
55866718818eSStefano Zampini   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
55879566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
55885c65b9ecSFande Kong   PetscFunctionReturn(0);
55895c65b9ecSFande Kong }
55905c65b9ecSFande Kong 
559125616d81SHong Zhang /*@C
559211a5261eSBarry Smith   MatGetBrowsOfAcols - Returns `IS` that contain rows of B that equal to nonzero columns of local A
559325616d81SHong Zhang 
559411a5261eSBarry Smith   Collective on A
559525616d81SHong Zhang 
559625616d81SHong Zhang   Input Parameters:
559711a5261eSBarry Smith + A - the first matrix in `MATMPIAIJ` format
559811a5261eSBarry Smith . B - the second matrix in `MATMPIAIJ` format
559911a5261eSBarry Smith - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
560025616d81SHong Zhang 
5601f1a722f8SMatthew G. Knepley   Output Parameters:
5602f1a722f8SMatthew G. Knepley + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5603f1a722f8SMatthew G. Knepley . colb - On input index sets of columns of B to extract (or NULL), modified on output
5604f1a722f8SMatthew G. Knepley - B_seq - the sequential matrix generated
560525616d81SHong Zhang 
560625616d81SHong Zhang   Level: developer
560725616d81SHong Zhang 
560825616d81SHong Zhang @*/
56099371c9d4SSatish Balay PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) {
5610899cda47SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5611b1d57f15SBarry Smith   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
561225616d81SHong Zhang   IS          isrowb, iscolb;
56130298fd71SBarry Smith   Mat        *bseq = NULL;
561425616d81SHong Zhang 
561525616d81SHong Zhang   PetscFunctionBegin;
5616d0f46423SBarry Smith   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
561798921bdaSJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
561825616d81SHong Zhang   }
56199566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
562025616d81SHong Zhang 
562125616d81SHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
5622d0f46423SBarry Smith     start = A->cmap->rstart;
562325616d81SHong Zhang     cmap  = a->garray;
5624d0f46423SBarry Smith     nzA   = a->A->cmap->n;
5625d0f46423SBarry Smith     nzB   = a->B->cmap->n;
56269566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nzA + nzB, &idx));
562725616d81SHong Zhang     ncols = 0;
56280390132cSHong Zhang     for (i = 0; i < nzB; i++) { /* row < local row index */
562925616d81SHong Zhang       if (cmap[i] < start) idx[ncols++] = cmap[i];
563025616d81SHong Zhang       else break;
563125616d81SHong Zhang     }
563225616d81SHong Zhang     imark = i;
56330390132cSHong Zhang     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
56340390132cSHong Zhang     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
56359566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
56369566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
563725616d81SHong Zhang   } else {
563808401ef6SPierre Jolivet     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
56399371c9d4SSatish Balay     isrowb = *rowb;
56409371c9d4SSatish Balay     iscolb = *colb;
56419566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(1, &bseq));
564225616d81SHong Zhang     bseq[0] = *B_seq;
564325616d81SHong Zhang   }
56449566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
564525616d81SHong Zhang   *B_seq = bseq[0];
56469566063dSJacob Faibussowitsch   PetscCall(PetscFree(bseq));
564725616d81SHong Zhang   if (!rowb) {
56489566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&isrowb));
564925616d81SHong Zhang   } else {
565025616d81SHong Zhang     *rowb = isrowb;
565125616d81SHong Zhang   }
565225616d81SHong Zhang   if (!colb) {
56539566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscolb));
565425616d81SHong Zhang   } else {
565525616d81SHong Zhang     *colb = iscolb;
565625616d81SHong Zhang   }
56579566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
565825616d81SHong Zhang   PetscFunctionReturn(0);
565925616d81SHong Zhang }
5660429d309bSHong Zhang 
5661f8487c73SHong Zhang /*
5662f8487c73SHong Zhang     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
566301b7ae99SHong Zhang     of the OFF-DIAGONAL portion of local A
5664429d309bSHong Zhang 
5665429d309bSHong Zhang     Collective on Mat
5666429d309bSHong Zhang 
5667429d309bSHong Zhang    Input Parameters:
5668429d309bSHong Zhang +    A,B - the matrices in mpiaij format
5669598bc09dSHong Zhang -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5670429d309bSHong Zhang 
   Output Parameters:
56720298fd71SBarry Smith +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
56730298fd71SBarry Smith .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
56740298fd71SBarry Smith .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5675598bc09dSHong Zhang -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5676429d309bSHong Zhang 
567711a5261eSBarry Smith     Developer Note:
567811a5261eSBarry Smith     This directly accesses information inside the VecScatter associated with the matrix-vector product
56796eb45d04SBarry Smith      for this matrix. This is not desirable..
56806eb45d04SBarry Smith 
5681429d309bSHong Zhang     Level: developer
5682429d309bSHong Zhang 
5683f8487c73SHong Zhang */
56849371c9d4SSatish Balay PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) {
5685899cda47SBarry Smith   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
568687025532SHong Zhang   Mat_SeqAIJ        *b_oth;
56874b8d542aSHong Zhang   VecScatter         ctx;
5688ce94432eSBarry Smith   MPI_Comm           comm;
56893515ee7fSJunchao Zhang   const PetscMPIInt *rprocs, *sprocs;
56903515ee7fSJunchao Zhang   const PetscInt    *srow, *rstarts, *sstarts;
5691277f51e8SBarry Smith   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5692f4259b30SLisandro Dalcin   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5693277f51e8SBarry Smith   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5694ddea5d60SJunchao Zhang   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5695ddea5d60SJunchao Zhang   PetscMPIInt        size, tag, rank, nreqs;
5696429d309bSHong Zhang 
5697429d309bSHong Zhang   PetscFunctionBegin;
56989566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
56999566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
5700a7c7454dSHong Zhang 
5701c0aa6a63SJacob Faibussowitsch   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
570298921bdaSJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5703429d309bSHong Zhang   }
57049566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
57059566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5706a6b2eed2SHong Zhang 
5707ec07b8f8SHong Zhang   if (size == 1) {
5708ec07b8f8SHong Zhang     startsj_s = NULL;
5709ec07b8f8SHong Zhang     bufa_ptr  = NULL;
571052f7967eSHong Zhang     *B_oth    = NULL;
5711ec07b8f8SHong Zhang     PetscFunctionReturn(0);
5712ec07b8f8SHong Zhang   }
5713ec07b8f8SHong Zhang 
5714fa83eaafSHong Zhang   ctx = a->Mvctx;
57154b8d542aSHong Zhang   tag = ((PetscObject)ctx)->tag;
57164b8d542aSHong Zhang 
57179566063dSJacob Faibussowitsch   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
57183515ee7fSJunchao Zhang   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
57199566063dSJacob Faibussowitsch   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
57209566063dSJacob Faibussowitsch   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
57219566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nreqs, &reqs));
5722ddea5d60SJunchao Zhang   rwaits = reqs;
5723ddea5d60SJunchao Zhang   swaits = reqs + nrecvs;
5724429d309bSHong Zhang 
5725b7f45c76SHong Zhang   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5726429d309bSHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
5727a6b2eed2SHong Zhang     /* i-array */
5728a6b2eed2SHong Zhang     /*---------*/
5729a6b2eed2SHong Zhang     /*  post receives */
57309566063dSJacob Faibussowitsch     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5731a6b2eed2SHong Zhang     for (i = 0; i < nrecvs; i++) {
573274268593SBarry Smith       rowlen = rvalues + rstarts[i] * rbs;
5733e42f35eeSHong Zhang       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
57349566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5735429d309bSHong Zhang     }
5736a6b2eed2SHong Zhang 
5737a6b2eed2SHong Zhang     /* pack the outgoing message */
57389566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
57392205254eSKarl Rupp 
57402205254eSKarl Rupp     sstartsj[0] = 0;
57412205254eSKarl Rupp     rstartsj[0] = 0;
5742a6b2eed2SHong Zhang     len         = 0; /* total length of j or a array to be sent */
57433515ee7fSJunchao Zhang     if (nsends) {
57443515ee7fSJunchao Zhang       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
57459566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
57463515ee7fSJunchao Zhang     }
5747a6b2eed2SHong Zhang     for (i = 0; i < nsends; i++) {
57483515ee7fSJunchao Zhang       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5749e42f35eeSHong Zhang       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
575087025532SHong Zhang       for (j = 0; j < nrows; j++) {
5751d0f46423SBarry Smith         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5752e42f35eeSHong Zhang         for (l = 0; l < sbs; l++) {
57539566063dSJacob Faibussowitsch           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
57542205254eSKarl Rupp 
5755e42f35eeSHong Zhang           rowlen[j * sbs + l] = ncols;
57562205254eSKarl Rupp 
5757e42f35eeSHong Zhang           len += ncols;
57589566063dSJacob Faibussowitsch           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5759e42f35eeSHong Zhang         }
5760a6b2eed2SHong Zhang         k++;
5761429d309bSHong Zhang       }
57629566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
57632205254eSKarl Rupp 
5764dea91ad1SHong Zhang       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5765429d309bSHong Zhang     }
576687025532SHong Zhang     /* recvs and sends of i-array are completed */
57679566063dSJacob Faibussowitsch     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
57689566063dSJacob Faibussowitsch     PetscCall(PetscFree(svalues));
5769e42f35eeSHong Zhang 
5770a6b2eed2SHong Zhang     /* allocate buffers for sending j and a arrays */
57719566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(len + 1, &bufj));
57729566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(len + 1, &bufa));
5773a6b2eed2SHong Zhang 
577487025532SHong Zhang     /* create i-array of B_oth */
57759566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
57762205254eSKarl Rupp 
577787025532SHong Zhang     b_othi[0] = 0;
5778a6b2eed2SHong Zhang     len       = 0; /* total length of j or a array to be received */
5779a6b2eed2SHong Zhang     k         = 0;
5780a6b2eed2SHong Zhang     for (i = 0; i < nrecvs; i++) {
57813515ee7fSJunchao Zhang       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
57823515ee7fSJunchao Zhang       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
578387025532SHong Zhang       for (j = 0; j < nrows; j++) {
578487025532SHong Zhang         b_othi[k + 1] = b_othi[k] + rowlen[j];
57859566063dSJacob Faibussowitsch         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5786f91af8c7SBarry Smith         k++;
5787a6b2eed2SHong Zhang       }
5788dea91ad1SHong Zhang       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5789a6b2eed2SHong Zhang     }
57909566063dSJacob Faibussowitsch     PetscCall(PetscFree(rvalues));
5791a6b2eed2SHong Zhang 
57926aad120cSJose E. Roman     /* allocate space for j and a arrays of B_oth */
57939566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
57949566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5795a6b2eed2SHong Zhang 
579687025532SHong Zhang     /* j-array */
579787025532SHong Zhang     /*---------*/
5798a6b2eed2SHong Zhang     /*  post receives of j-array */
5799a6b2eed2SHong Zhang     for (i = 0; i < nrecvs; i++) {
580087025532SHong Zhang       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
58019566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5802a6b2eed2SHong Zhang     }
5803e42f35eeSHong Zhang 
5804e42f35eeSHong Zhang     /* pack the outgoing message j-array */
58053515ee7fSJunchao Zhang     if (nsends) k = sstarts[0];
5806a6b2eed2SHong Zhang     for (i = 0; i < nsends; i++) {
5807e42f35eeSHong Zhang       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5808a6b2eed2SHong Zhang       bufJ  = bufj + sstartsj[i];
580987025532SHong Zhang       for (j = 0; j < nrows; j++) {
5810d0f46423SBarry Smith         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5811e42f35eeSHong Zhang         for (ll = 0; ll < sbs; ll++) {
58129566063dSJacob Faibussowitsch           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5813ad540459SPierre Jolivet           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
58149566063dSJacob Faibussowitsch           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5815e42f35eeSHong Zhang         }
581687025532SHong Zhang       }
58179566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
581887025532SHong Zhang     }
581987025532SHong Zhang 
582087025532SHong Zhang     /* recvs and sends of j-array are completed */
58219566063dSJacob Faibussowitsch     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
582287025532SHong Zhang   } else if (scall == MAT_REUSE_MATRIX) {
5823b7f45c76SHong Zhang     sstartsj = *startsj_s;
58241d79065fSBarry Smith     rstartsj = *startsj_r;
582587025532SHong Zhang     bufa     = *bufa_ptr;
582687025532SHong Zhang     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
58279566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5828ddea5d60SJunchao Zhang   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
582987025532SHong Zhang 
583087025532SHong Zhang   /* a-array */
583187025532SHong Zhang   /*---------*/
583287025532SHong Zhang   /*  post receives of a-array */
583387025532SHong Zhang   for (i = 0; i < nrecvs; i++) {
583487025532SHong Zhang     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
58359566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
583687025532SHong Zhang   }
5837e42f35eeSHong Zhang 
5838e42f35eeSHong Zhang   /* pack the outgoing message a-array */
58393515ee7fSJunchao Zhang   if (nsends) k = sstarts[0];
584087025532SHong Zhang   for (i = 0; i < nsends; i++) {
5841e42f35eeSHong Zhang     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
584287025532SHong Zhang     bufA  = bufa + sstartsj[i];
584387025532SHong Zhang     for (j = 0; j < nrows; j++) {
5844d0f46423SBarry Smith       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5845e42f35eeSHong Zhang       for (ll = 0; ll < sbs; ll++) {
58469566063dSJacob Faibussowitsch         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5847ad540459SPierre Jolivet         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
58489566063dSJacob Faibussowitsch         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5849e42f35eeSHong Zhang       }
5850a6b2eed2SHong Zhang     }
58519566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5852a6b2eed2SHong Zhang   }
585387025532SHong Zhang   /* recvs and sends of a-array are completed */
58549566063dSJacob Faibussowitsch   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
58559566063dSJacob Faibussowitsch   PetscCall(PetscFree(reqs));
5856a6b2eed2SHong Zhang 
585787025532SHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
5858a6b2eed2SHong Zhang     /* put together the new matrix */
58599566063dSJacob Faibussowitsch     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
5860a6b2eed2SHong Zhang 
5861a6b2eed2SHong Zhang     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5862a6b2eed2SHong Zhang     /* Since these are PETSc arrays, change flags to free them as necessary. */
586387025532SHong Zhang     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5864e6b907acSBarry Smith     b_oth->free_a  = PETSC_TRUE;
5865e6b907acSBarry Smith     b_oth->free_ij = PETSC_TRUE;
586687025532SHong Zhang     b_oth->nonew   = 0;
5867a6b2eed2SHong Zhang 
58689566063dSJacob Faibussowitsch     PetscCall(PetscFree(bufj));
5869b7f45c76SHong Zhang     if (!startsj_s || !bufa_ptr) {
58709566063dSJacob Faibussowitsch       PetscCall(PetscFree2(sstartsj, rstartsj));
58719566063dSJacob Faibussowitsch       PetscCall(PetscFree(bufa_ptr));
5872dea91ad1SHong Zhang     } else {
5873b7f45c76SHong Zhang       *startsj_s = sstartsj;
58741d79065fSBarry Smith       *startsj_r = rstartsj;
587587025532SHong Zhang       *bufa_ptr  = bufa;
587687025532SHong Zhang     }
5877fff043a9SJunchao Zhang   } else if (scall == MAT_REUSE_MATRIX) {
58789566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
5879dea91ad1SHong Zhang   }
58803515ee7fSJunchao Zhang 
58819566063dSJacob Faibussowitsch   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
58829566063dSJacob Faibussowitsch   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
58839566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
5884429d309bSHong Zhang   PetscFunctionReturn(0);
5885429d309bSHong Zhang }
5886ccd8e176SBarry Smith 
5887cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
5888cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
5889ca9cdca7SRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
58909779e05dSSatish Balay #if defined(PETSC_HAVE_MKL_SPARSE)
5891a84739b8SRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
5892191b95cbSRichard Tran Mills #endif
5893ae8d29abSPierre Jolivet PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
5894cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
58955d7652ecSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
5896cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
58975d7652ecSHong Zhang #endif
5898d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
5899d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
5900d24d4204SJose E. Roman #endif
590163c07aadSStefano Zampini #if defined(PETSC_HAVE_HYPRE)
590263c07aadSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
590363c07aadSStefano Zampini #endif
59043338378cSStefano Zampini #if defined(PETSC_HAVE_CUDA)
59053338378cSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
59063338378cSStefano Zampini #endif
59073d0639e7SStefano Zampini #if defined(PETSC_HAVE_KOKKOS_KERNELS)
59083d0639e7SStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
59093d0639e7SStefano Zampini #endif
5910d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
59114222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
59124222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
591317667f90SBarry Smith 
5914fc4dec0aSBarry Smith /*
5915fc4dec0aSBarry Smith     Computes (B'*A')' since computing B*A directly is untenable
5916fc4dec0aSBarry Smith 
5917fc4dec0aSBarry Smith                n                       p                          p
59182da392ccSBarry Smith         [             ]       [             ]         [                 ]
59192da392ccSBarry Smith       m [      A      ]  *  n [       B     ]   =   m [         C       ]
59202da392ccSBarry Smith         [             ]       [             ]         [                 ]
5921fc4dec0aSBarry Smith 
5922fc4dec0aSBarry Smith */
59239371c9d4SSatish Balay static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) {
5924fc4dec0aSBarry Smith   Mat At, Bt, Ct;
5925fc4dec0aSBarry Smith 
5926fc4dec0aSBarry Smith   PetscFunctionBegin;
59279566063dSJacob Faibussowitsch   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
59289566063dSJacob Faibussowitsch   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
59299566063dSJacob Faibussowitsch   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
59309566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&At));
59319566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Bt));
59327fb60732SBarry Smith   PetscCall(MatTransposeSetPrecursor(Ct, C));
59339566063dSJacob Faibussowitsch   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
59349566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Ct));
5935fc4dec0aSBarry Smith   PetscFunctionReturn(0);
5936fc4dec0aSBarry Smith }
5937fc4dec0aSBarry Smith 
59389371c9d4SSatish Balay static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) {
59396718818eSStefano Zampini   PetscBool cisdense;
5940fc4dec0aSBarry Smith 
5941fc4dec0aSBarry Smith   PetscFunctionBegin;
594208401ef6SPierre Jolivet   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
59439566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
59449566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(C, A, B));
59459566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
594648a46eb9SPierre Jolivet   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
59479566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
5948f75ecaa4SHong Zhang 
59494222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5950fc4dec0aSBarry Smith   PetscFunctionReturn(0);
5951fc4dec0aSBarry Smith }
5952fc4dec0aSBarry Smith 
5953fc4dec0aSBarry Smith /* ----------------------------------------------------------------*/
59549371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) {
59554222ddf1SHong Zhang   Mat_Product *product = C->product;
59564222ddf1SHong Zhang   Mat          A = product->A, B = product->B;
5957fc4dec0aSBarry Smith 
5958fc4dec0aSBarry Smith   PetscFunctionBegin;
59594222ddf1SHong Zhang   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
596098921bdaSJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
59614222ddf1SHong Zhang 
59624222ddf1SHong Zhang   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
59634222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
5964fc4dec0aSBarry Smith   PetscFunctionReturn(0);
5965fc4dec0aSBarry Smith }
5966fc4dec0aSBarry Smith 
59679371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) {
59684222ddf1SHong Zhang   Mat_Product *product = C->product;
59694222ddf1SHong Zhang 
59704222ddf1SHong Zhang   PetscFunctionBegin;
597148a46eb9SPierre Jolivet   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
59724222ddf1SHong Zhang   PetscFunctionReturn(0);
59734222ddf1SHong Zhang }
5974394ed5ebSJunchao Zhang 
5975158ec288SJunchao Zhang /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
5976394ed5ebSJunchao Zhang 
5977394ed5ebSJunchao Zhang   Input Parameters:
5978394ed5ebSJunchao Zhang 
5979394ed5ebSJunchao Zhang     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
5980394ed5ebSJunchao Zhang     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
5981394ed5ebSJunchao Zhang 
5982158ec288SJunchao Zhang     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
5983394ed5ebSJunchao Zhang 
5984394ed5ebSJunchao Zhang     For Set1, j1[] contains column indices of the nonzeros.
5985394ed5ebSJunchao Zhang     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
5986394ed5ebSJunchao Zhang     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
5987394ed5ebSJunchao Zhang     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
5988394ed5ebSJunchao Zhang 
5989394ed5ebSJunchao Zhang     Similar for Set2.
5990394ed5ebSJunchao Zhang 
5991394ed5ebSJunchao Zhang     This routine merges the two sets of nonzeros row by row and removes repeats.
5992394ed5ebSJunchao Zhang 
5993158ec288SJunchao Zhang   Output Parameters: (memory is allocated by the caller)
5994394ed5ebSJunchao Zhang 
5995394ed5ebSJunchao Zhang     i[],j[]: the CSR of the merged matrix, which has m rows.
5996394ed5ebSJunchao Zhang     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
5997394ed5ebSJunchao Zhang     imap2[]: similar to imap1[], but for Set2.
5998394ed5ebSJunchao Zhang     Note we order nonzeros row-by-row and from left to right.
5999394ed5ebSJunchao Zhang */
60009371c9d4SSatish Balay static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) {
6001394ed5ebSJunchao Zhang   PetscInt   r, m; /* Row index of mat */
6002394ed5ebSJunchao Zhang   PetscCount t, t1, t2, b1, e1, b2, e2;
6003394ed5ebSJunchao Zhang 
6004394ed5ebSJunchao Zhang   PetscFunctionBegin;
60059566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, NULL));
6006394ed5ebSJunchao Zhang   t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */
6007394ed5ebSJunchao Zhang   i[0]        = 0;
6008394ed5ebSJunchao Zhang   for (r = 0; r < m; r++) { /* Do row by row merging */
6009394ed5ebSJunchao Zhang     b1 = rowBegin1[r];
6010394ed5ebSJunchao Zhang     e1 = rowEnd1[r];
6011394ed5ebSJunchao Zhang     b2 = rowBegin2[r];
6012394ed5ebSJunchao Zhang     e2 = rowEnd2[r];
6013394ed5ebSJunchao Zhang     while (b1 < e1 && b2 < e2) {
6014394ed5ebSJunchao Zhang       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6015394ed5ebSJunchao Zhang         j[t]      = j1[b1];
6016394ed5ebSJunchao Zhang         imap1[t1] = t;
6017394ed5ebSJunchao Zhang         imap2[t2] = t;
6018394ed5ebSJunchao Zhang         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6019394ed5ebSJunchao Zhang         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
60209371c9d4SSatish Balay         t1++;
60219371c9d4SSatish Balay         t2++;
60229371c9d4SSatish Balay         t++;
6023394ed5ebSJunchao Zhang       } else if (j1[b1] < j2[b2]) {
6024394ed5ebSJunchao Zhang         j[t]      = j1[b1];
6025394ed5ebSJunchao Zhang         imap1[t1] = t;
6026394ed5ebSJunchao Zhang         b1 += jmap1[t1 + 1] - jmap1[t1];
60279371c9d4SSatish Balay         t1++;
60289371c9d4SSatish Balay         t++;
6029394ed5ebSJunchao Zhang       } else {
6030394ed5ebSJunchao Zhang         j[t]      = j2[b2];
6031394ed5ebSJunchao Zhang         imap2[t2] = t;
6032394ed5ebSJunchao Zhang         b2 += jmap2[t2 + 1] - jmap2[t2];
60339371c9d4SSatish Balay         t2++;
60349371c9d4SSatish Balay         t++;
6035394ed5ebSJunchao Zhang       }
6036394ed5ebSJunchao Zhang     }
6037394ed5ebSJunchao Zhang     /* Merge the remaining in either j1[] or j2[] */
6038394ed5ebSJunchao Zhang     while (b1 < e1) {
6039394ed5ebSJunchao Zhang       j[t]      = j1[b1];
6040394ed5ebSJunchao Zhang       imap1[t1] = t;
6041394ed5ebSJunchao Zhang       b1 += jmap1[t1 + 1] - jmap1[t1];
60429371c9d4SSatish Balay       t1++;
60439371c9d4SSatish Balay       t++;
6044394ed5ebSJunchao Zhang     }
6045394ed5ebSJunchao Zhang     while (b2 < e2) {
6046394ed5ebSJunchao Zhang       j[t]      = j2[b2];
6047394ed5ebSJunchao Zhang       imap2[t2] = t;
6048394ed5ebSJunchao Zhang       b2 += jmap2[t2 + 1] - jmap2[t2];
60499371c9d4SSatish Balay       t2++;
60509371c9d4SSatish Balay       t++;
6051394ed5ebSJunchao Zhang     }
6052394ed5ebSJunchao Zhang     i[r + 1] = t;
6053394ed5ebSJunchao Zhang   }
6054394ed5ebSJunchao Zhang   PetscFunctionReturn(0);
6055394ed5ebSJunchao Zhang }
6056394ed5ebSJunchao Zhang 
6057158ec288SJunchao Zhang /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6058394ed5ebSJunchao Zhang 
6059394ed5ebSJunchao Zhang   Input Parameters:
6060394ed5ebSJunchao Zhang     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6061394ed5ebSJunchao Zhang     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6062394ed5ebSJunchao Zhang       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6063394ed5ebSJunchao Zhang 
6064394ed5ebSJunchao Zhang       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6065394ed5ebSJunchao Zhang       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6066394ed5ebSJunchao Zhang 
6067394ed5ebSJunchao Zhang   Output Parameters:
6068394ed5ebSJunchao Zhang     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6069394ed5ebSJunchao Zhang     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6070394ed5ebSJunchao Zhang       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6071394ed5ebSJunchao Zhang       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6072394ed5ebSJunchao Zhang 
6073394ed5ebSJunchao Zhang     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6074158ec288SJunchao Zhang       Atot: number of entries belonging to the diagonal block.
6075158ec288SJunchao Zhang       Annz: number of unique nonzeros belonging to the diagonal block.
6076394ed5ebSJunchao Zhang       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6077394ed5ebSJunchao Zhang         repeats (i.e., same 'i,j' pair).
6078394ed5ebSJunchao Zhang       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6079394ed5ebSJunchao Zhang         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6080394ed5ebSJunchao Zhang 
6081394ed5ebSJunchao Zhang       Atot: number of entries belonging to the diagonal block
6082394ed5ebSJunchao Zhang       Annz: number of unique nonzeros belonging to the diagonal block.
6083394ed5ebSJunchao Zhang 
6084394ed5ebSJunchao Zhang     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6085394ed5ebSJunchao Zhang 
6086158ec288SJunchao Zhang     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6087394ed5ebSJunchao Zhang */
60889371c9d4SSatish Balay static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) {
6089394ed5ebSJunchao Zhang   PetscInt    cstart, cend, rstart, rend, row, col;
6090394ed5ebSJunchao Zhang   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6091394ed5ebSJunchao Zhang   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6092394ed5ebSJunchao Zhang   PetscCount  k, m, p, q, r, s, mid;
6093394ed5ebSJunchao Zhang   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6094394ed5ebSJunchao Zhang 
6095394ed5ebSJunchao Zhang   PetscFunctionBegin;
60969566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
60979566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6098394ed5ebSJunchao Zhang   m = rend - rstart;
6099394ed5ebSJunchao Zhang 
61009371c9d4SSatish Balay   for (k = 0; k < n; k++) {
61019371c9d4SSatish Balay     if (i[k] >= 0) break;
61029371c9d4SSatish Balay   } /* Skip negative rows */
6103394ed5ebSJunchao Zhang 
6104394ed5ebSJunchao Zhang   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6105394ed5ebSJunchao Zhang      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6106394ed5ebSJunchao Zhang   */
6107394ed5ebSJunchao Zhang   while (k < n) {
6108394ed5ebSJunchao Zhang     row = i[k];
6109394ed5ebSJunchao Zhang     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
61109371c9d4SSatish Balay     for (s = k; s < n; s++)
61119371c9d4SSatish Balay       if (i[s] != row) break;
6112394ed5ebSJunchao Zhang     for (p = k; p < s; p++) {
6113394ed5ebSJunchao Zhang       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
611454c59aa7SJacob Faibussowitsch       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6115394ed5ebSJunchao Zhang     }
61169566063dSJacob Faibussowitsch     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6117158ec288SJunchao Zhang     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6118394ed5ebSJunchao Zhang     rowBegin[row - rstart] = k;
6119394ed5ebSJunchao Zhang     rowMid[row - rstart]   = mid;
6120394ed5ebSJunchao Zhang     rowEnd[row - rstart]   = s;
6121394ed5ebSJunchao Zhang 
6122394ed5ebSJunchao Zhang     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6123394ed5ebSJunchao Zhang     Atot += mid - k;
6124394ed5ebSJunchao Zhang     Btot += s - mid;
6125394ed5ebSJunchao Zhang 
6126394ed5ebSJunchao Zhang     /* Count unique nonzeros of this diag/offdiag row */
6127394ed5ebSJunchao Zhang     for (p = k; p < mid;) {
6128394ed5ebSJunchao Zhang       col = j[p];
61299371c9d4SSatish Balay       do {
61309371c9d4SSatish Balay         j[p] += PETSC_MAX_INT;
61319371c9d4SSatish Balay         p++;
61329371c9d4SSatish Balay       } while (p < mid && j[p] == col); /* Revert the modified diagonal indices */
6133394ed5ebSJunchao Zhang       Annz++;
6134394ed5ebSJunchao Zhang     }
6135394ed5ebSJunchao Zhang 
6136394ed5ebSJunchao Zhang     for (p = mid; p < s;) {
6137394ed5ebSJunchao Zhang       col = j[p];
6138394ed5ebSJunchao Zhang       do { p++; } while (p < s && j[p] == col);
6139394ed5ebSJunchao Zhang       Bnnz++;
6140394ed5ebSJunchao Zhang     }
6141394ed5ebSJunchao Zhang     k = s;
6142394ed5ebSJunchao Zhang   }
6143394ed5ebSJunchao Zhang 
6144394ed5ebSJunchao Zhang   /* Allocation according to Atot, Btot, Annz, Bnnz */
6145158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Atot, &Aperm));
6146158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Btot, &Bperm));
6147158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6148158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6149394ed5ebSJunchao Zhang 
61506aad120cSJose E. Roman   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6151394ed5ebSJunchao Zhang   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6152394ed5ebSJunchao Zhang   for (r = 0; r < m; r++) {
6153394ed5ebSJunchao Zhang     k   = rowBegin[r];
6154394ed5ebSJunchao Zhang     mid = rowMid[r];
6155394ed5ebSJunchao Zhang     s   = rowEnd[r];
61569566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
61579566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
6158394ed5ebSJunchao Zhang     Atot += mid - k;
6159394ed5ebSJunchao Zhang     Btot += s - mid;
6160394ed5ebSJunchao Zhang 
6161394ed5ebSJunchao Zhang     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6162394ed5ebSJunchao Zhang     for (p = k; p < mid;) {
6163394ed5ebSJunchao Zhang       col = j[p];
6164394ed5ebSJunchao Zhang       q   = p;
6165394ed5ebSJunchao Zhang       do { p++; } while (p < mid && j[p] == col);
6166394ed5ebSJunchao Zhang       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6167394ed5ebSJunchao Zhang       Annz++;
6168394ed5ebSJunchao Zhang     }
6169394ed5ebSJunchao Zhang 
6170394ed5ebSJunchao Zhang     for (p = mid; p < s;) {
6171394ed5ebSJunchao Zhang       col = j[p];
6172394ed5ebSJunchao Zhang       q   = p;
6173394ed5ebSJunchao Zhang       do { p++; } while (p < s && j[p] == col);
6174394ed5ebSJunchao Zhang       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6175394ed5ebSJunchao Zhang       Bnnz++;
6176394ed5ebSJunchao Zhang     }
6177394ed5ebSJunchao Zhang   }
6178394ed5ebSJunchao Zhang   /* Output */
6179394ed5ebSJunchao Zhang   *Aperm_ = Aperm;
6180394ed5ebSJunchao Zhang   *Annz_  = Annz;
6181394ed5ebSJunchao Zhang   *Atot_  = Atot;
6182394ed5ebSJunchao Zhang   *Ajmap_ = Ajmap;
6183394ed5ebSJunchao Zhang   *Bperm_ = Bperm;
6184394ed5ebSJunchao Zhang   *Bnnz_  = Bnnz;
6185394ed5ebSJunchao Zhang   *Btot_  = Btot;
6186394ed5ebSJunchao Zhang   *Bjmap_ = Bjmap;
6187394ed5ebSJunchao Zhang   PetscFunctionReturn(0);
6188394ed5ebSJunchao Zhang }
6189394ed5ebSJunchao Zhang 
6190158ec288SJunchao Zhang /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6191158ec288SJunchao Zhang 
6192158ec288SJunchao Zhang   Input Parameters:
6193158ec288SJunchao Zhang     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6194158ec288SJunchao Zhang     nnz:  number of unique nonzeros in the merged matrix
6195158ec288SJunchao Zhang     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6196158ec288SJunchao Zhang     jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set
6197158ec288SJunchao Zhang 
6198158ec288SJunchao Zhang   Output Parameter: (memory is allocated by the caller)
6199158ec288SJunchao Zhang     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6200158ec288SJunchao Zhang 
6201158ec288SJunchao Zhang   Example:
6202158ec288SJunchao Zhang     nnz1 = 4
6203158ec288SJunchao Zhang     nnz  = 6
6204158ec288SJunchao Zhang     imap = [1,3,4,5]
6205158ec288SJunchao Zhang     jmap = [0,3,5,6,7]
6206158ec288SJunchao Zhang    then,
6207158ec288SJunchao Zhang     jmap_new = [0,0,3,3,5,6,7]
6208158ec288SJunchao Zhang */
62099371c9d4SSatish Balay static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) {
6210158ec288SJunchao Zhang   PetscCount k, p;
6211158ec288SJunchao Zhang 
6212158ec288SJunchao Zhang   PetscFunctionBegin;
6213158ec288SJunchao Zhang   jmap_new[0] = 0;
6214158ec288SJunchao Zhang   p           = nnz;                /* p loops over jmap_new[] backwards */
6215158ec288SJunchao Zhang   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6216158ec288SJunchao Zhang     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6217158ec288SJunchao Zhang   }
6218158ec288SJunchao Zhang   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6219158ec288SJunchao Zhang   PetscFunctionReturn(0);
6220158ec288SJunchao Zhang }
6221158ec288SJunchao Zhang 
62229371c9d4SSatish Balay PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) {
6223394ed5ebSJunchao Zhang   MPI_Comm    comm;
6224394ed5ebSJunchao Zhang   PetscMPIInt rank, size;
6225394ed5ebSJunchao Zhang   PetscInt    m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6226394ed5ebSJunchao Zhang   PetscCount  k, p, q, rem;                           /* Loop variables over coo arrays */
6227394ed5ebSJunchao Zhang   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;
6228394ed5ebSJunchao Zhang 
6229394ed5ebSJunchao Zhang   PetscFunctionBegin;
62309566063dSJacob Faibussowitsch   PetscCall(PetscFree(mpiaij->garray));
62319566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&mpiaij->lvec));
6232cbc6b225SStefano Zampini #if defined(PETSC_USE_CTABLE)
62339566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&mpiaij->colmap));
6234cbc6b225SStefano Zampini #else
62359566063dSJacob Faibussowitsch   PetscCall(PetscFree(mpiaij->colmap));
6236cbc6b225SStefano Zampini #endif
62379566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6238cbc6b225SStefano Zampini   mat->assembled     = PETSC_FALSE;
6239cbc6b225SStefano Zampini   mat->was_assembled = PETSC_FALSE;
62409566063dSJacob Faibussowitsch   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6241cbc6b225SStefano Zampini 
62429566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
62439566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
62449566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
62459566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->rmap));
62469566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->cmap));
62479566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
62489566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
62499566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, &n));
62509566063dSJacob Faibussowitsch   PetscCall(MatGetSize(mat, &M, &N));
6251394ed5ebSJunchao Zhang 
6252394ed5ebSJunchao Zhang   /* ---------------------------------------------------------------------------*/
62536aad120cSJose E. Roman   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6254394ed5ebSJunchao Zhang   /* entries come first, then local rows, then remote rows.                     */
6255394ed5ebSJunchao Zhang   /* ---------------------------------------------------------------------------*/
6256394ed5ebSJunchao Zhang   PetscCount n1 = coo_n, *perm1;
6257e8729f6fSJunchao Zhang   PetscInt  *i1 = coo_i, *j1 = coo_j;
6258e8729f6fSJunchao Zhang 
6259e8729f6fSJunchao Zhang   PetscCall(PetscMalloc1(n1, &perm1));
6260394ed5ebSJunchao Zhang   for (k = 0; k < n1; k++) perm1[k] = k;
6261394ed5ebSJunchao Zhang 
6262394ed5ebSJunchao Zhang   /* Manipulate indices so that entries with negative row or col indices will have smallest
6263394ed5ebSJunchao Zhang      row indices, local entries will have greater but negative row indices, and remote entries
6264394ed5ebSJunchao Zhang      will have positive row indices.
6265394ed5ebSJunchao Zhang   */
6266394ed5ebSJunchao Zhang   for (k = 0; k < n1; k++) {
6267394ed5ebSJunchao Zhang     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6268394ed5ebSJunchao Zhang     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6269f7d195e4SLawrence Mitchell     else {
6270f7d195e4SLawrence Mitchell       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6271f7d195e4SLawrence Mitchell       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6272f7d195e4SLawrence Mitchell     }
6273394ed5ebSJunchao Zhang   }
6274394ed5ebSJunchao Zhang 
6275394ed5ebSJunchao Zhang   /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */
62769566063dSJacob Faibussowitsch   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
62779371c9d4SSatish Balay   for (k = 0; k < n1; k++) {
62789371c9d4SSatish Balay     if (i1[k] > PETSC_MIN_INT) break;
62799371c9d4SSatish Balay   }                                                                               /* Advance k to the first entry we need to take care of */
62809566063dSJacob Faibussowitsch   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6281394ed5ebSJunchao Zhang   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6282394ed5ebSJunchao Zhang 
6283394ed5ebSJunchao Zhang   /* ---------------------------------------------------------------------------*/
6284394ed5ebSJunchao Zhang   /*           Split local rows into diag/offdiag portions                      */
6285394ed5ebSJunchao Zhang   /* ---------------------------------------------------------------------------*/
6286394ed5ebSJunchao Zhang   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6287394ed5ebSJunchao Zhang   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1;
6288394ed5ebSJunchao Zhang   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6289394ed5ebSJunchao Zhang 
62909566063dSJacob Faibussowitsch   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
62919566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n1 - rem, &Cperm1));
62929566063dSJacob Faibussowitsch   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6293394ed5ebSJunchao Zhang 
6294394ed5ebSJunchao Zhang   /* ---------------------------------------------------------------------------*/
6295394ed5ebSJunchao Zhang   /*           Send remote rows to their owner                                  */
6296394ed5ebSJunchao Zhang   /* ---------------------------------------------------------------------------*/
6297394ed5ebSJunchao Zhang   /* Find which rows should be sent to which remote ranks*/
6298394ed5ebSJunchao Zhang   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6299394ed5ebSJunchao Zhang   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6300394ed5ebSJunchao Zhang   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6301394ed5ebSJunchao Zhang   const PetscInt *ranges;
6302394ed5ebSJunchao Zhang   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6303394ed5ebSJunchao Zhang 
63049566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
63059566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6306394ed5ebSJunchao Zhang   for (k = rem; k < n1;) {
6307394ed5ebSJunchao Zhang     PetscMPIInt owner;
6308394ed5ebSJunchao Zhang     PetscInt    firstRow, lastRow;
6309cbc6b225SStefano Zampini 
6310394ed5ebSJunchao Zhang     /* Locate a row range */
6311394ed5ebSJunchao Zhang     firstRow = i1[k]; /* first row of this owner */
63129566063dSJacob Faibussowitsch     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6313394ed5ebSJunchao Zhang     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6314394ed5ebSJunchao Zhang 
6315394ed5ebSJunchao Zhang     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
63169566063dSJacob Faibussowitsch     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6317394ed5ebSJunchao Zhang 
6318394ed5ebSJunchao Zhang     /* All entries in [k,p) belong to this remote owner */
6319394ed5ebSJunchao Zhang     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6320394ed5ebSJunchao Zhang       PetscMPIInt *sendto2;
6321394ed5ebSJunchao Zhang       PetscInt    *nentries2;
6322394ed5ebSJunchao Zhang       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6323cbc6b225SStefano Zampini 
63249566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
63259566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
63269566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
63279566063dSJacob Faibussowitsch       PetscCall(PetscFree2(sendto, nentries2));
6328394ed5ebSJunchao Zhang       sendto   = sendto2;
6329394ed5ebSJunchao Zhang       nentries = nentries2;
6330394ed5ebSJunchao Zhang       maxNsend = maxNsend2;
6331394ed5ebSJunchao Zhang     }
6332394ed5ebSJunchao Zhang     sendto[nsend]   = owner;
6333394ed5ebSJunchao Zhang     nentries[nsend] = p - k;
63349566063dSJacob Faibussowitsch     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6335394ed5ebSJunchao Zhang     nsend++;
6336394ed5ebSJunchao Zhang     k = p;
6337394ed5ebSJunchao Zhang   }
6338394ed5ebSJunchao Zhang 
6339394ed5ebSJunchao Zhang   /* Build 1st SF to know offsets on remote to send data */
6340394ed5ebSJunchao Zhang   PetscSF      sf1;
6341394ed5ebSJunchao Zhang   PetscInt     nroots = 1, nroots2 = 0;
6342394ed5ebSJunchao Zhang   PetscInt     nleaves = nsend, nleaves2 = 0;
6343394ed5ebSJunchao Zhang   PetscInt    *offsets;
6344394ed5ebSJunchao Zhang   PetscSFNode *iremote;
6345394ed5ebSJunchao Zhang 
63469566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(comm, &sf1));
63479566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nsend, &iremote));
63489566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nsend, &offsets));
6349394ed5ebSJunchao Zhang   for (k = 0; k < nsend; k++) {
6350394ed5ebSJunchao Zhang     iremote[k].rank  = sendto[k];
6351394ed5ebSJunchao Zhang     iremote[k].index = 0;
6352394ed5ebSJunchao Zhang     nleaves2 += nentries[k];
635354c59aa7SJacob Faibussowitsch     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6354394ed5ebSJunchao Zhang   }
63559566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
63569566063dSJacob Faibussowitsch   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
63579566063dSJacob Faibussowitsch   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
63589566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf1));
635963a3b9bcSJacob Faibussowitsch   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6360394ed5ebSJunchao Zhang 
6361394ed5ebSJunchao Zhang   /* Build 2nd SF to send remote COOs to their owner */
6362394ed5ebSJunchao Zhang   PetscSF sf2;
6363394ed5ebSJunchao Zhang   nroots  = nroots2;
6364394ed5ebSJunchao Zhang   nleaves = nleaves2;
63659566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(comm, &sf2));
63669566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(sf2));
63679566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nleaves, &iremote));
6368394ed5ebSJunchao Zhang   p = 0;
6369394ed5ebSJunchao Zhang   for (k = 0; k < nsend; k++) {
637054c59aa7SJacob Faibussowitsch     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6371394ed5ebSJunchao Zhang     for (q = 0; q < nentries[k]; q++, p++) {
6372394ed5ebSJunchao Zhang       iremote[p].rank  = sendto[k];
6373394ed5ebSJunchao Zhang       iremote[p].index = offsets[k] + q;
6374394ed5ebSJunchao Zhang     }
6375394ed5ebSJunchao Zhang   }
63769566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6377394ed5ebSJunchao Zhang 
63786aad120cSJose E. Roman   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
63799566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem));
6380394ed5ebSJunchao Zhang 
6381394ed5ebSJunchao Zhang   /* Send the remote COOs to their owner */
6382394ed5ebSJunchao Zhang   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6383394ed5ebSJunchao Zhang   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
63849566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
63859566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
63869566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
63879566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
63889566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6389394ed5ebSJunchao Zhang 
63909566063dSJacob Faibussowitsch   PetscCall(PetscFree(offsets));
63919566063dSJacob Faibussowitsch   PetscCall(PetscFree2(sendto, nentries));
6392394ed5ebSJunchao Zhang 
6393394ed5ebSJunchao Zhang   /* ---------------------------------------------------------------*/
6394394ed5ebSJunchao Zhang   /* Sort received COOs by row along with the permutation array     */
6395394ed5ebSJunchao Zhang   /* ---------------------------------------------------------------*/
6396394ed5ebSJunchao Zhang   for (k = 0; k < n2; k++) perm2[k] = k;
63979566063dSJacob Faibussowitsch   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6398394ed5ebSJunchao Zhang 
6399394ed5ebSJunchao Zhang   /* ---------------------------------------------------------------*/
6400394ed5ebSJunchao Zhang   /* Split received COOs into diag/offdiag portions                 */
6401394ed5ebSJunchao Zhang   /* ---------------------------------------------------------------*/
6402394ed5ebSJunchao Zhang   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6403394ed5ebSJunchao Zhang   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6404394ed5ebSJunchao Zhang   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6405394ed5ebSJunchao Zhang 
64069566063dSJacob Faibussowitsch   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
64079566063dSJacob Faibussowitsch   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6408394ed5ebSJunchao Zhang 
6409394ed5ebSJunchao Zhang   /* --------------------------------------------------------------------------*/
6410394ed5ebSJunchao Zhang   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6411394ed5ebSJunchao Zhang   /* --------------------------------------------------------------------------*/
6412394ed5ebSJunchao Zhang   PetscInt *Ai, *Bi;
6413394ed5ebSJunchao Zhang   PetscInt *Aj, *Bj;
6414394ed5ebSJunchao Zhang 
64159566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &Ai));
64169566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &Bi));
64179566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
64189566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6419394ed5ebSJunchao Zhang 
6420394ed5ebSJunchao Zhang   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6421158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6422158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6423158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6424158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6425394ed5ebSJunchao Zhang 
64269566063dSJacob Faibussowitsch   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
64279566063dSJacob Faibussowitsch   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6428158ec288SJunchao Zhang 
6429158ec288SJunchao Zhang   /* --------------------------------------------------------------------------*/
6430158ec288SJunchao Zhang   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6431158ec288SJunchao Zhang   /* expect nonzeros in A/B most likely have local contributing entries        */
6432158ec288SJunchao Zhang   /* --------------------------------------------------------------------------*/
6433158ec288SJunchao Zhang   PetscInt    Annz = Ai[m];
6434158ec288SJunchao Zhang   PetscInt    Bnnz = Bi[m];
6435158ec288SJunchao Zhang   PetscCount *Ajmap1_new, *Bjmap1_new;
6436158ec288SJunchao Zhang 
6437158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6438158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6439158ec288SJunchao Zhang 
6440158ec288SJunchao Zhang   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6441158ec288SJunchao Zhang   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6442158ec288SJunchao Zhang 
6443158ec288SJunchao Zhang   PetscCall(PetscFree(Aimap1));
6444158ec288SJunchao Zhang   PetscCall(PetscFree(Ajmap1));
6445158ec288SJunchao Zhang   PetscCall(PetscFree(Bimap1));
6446158ec288SJunchao Zhang   PetscCall(PetscFree(Bjmap1));
64479566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
64489566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6449e8729f6fSJunchao Zhang   PetscCall(PetscFree(perm1));
64509566063dSJacob Faibussowitsch   PetscCall(PetscFree3(i2, j2, perm2));
6451394ed5ebSJunchao Zhang 
6452158ec288SJunchao Zhang   Ajmap1 = Ajmap1_new;
6453158ec288SJunchao Zhang   Bjmap1 = Bjmap1_new;
6454158ec288SJunchao Zhang 
6455394ed5ebSJunchao Zhang   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6456394ed5ebSJunchao Zhang   if (Annz < Annz1 + Annz2) {
6457394ed5ebSJunchao Zhang     PetscInt *Aj_new;
64589566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(Annz, &Aj_new));
64599566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
64609566063dSJacob Faibussowitsch     PetscCall(PetscFree(Aj));
6461394ed5ebSJunchao Zhang     Aj = Aj_new;
6462394ed5ebSJunchao Zhang   }
6463394ed5ebSJunchao Zhang 
6464394ed5ebSJunchao Zhang   if (Bnnz < Bnnz1 + Bnnz2) {
6465394ed5ebSJunchao Zhang     PetscInt *Bj_new;
64669566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
64679566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
64689566063dSJacob Faibussowitsch     PetscCall(PetscFree(Bj));
6469394ed5ebSJunchao Zhang     Bj = Bj_new;
6470394ed5ebSJunchao Zhang   }
6471394ed5ebSJunchao Zhang 
6472394ed5ebSJunchao Zhang   /* --------------------------------------------------------------------------------*/
6473cbc6b225SStefano Zampini   /* Create new submatrices for on-process and off-process coupling                  */
6474394ed5ebSJunchao Zhang   /* --------------------------------------------------------------------------------*/
6475394ed5ebSJunchao Zhang   PetscScalar *Aa, *Ba;
6476cbc6b225SStefano Zampini   MatType      rtype;
6477394ed5ebSJunchao Zhang   Mat_SeqAIJ  *a, *b;
64789566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
64799566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(Bnnz, &Ba));
6480394ed5ebSJunchao Zhang   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
64819371c9d4SSatish Balay   if (cstart) {
64829371c9d4SSatish Balay     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
64839371c9d4SSatish Balay   }
64849566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&mpiaij->A));
64859566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&mpiaij->B));
64869566063dSJacob Faibussowitsch   PetscCall(MatGetRootType_Private(mat, &rtype));
64879566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
64889566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
64899566063dSJacob Faibussowitsch   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6490cbc6b225SStefano Zampini 
6491394ed5ebSJunchao Zhang   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6492394ed5ebSJunchao Zhang   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6493394ed5ebSJunchao Zhang   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6494394ed5ebSJunchao Zhang   a->free_a = b->free_a = PETSC_TRUE;
6495394ed5ebSJunchao Zhang   a->free_ij = b->free_ij = PETSC_TRUE;
6496394ed5ebSJunchao Zhang 
6497cbc6b225SStefano Zampini   /* conversion must happen AFTER multiply setup */
64989566063dSJacob Faibussowitsch   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
64999566063dSJacob Faibussowitsch   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
65009566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&mpiaij->lvec));
65019566063dSJacob Faibussowitsch   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6502cbc6b225SStefano Zampini 
6503394ed5ebSJunchao Zhang   mpiaij->coo_n   = coo_n;
6504394ed5ebSJunchao Zhang   mpiaij->coo_sf  = sf2;
6505394ed5ebSJunchao Zhang   mpiaij->sendlen = nleaves;
6506394ed5ebSJunchao Zhang   mpiaij->recvlen = nroots;
6507394ed5ebSJunchao Zhang 
6508158ec288SJunchao Zhang   mpiaij->Annz = Annz;
6509158ec288SJunchao Zhang   mpiaij->Bnnz = Bnnz;
6510158ec288SJunchao Zhang 
6511394ed5ebSJunchao Zhang   mpiaij->Annz2 = Annz2;
6512394ed5ebSJunchao Zhang   mpiaij->Bnnz2 = Bnnz2;
6513394ed5ebSJunchao Zhang 
6514394ed5ebSJunchao Zhang   mpiaij->Atot1 = Atot1;
6515394ed5ebSJunchao Zhang   mpiaij->Atot2 = Atot2;
6516394ed5ebSJunchao Zhang   mpiaij->Btot1 = Btot1;
6517394ed5ebSJunchao Zhang   mpiaij->Btot2 = Btot2;
6518394ed5ebSJunchao Zhang 
6519394ed5ebSJunchao Zhang   mpiaij->Ajmap1 = Ajmap1;
6520394ed5ebSJunchao Zhang   mpiaij->Aperm1 = Aperm1;
6521158ec288SJunchao Zhang 
6522158ec288SJunchao Zhang   mpiaij->Bjmap1 = Bjmap1;
6523394ed5ebSJunchao Zhang   mpiaij->Bperm1 = Bperm1;
6524158ec288SJunchao Zhang 
6525158ec288SJunchao Zhang   mpiaij->Aimap2 = Aimap2;
6526158ec288SJunchao Zhang   mpiaij->Ajmap2 = Ajmap2;
6527158ec288SJunchao Zhang   mpiaij->Aperm2 = Aperm2;
6528158ec288SJunchao Zhang 
6529158ec288SJunchao Zhang   mpiaij->Bimap2 = Bimap2;
6530158ec288SJunchao Zhang   mpiaij->Bjmap2 = Bjmap2;
6531394ed5ebSJunchao Zhang   mpiaij->Bperm2 = Bperm2;
6532394ed5ebSJunchao Zhang 
6533394ed5ebSJunchao Zhang   mpiaij->Cperm1 = Cperm1;
6534394ed5ebSJunchao Zhang 
6535394ed5ebSJunchao Zhang   /* Allocate in preallocation. If not used, it has zero cost on host */
65369566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
6537394ed5ebSJunchao Zhang   PetscFunctionReturn(0);
6538394ed5ebSJunchao Zhang }
6539394ed5ebSJunchao Zhang 
65409371c9d4SSatish Balay static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) {
6541394ed5ebSJunchao Zhang   Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
6542394ed5ebSJunchao Zhang   Mat               A = mpiaij->A, B = mpiaij->B;
6543158ec288SJunchao Zhang   PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
6544394ed5ebSJunchao Zhang   PetscScalar      *Aa, *Ba;
6545394ed5ebSJunchao Zhang   PetscScalar      *sendbuf = mpiaij->sendbuf;
6546394ed5ebSJunchao Zhang   PetscScalar      *recvbuf = mpiaij->recvbuf;
6547158ec288SJunchao Zhang   const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
6548158ec288SJunchao Zhang   const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
6549394ed5ebSJunchao Zhang   const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
6550394ed5ebSJunchao Zhang   const PetscCount *Cperm1 = mpiaij->Cperm1;
6551394ed5ebSJunchao Zhang 
6552394ed5ebSJunchao Zhang   PetscFunctionBegin;
65539566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
65549566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(B, &Ba));
6555394ed5ebSJunchao Zhang 
6556394ed5ebSJunchao Zhang   /* Pack entries to be sent to remote */
6557394ed5ebSJunchao Zhang   for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6558394ed5ebSJunchao Zhang 
6559394ed5ebSJunchao Zhang   /* Send remote entries to their owner and overlap the communication with local computation */
65609566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6561394ed5ebSJunchao Zhang   /* Add local entries to A and B */
6562158ec288SJunchao Zhang   for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6563158ec288SJunchao Zhang     PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stablility */
6564158ec288SJunchao Zhang     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6565158ec288SJunchao Zhang     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6566394ed5ebSJunchao Zhang   }
6567158ec288SJunchao Zhang   for (PetscCount i = 0; i < Bnnz; i++) {
6568158ec288SJunchao Zhang     PetscScalar sum = 0.0;
6569158ec288SJunchao Zhang     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6570158ec288SJunchao Zhang     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6571394ed5ebSJunchao Zhang   }
65729566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6573394ed5ebSJunchao Zhang 
6574394ed5ebSJunchao Zhang   /* Add received remote entries to A and B */
6575394ed5ebSJunchao Zhang   for (PetscCount i = 0; i < Annz2; i++) {
6576394ed5ebSJunchao Zhang     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6577394ed5ebSJunchao Zhang   }
6578394ed5ebSJunchao Zhang   for (PetscCount i = 0; i < Bnnz2; i++) {
6579394ed5ebSJunchao Zhang     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6580394ed5ebSJunchao Zhang   }
65819566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
65829566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6583394ed5ebSJunchao Zhang   PetscFunctionReturn(0);
6584394ed5ebSJunchao Zhang }
6585394ed5ebSJunchao Zhang 
65864222ddf1SHong Zhang /* ----------------------------------------------------------------*/
65874222ddf1SHong Zhang 
6588ccd8e176SBarry Smith /*MC
6589ccd8e176SBarry Smith    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6590ccd8e176SBarry Smith 
6591ccd8e176SBarry Smith    Options Database Keys:
659211a5261eSBarry Smith . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6593ccd8e176SBarry Smith 
6594ccd8e176SBarry Smith    Level: beginner
65950cd7f59aSBarry Smith 
65960cd7f59aSBarry Smith    Notes:
    `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values;
    in this case the values associated with the rows and columns one passes in are set to zero
    in the matrix.
66000cd7f59aSBarry Smith 
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.
6603ccd8e176SBarry Smith 
660411a5261eSBarry Smith .seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6605ccd8e176SBarry Smith M*/
6606ccd8e176SBarry Smith 
66079371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) {
6608ccd8e176SBarry Smith   Mat_MPIAIJ *b;
6609ccd8e176SBarry Smith   PetscMPIInt size;
6610ccd8e176SBarry Smith 
6611ccd8e176SBarry Smith   PetscFunctionBegin;
66129566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
66132205254eSKarl Rupp 
6614*4dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&b));
6615ccd8e176SBarry Smith   B->data = (void *)b;
66169566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
6617ccd8e176SBarry Smith   B->assembled  = PETSC_FALSE;
6618ccd8e176SBarry Smith   B->insertmode = NOT_SET_VALUES;
6619ccd8e176SBarry Smith   b->size       = size;
66202205254eSKarl Rupp 
66219566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6622ccd8e176SBarry Smith 
6623ccd8e176SBarry Smith   /* build cache for off array entries formed */
66249566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
66252205254eSKarl Rupp 
6626ccd8e176SBarry Smith   b->donotstash  = PETSC_FALSE;
6627f4259b30SLisandro Dalcin   b->colmap      = NULL;
6628f4259b30SLisandro Dalcin   b->garray      = NULL;
6629ccd8e176SBarry Smith   b->roworiented = PETSC_TRUE;
6630ccd8e176SBarry Smith 
6631ccd8e176SBarry Smith   /* stuff used for matrix vector multiply */
66320298fd71SBarry Smith   b->lvec  = NULL;
66330298fd71SBarry Smith   b->Mvctx = NULL;
6634ccd8e176SBarry Smith 
6635ccd8e176SBarry Smith   /* stuff for MatGetRow() */
6636f4259b30SLisandro Dalcin   b->rowindices   = NULL;
6637f4259b30SLisandro Dalcin   b->rowvalues    = NULL;
6638ccd8e176SBarry Smith   b->getrowactive = PETSC_FALSE;
6639ccd8e176SBarry Smith 
6640f719121fSJed Brown   /* flexible pointer used in CUSPARSE classes */
66410298fd71SBarry Smith   b->spptr = NULL;
6642f60c3dc2SHong Zhang 
66439566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
66449566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
66459566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
66469566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
66479566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
66489566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
66499566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
66509566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
66519566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
66529566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
66533d0639e7SStefano Zampini #if defined(PETSC_HAVE_CUDA)
66549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
66553d0639e7SStefano Zampini #endif
66563d0639e7SStefano Zampini #if defined(PETSC_HAVE_KOKKOS_KERNELS)
66579566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
66583d0639e7SStefano Zampini #endif
66599779e05dSSatish Balay #if defined(PETSC_HAVE_MKL_SPARSE)
66609566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6661191b95cbSRichard Tran Mills #endif
66629566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
66639566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
66649566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
66659566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
66665d7652ecSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
66679566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
66685d7652ecSHong Zhang #endif
6669d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
66709566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6671d24d4204SJose E. Roman #endif
66729566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
66739566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
66743dad0653Sstefano_zampini #if defined(PETSC_HAVE_HYPRE)
66759566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
66769566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
66773dad0653Sstefano_zampini #endif
66789566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
66799566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
66809566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
66819566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
66829566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6683ccd8e176SBarry Smith   PetscFunctionReturn(0);
6684ccd8e176SBarry Smith }
668581824310SBarry Smith 
6686cce60c4dSBarry Smith /*@C
668711a5261eSBarry Smith      MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
668803bfb495SBarry Smith          and "off-diagonal" part of the matrix in CSR format.
668903bfb495SBarry Smith 
6690d083f849SBarry Smith    Collective
669103bfb495SBarry Smith 
669203bfb495SBarry Smith    Input Parameters:
669303bfb495SBarry Smith +  comm - MPI communicator
669411a5261eSBarry Smith .  m - number of local rows (Cannot be `PETSC_DECIDE`)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or `PETSC_DETERMINE` to have it calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have it calculated if n is given)
6700483a2f95SBarry Smith .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
670104ccdda3SJunchao Zhang .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
670203bfb495SBarry Smith .   a - matrix values
6703483a2f95SBarry Smith .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
670404ccdda3SJunchao Zhang .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
670503bfb495SBarry Smith -   oa - matrix values
670603bfb495SBarry Smith 
670703bfb495SBarry Smith    Output Parameter:
670803bfb495SBarry Smith .   mat - the matrix
670903bfb495SBarry Smith 
671003bfb495SBarry Smith    Level: advanced
671103bfb495SBarry Smith 
671203bfb495SBarry Smith    Notes:
6713292fb18eSBarry Smith        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6714292fb18eSBarry Smith        must free the arrays once the matrix has been destroyed and not before.
671503bfb495SBarry Smith 
671603bfb495SBarry Smith        The i and j indices are 0 based
671703bfb495SBarry Smith 
671869b1f4b7SBarry Smith        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
671903bfb495SBarry Smith 
67207b55108eSBarry Smith        This sets local rows and cannot be used to set off-processor values.
67217b55108eSBarry Smith 
6722dca341c0SJed Brown        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6723dca341c0SJed Brown        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6724dca341c0SJed Brown        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6725dca341c0SJed Brown        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
672611a5261eSBarry Smith        keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6727dca341c0SJed Brown        communication if it is known that only local entries will be set.
672803bfb495SBarry Smith 
6729db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6730db781477SPatrick Sanan           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
67312b26979fSBarry Smith @*/
67329371c9d4SSatish Balay PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) {
673303bfb495SBarry Smith   Mat_MPIAIJ *maij;
673403bfb495SBarry Smith 
673503bfb495SBarry Smith   PetscFunctionBegin;
673608401ef6SPierre Jolivet   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6737aed4548fSBarry Smith   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6738aed4548fSBarry Smith   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
67399566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
67409566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, M, N));
67419566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat, MATMPIAIJ));
674203bfb495SBarry Smith   maij = (Mat_MPIAIJ *)(*mat)->data;
67432205254eSKarl Rupp 
67448d7a6e47SBarry Smith   (*mat)->preallocated = PETSC_TRUE;
674503bfb495SBarry Smith 
67469566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp((*mat)->rmap));
67479566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp((*mat)->cmap));
674803bfb495SBarry Smith 
67499566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
67509566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
675103bfb495SBarry Smith 
67529566063dSJacob Faibussowitsch   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
67539566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
67549566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
67559566063dSJacob Faibussowitsch   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
67569566063dSJacob Faibussowitsch   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
675703bfb495SBarry Smith   PetscFunctionReturn(0);
675803bfb495SBarry Smith }
675903bfb495SBarry Smith 
67604e84afc0SStefano Zampini typedef struct {
67614e84afc0SStefano Zampini   Mat       *mp;    /* intermediate products */
67624e84afc0SStefano Zampini   PetscBool *mptmp; /* is the intermediate product temporary ? */
67634e84afc0SStefano Zampini   PetscInt   cp;    /* number of intermediate products */
67644e84afc0SStefano Zampini 
67654e84afc0SStefano Zampini   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
67664e84afc0SStefano Zampini   PetscInt    *startsj_s, *startsj_r;
67674e84afc0SStefano Zampini   PetscScalar *bufa;
67684e84afc0SStefano Zampini   Mat          P_oth;
67694e84afc0SStefano Zampini 
67704e84afc0SStefano Zampini   /* may take advantage of merging product->B */
6771ddea5d60SJunchao Zhang   Mat Bloc; /* B-local by merging diag and off-diag */
67724e84afc0SStefano Zampini 
6773ddea5d60SJunchao Zhang   /* cusparse does not have support to split between symbolic and numeric phases.
67744e84afc0SStefano Zampini      When api_user is true, we don't need to update the numerical values
67754e84afc0SStefano Zampini      of the temporary storage */
67764e84afc0SStefano Zampini   PetscBool reusesym;
67774e84afc0SStefano Zampini 
67784e84afc0SStefano Zampini   /* support for COO values insertion */
6779ddea5d60SJunchao Zhang   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
6780ddea5d60SJunchao Zhang   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
6781ddea5d60SJunchao Zhang   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
6782ddea5d60SJunchao Zhang   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6783c215019aSStefano Zampini   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
6784c215019aSStefano Zampini   PetscMemType mtype;
67854e84afc0SStefano Zampini 
67864e84afc0SStefano Zampini   /* customization */
67874e84afc0SStefano Zampini   PetscBool abmerge;
6788abb89eb1SStefano Zampini   PetscBool P_oth_bind;
67894e84afc0SStefano Zampini } MatMatMPIAIJBACKEND;
67904e84afc0SStefano Zampini 
67919371c9d4SSatish Balay PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) {
67924e84afc0SStefano Zampini   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
67934e84afc0SStefano Zampini   PetscInt             i;
67944e84afc0SStefano Zampini 
67954e84afc0SStefano Zampini   PetscFunctionBegin;
67969566063dSJacob Faibussowitsch   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
67979566063dSJacob Faibussowitsch   PetscCall(PetscFree(mmdata->bufa));
67989566063dSJacob Faibussowitsch   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
67999566063dSJacob Faibussowitsch   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
68009566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&mmdata->P_oth));
68019566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&mmdata->Bloc));
68029566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&mmdata->sf));
680348a46eb9SPierre Jolivet   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
68049566063dSJacob Faibussowitsch   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
68059566063dSJacob Faibussowitsch   PetscCall(PetscFree(mmdata->own[0]));
68069566063dSJacob Faibussowitsch   PetscCall(PetscFree(mmdata->own));
68079566063dSJacob Faibussowitsch   PetscCall(PetscFree(mmdata->off[0]));
68089566063dSJacob Faibussowitsch   PetscCall(PetscFree(mmdata->off));
68099566063dSJacob Faibussowitsch   PetscCall(PetscFree(mmdata));
68104e84afc0SStefano Zampini   PetscFunctionReturn(0);
68114e84afc0SStefano Zampini }
68124e84afc0SStefano Zampini 
6813fff043a9SJunchao Zhang /* Copy selected n entries with indices in idx[] of A to v[].
6814fff043a9SJunchao Zhang    If idx is NULL, copy the whole data array of A to v[]
6815fff043a9SJunchao Zhang  */
68169371c9d4SSatish Balay static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) {
6817c215019aSStefano Zampini   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
6818c215019aSStefano Zampini 
6819c215019aSStefano Zampini   PetscFunctionBegin;
68209566063dSJacob Faibussowitsch   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
6821c215019aSStefano Zampini   if (f) {
68229566063dSJacob Faibussowitsch     PetscCall((*f)(A, n, idx, v));
6823c215019aSStefano Zampini   } else {
6824c215019aSStefano Zampini     const PetscScalar *vv;
6825c215019aSStefano Zampini 
68269566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
6827c215019aSStefano Zampini     if (n && idx) {
6828c215019aSStefano Zampini       PetscScalar    *w  = v;
6829c215019aSStefano Zampini       const PetscInt *oi = idx;
6830c215019aSStefano Zampini       PetscInt        j;
6831c215019aSStefano Zampini 
6832c215019aSStefano Zampini       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6833c215019aSStefano Zampini     } else {
68349566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(v, vv, n));
6835c215019aSStefano Zampini     }
68369566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
6837c215019aSStefano Zampini   }
6838c215019aSStefano Zampini   PetscFunctionReturn(0);
6839c215019aSStefano Zampini }
6840c215019aSStefano Zampini 
68419371c9d4SSatish Balay static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) {
68424e84afc0SStefano Zampini   MatMatMPIAIJBACKEND *mmdata;
68434e84afc0SStefano Zampini   PetscInt             i, n_d, n_o;
68444e84afc0SStefano Zampini 
68454e84afc0SStefano Zampini   PetscFunctionBegin;
68464e84afc0SStefano Zampini   MatCheckProduct(C, 1);
684728b400f6SJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
68484e84afc0SStefano Zampini   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
68494e84afc0SStefano Zampini   if (!mmdata->reusesym) { /* update temporary matrices */
685048a46eb9SPierre Jolivet     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
685148a46eb9SPierre Jolivet     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
68524e84afc0SStefano Zampini   }
68534e84afc0SStefano Zampini   mmdata->reusesym = PETSC_FALSE;
6854abb89eb1SStefano Zampini 
6855abb89eb1SStefano Zampini   for (i = 0; i < mmdata->cp; i++) {
685608401ef6SPierre Jolivet     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
68579566063dSJacob Faibussowitsch     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
6858abb89eb1SStefano Zampini   }
68594e84afc0SStefano Zampini   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
68604e84afc0SStefano Zampini     PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];
68614e84afc0SStefano Zampini 
68624e84afc0SStefano Zampini     if (mmdata->mptmp[i]) continue;
68634e84afc0SStefano Zampini     if (noff) {
6864c215019aSStefano Zampini       PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];
6865c215019aSStefano Zampini 
68669566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
68679566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
68684e84afc0SStefano Zampini       n_o += noff;
68694e84afc0SStefano Zampini       n_d += nown;
68704e84afc0SStefano Zampini     } else {
6871c215019aSStefano Zampini       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
6872c215019aSStefano Zampini 
68739566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
68744e84afc0SStefano Zampini       n_d += mm->nz;
68754e84afc0SStefano Zampini     }
68764e84afc0SStefano Zampini   }
6877c215019aSStefano Zampini   if (mmdata->hasoffproc) { /* offprocess insertion */
68789566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
68799566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
68804e84afc0SStefano Zampini   }
68819566063dSJacob Faibussowitsch   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
68824e84afc0SStefano Zampini   PetscFunctionReturn(0);
68834e84afc0SStefano Zampini }
68844e84afc0SStefano Zampini 
68854e84afc0SStefano Zampini /* Support for Pt * A, A * P, or Pt * A * P */
68864e84afc0SStefano Zampini #define MAX_NUMBER_INTERMEDIATE 4
68879371c9d4SSatish Balay PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) {
68884e84afc0SStefano Zampini   Mat_Product           *product = C->product;
6889ddea5d60SJunchao Zhang   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
68904e84afc0SStefano Zampini   Mat_MPIAIJ            *a, *p;
68914e84afc0SStefano Zampini   MatMatMPIAIJBACKEND   *mmdata;
68924e84afc0SStefano Zampini   ISLocalToGlobalMapping P_oth_l2g = NULL;
68934e84afc0SStefano Zampini   IS                     glob      = NULL;
68944e84afc0SStefano Zampini   const char            *prefix;
68954e84afc0SStefano Zampini   char                   pprefix[256];
68964e84afc0SStefano Zampini   const PetscInt        *globidx, *P_oth_idx;
689782a78a4eSJed Brown   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
689882a78a4eSJed Brown   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
6899ddea5d60SJunchao Zhang   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE];  /* col/row map type for each Mat in mp[]. */
6900ddea5d60SJunchao Zhang                                                                                           /* type-0: consecutive, start from 0; type-1: consecutive with */
6901ddea5d60SJunchao Zhang                                                                                           /* a base offset; type-2: sparse with a local to global map table */
6902ddea5d60SJunchao Zhang   const PetscInt        *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
6903ddea5d60SJunchao Zhang 
69044e84afc0SStefano Zampini   MatProductType ptype;
6905c215019aSStefano Zampini   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iskokk;
69064e84afc0SStefano Zampini   PetscMPIInt    size;
69074e84afc0SStefano Zampini 
69084e84afc0SStefano Zampini   PetscFunctionBegin;
69094e84afc0SStefano Zampini   MatCheckProduct(C, 1);
691028b400f6SJacob Faibussowitsch   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
69114e84afc0SStefano Zampini   ptype = product->type;
6912b94d7dedSBarry Smith   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
6913fa046f9fSJunchao Zhang     ptype                                          = MATPRODUCT_AB;
6914fa046f9fSJunchao Zhang     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6915fa046f9fSJunchao Zhang   }
69164e84afc0SStefano Zampini   switch (ptype) {
69174e84afc0SStefano Zampini   case MATPRODUCT_AB:
69184e84afc0SStefano Zampini     A          = product->A;
69194e84afc0SStefano Zampini     P          = product->B;
69204e84afc0SStefano Zampini     m          = A->rmap->n;
69214e84afc0SStefano Zampini     n          = P->cmap->n;
69224e84afc0SStefano Zampini     M          = A->rmap->N;
69234e84afc0SStefano Zampini     N          = P->cmap->N;
6924ddea5d60SJunchao Zhang     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
69254e84afc0SStefano Zampini     break;
69264e84afc0SStefano Zampini   case MATPRODUCT_AtB:
69274e84afc0SStefano Zampini     P          = product->A;
69284e84afc0SStefano Zampini     A          = product->B;
69294e84afc0SStefano Zampini     m          = P->cmap->n;
69304e84afc0SStefano Zampini     n          = A->cmap->n;
69314e84afc0SStefano Zampini     M          = P->cmap->N;
69324e84afc0SStefano Zampini     N          = A->cmap->N;
69334e84afc0SStefano Zampini     hasoffproc = PETSC_TRUE;
69344e84afc0SStefano Zampini     break;
69354e84afc0SStefano Zampini   case MATPRODUCT_PtAP:
69364e84afc0SStefano Zampini     A          = product->A;
69374e84afc0SStefano Zampini     P          = product->B;
69384e84afc0SStefano Zampini     m          = P->cmap->n;
69394e84afc0SStefano Zampini     n          = P->cmap->n;
69404e84afc0SStefano Zampini     M          = P->cmap->N;
69414e84afc0SStefano Zampini     N          = P->cmap->N;
69424e84afc0SStefano Zampini     hasoffproc = PETSC_TRUE;
69434e84afc0SStefano Zampini     break;
69449371c9d4SSatish Balay   default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
69454e84afc0SStefano Zampini   }
69469566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
69474e84afc0SStefano Zampini   if (size == 1) hasoffproc = PETSC_FALSE;
69484e84afc0SStefano Zampini 
69494e84afc0SStefano Zampini   /* defaults */
69504e84afc0SStefano Zampini   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
69514e84afc0SStefano Zampini     mp[i]    = NULL;
69524e84afc0SStefano Zampini     mptmp[i] = PETSC_FALSE;
69534e84afc0SStefano Zampini     rmapt[i] = -1;
69544e84afc0SStefano Zampini     cmapt[i] = -1;
69554e84afc0SStefano Zampini     rmapa[i] = NULL;
69564e84afc0SStefano Zampini     cmapa[i] = NULL;
69574e84afc0SStefano Zampini   }
69584e84afc0SStefano Zampini 
69594e84afc0SStefano Zampini   /* customization */
69609566063dSJacob Faibussowitsch   PetscCall(PetscNew(&mmdata));
69614e84afc0SStefano Zampini   mmdata->reusesym = product->api_user;
69624e84afc0SStefano Zampini   if (ptype == MATPRODUCT_AB) {
69634e84afc0SStefano Zampini     if (product->api_user) {
6964d0609cedSBarry Smith       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
69659566063dSJacob Faibussowitsch       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
69669566063dSJacob Faibussowitsch       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
6967d0609cedSBarry Smith       PetscOptionsEnd();
69684e84afc0SStefano Zampini     } else {
6969d0609cedSBarry Smith       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
69709566063dSJacob Faibussowitsch       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
69719566063dSJacob Faibussowitsch       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
6972d0609cedSBarry Smith       PetscOptionsEnd();
6973abb89eb1SStefano Zampini     }
6974abb89eb1SStefano Zampini   } else if (ptype == MATPRODUCT_PtAP) {
6975abb89eb1SStefano Zampini     if (product->api_user) {
6976d0609cedSBarry Smith       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
69779566063dSJacob Faibussowitsch       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
6978d0609cedSBarry Smith       PetscOptionsEnd();
6979abb89eb1SStefano Zampini     } else {
6980d0609cedSBarry Smith       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
69819566063dSJacob Faibussowitsch       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
6982d0609cedSBarry Smith       PetscOptionsEnd();
69834e84afc0SStefano Zampini     }
69844e84afc0SStefano Zampini   }
69854e84afc0SStefano Zampini   a = (Mat_MPIAIJ *)A->data;
69864e84afc0SStefano Zampini   p = (Mat_MPIAIJ *)P->data;
69879566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, m, n, M, N));
69889566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(C->rmap));
69899566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(C->cmap));
69909566063dSJacob Faibussowitsch   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
69919566063dSJacob Faibussowitsch   PetscCall(MatGetOptionsPrefix(C, &prefix));
6992ddea5d60SJunchao Zhang 
6993ddea5d60SJunchao Zhang   cp = 0;
69944e84afc0SStefano Zampini   switch (ptype) {
69954e84afc0SStefano Zampini   case MATPRODUCT_AB: /* A * P */
69969566063dSJacob Faibussowitsch     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
69974e84afc0SStefano Zampini 
6998ddea5d60SJunchao Zhang     /* A_diag * P_local (merged or not) */
6999ddea5d60SJunchao Zhang     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
70004e84afc0SStefano Zampini       /* P is product->B */
70019566063dSJacob Faibussowitsch       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
70029566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
70039566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
70049566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp], product->fill));
70059566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
70069566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
70079566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
70084e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
70099566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
70109566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
70119566063dSJacob Faibussowitsch       PetscCall(ISGetIndices(glob, &globidx));
70124e84afc0SStefano Zampini       rmapt[cp] = 1;
70134e84afc0SStefano Zampini       cmapt[cp] = 2;
70144e84afc0SStefano Zampini       cmapa[cp] = globidx;
70154e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
70164e84afc0SStefano Zampini       cp++;
7017ddea5d60SJunchao Zhang     } else { /* A_diag * P_diag and A_diag * P_off */
70189566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
70199566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
70209566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp], product->fill));
70219566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
70229566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
70239566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
70244e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
70259566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
70269566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
70274e84afc0SStefano Zampini       rmapt[cp] = 1;
70284e84afc0SStefano Zampini       cmapt[cp] = 1;
70294e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
70304e84afc0SStefano Zampini       cp++;
70319566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
70329566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
70339566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp], product->fill));
70349566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
70359566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
70369566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
70374e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
70389566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
70399566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
70404e84afc0SStefano Zampini       rmapt[cp] = 1;
70414e84afc0SStefano Zampini       cmapt[cp] = 2;
70424e84afc0SStefano Zampini       cmapa[cp] = p->garray;
70434e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
70444e84afc0SStefano Zampini       cp++;
70454e84afc0SStefano Zampini     }
7046ddea5d60SJunchao Zhang 
7047ddea5d60SJunchao Zhang     /* A_off * P_other */
70484e84afc0SStefano Zampini     if (mmdata->P_oth) {
70499566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
70509566063dSJacob Faibussowitsch       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
70519566063dSJacob Faibussowitsch       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
70529566063dSJacob Faibussowitsch       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
70539566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
70549566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
70559566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp], product->fill));
70569566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
70579566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
70589566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
70594e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
70609566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
70619566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
70624e84afc0SStefano Zampini       rmapt[cp] = 1;
70634e84afc0SStefano Zampini       cmapt[cp] = 2;
70644e84afc0SStefano Zampini       cmapa[cp] = P_oth_idx;
70654e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
70664e84afc0SStefano Zampini       cp++;
70674e84afc0SStefano Zampini     }
70684e84afc0SStefano Zampini     break;
7069ddea5d60SJunchao Zhang 
70704e84afc0SStefano Zampini   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
70714e84afc0SStefano Zampini     /* A is product->B */
70729566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7073ddea5d60SJunchao Zhang     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
70749566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
70759566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
70769566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp], product->fill));
70779566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
70789566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
70799566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
70804e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
70819566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
70829566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
70839566063dSJacob Faibussowitsch       PetscCall(ISGetIndices(glob, &globidx));
70844e84afc0SStefano Zampini       rmapt[cp] = 2;
70854e84afc0SStefano Zampini       rmapa[cp] = globidx;
70864e84afc0SStefano Zampini       cmapt[cp] = 2;
70874e84afc0SStefano Zampini       cmapa[cp] = globidx;
70884e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
70894e84afc0SStefano Zampini       cp++;
70904e84afc0SStefano Zampini     } else {
70919566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
70929566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
70939566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp], product->fill));
70949566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
70959566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
70969566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
70974e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
70989566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
70999566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
71009566063dSJacob Faibussowitsch       PetscCall(ISGetIndices(glob, &globidx));
71014e84afc0SStefano Zampini       rmapt[cp] = 1;
71024e84afc0SStefano Zampini       cmapt[cp] = 2;
71034e84afc0SStefano Zampini       cmapa[cp] = globidx;
71044e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
71054e84afc0SStefano Zampini       cp++;
71069566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
71079566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
71089566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp], product->fill));
71099566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
71109566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
71119566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
71124e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
71139566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
71149566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
71154e84afc0SStefano Zampini       rmapt[cp] = 2;
71164e84afc0SStefano Zampini       rmapa[cp] = p->garray;
71174e84afc0SStefano Zampini       cmapt[cp] = 2;
71184e84afc0SStefano Zampini       cmapa[cp] = globidx;
71194e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
71204e84afc0SStefano Zampini       cp++;
71214e84afc0SStefano Zampini     }
71224e84afc0SStefano Zampini     break;
71234e84afc0SStefano Zampini   case MATPRODUCT_PtAP:
71249566063dSJacob Faibussowitsch     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
71254e84afc0SStefano Zampini     /* P is product->B */
71269566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
71279566063dSJacob Faibussowitsch     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
71289566063dSJacob Faibussowitsch     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
71299566063dSJacob Faibussowitsch     PetscCall(MatProductSetFill(mp[cp], product->fill));
71309566063dSJacob Faibussowitsch     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
71319566063dSJacob Faibussowitsch     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
71329566063dSJacob Faibussowitsch     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
71334e84afc0SStefano Zampini     mp[cp]->product->api_user = product->api_user;
71349566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions(mp[cp]));
71359566063dSJacob Faibussowitsch     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
71369566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(glob, &globidx));
71374e84afc0SStefano Zampini     rmapt[cp] = 2;
71384e84afc0SStefano Zampini     rmapa[cp] = globidx;
71394e84afc0SStefano Zampini     cmapt[cp] = 2;
71404e84afc0SStefano Zampini     cmapa[cp] = globidx;
71414e84afc0SStefano Zampini     mptmp[cp] = PETSC_FALSE;
71424e84afc0SStefano Zampini     cp++;
71434e84afc0SStefano Zampini     if (mmdata->P_oth) {
71449566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
71459566063dSJacob Faibussowitsch       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
71469566063dSJacob Faibussowitsch       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
71479566063dSJacob Faibussowitsch       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
71489566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
71499566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
71509566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp], product->fill));
71519566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
71529566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
71539566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
71544e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
71559566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
71569566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
71574e84afc0SStefano Zampini       mptmp[cp] = PETSC_TRUE;
71584e84afc0SStefano Zampini       cp++;
71599566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
71609566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
71619566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp], product->fill));
71629566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
71639566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
71649566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
71654e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
71669566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
71679566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
71684e84afc0SStefano Zampini       rmapt[cp] = 2;
71694e84afc0SStefano Zampini       rmapa[cp] = globidx;
71704e84afc0SStefano Zampini       cmapt[cp] = 2;
71714e84afc0SStefano Zampini       cmapa[cp] = P_oth_idx;
71724e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
71734e84afc0SStefano Zampini       cp++;
71744e84afc0SStefano Zampini     }
71754e84afc0SStefano Zampini     break;
71769371c9d4SSatish Balay   default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
71774e84afc0SStefano Zampini   }
71784e84afc0SStefano Zampini   /* sanity check */
71799371c9d4SSatish Balay   if (size > 1)
71809371c9d4SSatish Balay     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
71814e84afc0SStefano Zampini 
71829566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7183ddea5d60SJunchao Zhang   for (i = 0; i < cp; i++) {
7184ddea5d60SJunchao Zhang     mmdata->mp[i]    = mp[i];
7185ddea5d60SJunchao Zhang     mmdata->mptmp[i] = mptmp[i];
7186ddea5d60SJunchao Zhang   }
71874e84afc0SStefano Zampini   mmdata->cp             = cp;
71884e84afc0SStefano Zampini   C->product->data       = mmdata;
71894e84afc0SStefano Zampini   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
71904e84afc0SStefano Zampini   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
71914e84afc0SStefano Zampini 
7192c215019aSStefano Zampini   /* memory type */
7193c215019aSStefano Zampini   mmdata->mtype = PETSC_MEMTYPE_HOST;
71949566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
71959566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7196c215019aSStefano Zampini   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
71973214990dSStefano Zampini   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7198c215019aSStefano Zampini 
71994e84afc0SStefano Zampini   /* prepare coo coordinates for values insertion */
7200ddea5d60SJunchao Zhang 
7201ddea5d60SJunchao Zhang   /* count total nonzeros of those intermediate seqaij Mats
7202ddea5d60SJunchao Zhang     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7203ddea5d60SJunchao Zhang     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7204ddea5d60SJunchao Zhang     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7205ddea5d60SJunchao Zhang   */
72064e84afc0SStefano Zampini   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
72074e84afc0SStefano Zampini     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
72084e84afc0SStefano Zampini     if (mptmp[cp]) continue;
7209ddea5d60SJunchao Zhang     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
72104e84afc0SStefano Zampini       const PetscInt *rmap = rmapa[cp];
72114e84afc0SStefano Zampini       const PetscInt  mr   = mp[cp]->rmap->n;
72124e84afc0SStefano Zampini       const PetscInt  rs   = C->rmap->rstart;
72134e84afc0SStefano Zampini       const PetscInt  re   = C->rmap->rend;
72144e84afc0SStefano Zampini       const PetscInt *ii   = mm->i;
72154e84afc0SStefano Zampini       for (i = 0; i < mr; i++) {
72164e84afc0SStefano Zampini         const PetscInt gr = rmap[i];
72174e84afc0SStefano Zampini         const PetscInt nz = ii[i + 1] - ii[i];
7218ddea5d60SJunchao Zhang         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7219ddea5d60SJunchao Zhang         else ncoo_oown += nz;                  /* this row is local */
72204e84afc0SStefano Zampini       }
72214e84afc0SStefano Zampini     } else ncoo_d += mm->nz;
72224e84afc0SStefano Zampini   }
7223ddea5d60SJunchao Zhang 
7224ddea5d60SJunchao Zhang   /*
7225ddea5d60SJunchao Zhang     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7226ddea5d60SJunchao Zhang 
7227ddea5d60SJunchao Zhang     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
7228ddea5d60SJunchao Zhang 
7229ddea5d60SJunchao Zhang     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7230ddea5d60SJunchao Zhang 
7231ddea5d60SJunchao Zhang     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7232ddea5d60SJunchao Zhang     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7233ddea5d60SJunchao Zhang     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7234ddea5d60SJunchao Zhang 
7235ddea5d60SJunchao Zhang     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7236ddea5d60SJunchao Zhang     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7237ddea5d60SJunchao Zhang   */
72389566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
72399566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7240ddea5d60SJunchao Zhang 
7241ddea5d60SJunchao Zhang   /* gather (i,j) of nonzeros inserted by remote procs */
7242ddea5d60SJunchao Zhang   if (hasoffproc) {
72434e84afc0SStefano Zampini     PetscSF  msf;
72444e84afc0SStefano Zampini     PetscInt ncoo2, *coo_i2, *coo_j2;
72454e84afc0SStefano Zampini 
72469566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
72479566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
72489566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7249ddea5d60SJunchao Zhang 
72504e84afc0SStefano Zampini     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
72514e84afc0SStefano Zampini       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
72524e84afc0SStefano Zampini       PetscInt   *idxoff = mmdata->off[cp];
72534e84afc0SStefano Zampini       PetscInt   *idxown = mmdata->own[cp];
7254ddea5d60SJunchao Zhang       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
72554e84afc0SStefano Zampini         const PetscInt *rmap = rmapa[cp];
72564e84afc0SStefano Zampini         const PetscInt *cmap = cmapa[cp];
72574e84afc0SStefano Zampini         const PetscInt *ii   = mm->i;
72584e84afc0SStefano Zampini         PetscInt       *coi  = coo_i + ncoo_o;
72594e84afc0SStefano Zampini         PetscInt       *coj  = coo_j + ncoo_o;
72604e84afc0SStefano Zampini         const PetscInt  mr   = mp[cp]->rmap->n;
72614e84afc0SStefano Zampini         const PetscInt  rs   = C->rmap->rstart;
72624e84afc0SStefano Zampini         const PetscInt  re   = C->rmap->rend;
72634e84afc0SStefano Zampini         const PetscInt  cs   = C->cmap->rstart;
72644e84afc0SStefano Zampini         for (i = 0; i < mr; i++) {
72654e84afc0SStefano Zampini           const PetscInt *jj = mm->j + ii[i];
72664e84afc0SStefano Zampini           const PetscInt  gr = rmap[i];
72674e84afc0SStefano Zampini           const PetscInt  nz = ii[i + 1] - ii[i];
7268ddea5d60SJunchao Zhang           if (gr < rs || gr >= re) { /* this is an offproc row */
72694e84afc0SStefano Zampini             for (j = ii[i]; j < ii[i + 1]; j++) {
72704e84afc0SStefano Zampini               *coi++    = gr;
72714e84afc0SStefano Zampini               *idxoff++ = j;
72724e84afc0SStefano Zampini             }
72734e84afc0SStefano Zampini             if (!cmapt[cp]) { /* already global */
72744e84afc0SStefano Zampini               for (j = 0; j < nz; j++) *coj++ = jj[j];
72754e84afc0SStefano Zampini             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
72764e84afc0SStefano Zampini               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
72774e84afc0SStefano Zampini             } else { /* offdiag */
72784e84afc0SStefano Zampini               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
72794e84afc0SStefano Zampini             }
72804e84afc0SStefano Zampini             ncoo_o += nz;
7281ddea5d60SJunchao Zhang           } else { /* this is a local row */
72824e84afc0SStefano Zampini             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
72834e84afc0SStefano Zampini           }
72844e84afc0SStefano Zampini         }
72854e84afc0SStefano Zampini       }
72864e84afc0SStefano Zampini       mmdata->off[cp + 1] = idxoff;
72874e84afc0SStefano Zampini       mmdata->own[cp + 1] = idxown;
72884e84afc0SStefano Zampini     }
72894e84afc0SStefano Zampini 
72909566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
72919566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
72929566063dSJacob Faibussowitsch     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
72939566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
72944e84afc0SStefano Zampini     ncoo = ncoo_d + ncoo_oown + ncoo2;
72959566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
72969566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
72979566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
72989566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
72999566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
73009566063dSJacob Faibussowitsch     PetscCall(PetscFree2(coo_i, coo_j));
7301ddea5d60SJunchao Zhang     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
73029566063dSJacob Faibussowitsch     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
73034e84afc0SStefano Zampini     coo_i = coo_i2;
73044e84afc0SStefano Zampini     coo_j = coo_j2;
73054e84afc0SStefano Zampini   } else { /* no offproc values insertion */
73064e84afc0SStefano Zampini     ncoo = ncoo_d;
73079566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7308c215019aSStefano Zampini 
73099566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
73109566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
73119566063dSJacob Faibussowitsch     PetscCall(PetscSFSetUp(mmdata->sf));
73124e84afc0SStefano Zampini   }
7313c215019aSStefano Zampini   mmdata->hasoffproc = hasoffproc;
73144e84afc0SStefano Zampini 
7315ddea5d60SJunchao Zhang   /* gather (i,j) of nonzeros inserted locally */
73164e84afc0SStefano Zampini   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
73174e84afc0SStefano Zampini     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
73184e84afc0SStefano Zampini     PetscInt       *coi  = coo_i + ncoo_d;
73194e84afc0SStefano Zampini     PetscInt       *coj  = coo_j + ncoo_d;
73204e84afc0SStefano Zampini     const PetscInt *jj   = mm->j;
73214e84afc0SStefano Zampini     const PetscInt *ii   = mm->i;
73224e84afc0SStefano Zampini     const PetscInt *cmap = cmapa[cp];
73234e84afc0SStefano Zampini     const PetscInt *rmap = rmapa[cp];
73244e84afc0SStefano Zampini     const PetscInt  mr   = mp[cp]->rmap->n;
73254e84afc0SStefano Zampini     const PetscInt  rs   = C->rmap->rstart;
73264e84afc0SStefano Zampini     const PetscInt  re   = C->rmap->rend;
73274e84afc0SStefano Zampini     const PetscInt  cs   = C->cmap->rstart;
73284e84afc0SStefano Zampini 
73294e84afc0SStefano Zampini     if (mptmp[cp]) continue;
7330ddea5d60SJunchao Zhang     if (rmapt[cp] == 1) { /* consecutive rows */
7331ddea5d60SJunchao Zhang       /* fill coo_i */
73324e84afc0SStefano Zampini       for (i = 0; i < mr; i++) {
73334e84afc0SStefano Zampini         const PetscInt gr = i + rs;
73344e84afc0SStefano Zampini         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
73354e84afc0SStefano Zampini       }
7336ddea5d60SJunchao Zhang       /* fill coo_j */
7337ddea5d60SJunchao Zhang       if (!cmapt[cp]) { /* type-0, already global */
73389566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7339ddea5d60SJunchao Zhang       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7340ddea5d60SJunchao Zhang         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7341ddea5d60SJunchao Zhang       } else {                                            /* type-2, local to global for sparse columns */
73424e84afc0SStefano Zampini         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
73434e84afc0SStefano Zampini       }
73444e84afc0SStefano Zampini       ncoo_d += mm->nz;
7345ddea5d60SJunchao Zhang     } else if (rmapt[cp] == 2) { /* sparse rows */
73464e84afc0SStefano Zampini       for (i = 0; i < mr; i++) {
73474e84afc0SStefano Zampini         const PetscInt *jj = mm->j + ii[i];
73484e84afc0SStefano Zampini         const PetscInt  gr = rmap[i];
73494e84afc0SStefano Zampini         const PetscInt  nz = ii[i + 1] - ii[i];
7350ddea5d60SJunchao Zhang         if (gr >= rs && gr < re) { /* local rows */
73514e84afc0SStefano Zampini           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7352ddea5d60SJunchao Zhang           if (!cmapt[cp]) { /* type-0, already global */
73534e84afc0SStefano Zampini             for (j = 0; j < nz; j++) *coj++ = jj[j];
73544e84afc0SStefano Zampini           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
73554e84afc0SStefano Zampini             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7356ddea5d60SJunchao Zhang           } else { /* type-2, local to global for sparse columns */
73574e84afc0SStefano Zampini             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
73584e84afc0SStefano Zampini           }
73594e84afc0SStefano Zampini           ncoo_d += nz;
73604e84afc0SStefano Zampini         }
73614e84afc0SStefano Zampini       }
73624e84afc0SStefano Zampini     }
73634e84afc0SStefano Zampini   }
736448a46eb9SPierre Jolivet   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
73659566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&glob));
736648a46eb9SPierre Jolivet   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
73679566063dSJacob Faibussowitsch   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7368ddea5d60SJunchao Zhang   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
73699566063dSJacob Faibussowitsch   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
73704e84afc0SStefano Zampini 
73714e84afc0SStefano Zampini   /* preallocate with COO data */
73729566063dSJacob Faibussowitsch   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
73739566063dSJacob Faibussowitsch   PetscCall(PetscFree2(coo_i, coo_j));
73744e84afc0SStefano Zampini   PetscFunctionReturn(0);
73754e84afc0SStefano Zampini }
73764e84afc0SStefano Zampini 
73779371c9d4SSatish Balay PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) {
73784e84afc0SStefano Zampini   Mat_Product *product = mat->product;
73794e84afc0SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
73804e84afc0SStefano Zampini   PetscBool match  = PETSC_FALSE;
7381abb89eb1SStefano Zampini   PetscBool usecpu = PETSC_FALSE;
73824e84afc0SStefano Zampini #else
73834e84afc0SStefano Zampini   PetscBool match = PETSC_TRUE;
73844e84afc0SStefano Zampini #endif
73854e84afc0SStefano Zampini 
73864e84afc0SStefano Zampini   PetscFunctionBegin;
73874e84afc0SStefano Zampini   MatCheckProduct(mat, 1);
73884e84afc0SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
738948a46eb9SPierre Jolivet   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
739065e4b4d4SStefano Zampini   if (match) { /* we can always fallback to the CPU if requested */
7391abb89eb1SStefano Zampini     switch (product->type) {
7392abb89eb1SStefano Zampini     case MATPRODUCT_AB:
7393abb89eb1SStefano Zampini       if (product->api_user) {
7394d0609cedSBarry Smith         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
73959566063dSJacob Faibussowitsch         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7396d0609cedSBarry Smith         PetscOptionsEnd();
7397abb89eb1SStefano Zampini       } else {
7398d0609cedSBarry Smith         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
73999566063dSJacob Faibussowitsch         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7400d0609cedSBarry Smith         PetscOptionsEnd();
7401abb89eb1SStefano Zampini       }
7402abb89eb1SStefano Zampini       break;
7403abb89eb1SStefano Zampini     case MATPRODUCT_AtB:
7404abb89eb1SStefano Zampini       if (product->api_user) {
7405d0609cedSBarry Smith         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
74069566063dSJacob Faibussowitsch         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7407d0609cedSBarry Smith         PetscOptionsEnd();
7408abb89eb1SStefano Zampini       } else {
7409d0609cedSBarry Smith         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
74109566063dSJacob Faibussowitsch         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7411d0609cedSBarry Smith         PetscOptionsEnd();
7412abb89eb1SStefano Zampini       }
7413abb89eb1SStefano Zampini       break;
7414abb89eb1SStefano Zampini     case MATPRODUCT_PtAP:
7415abb89eb1SStefano Zampini       if (product->api_user) {
7416d0609cedSBarry Smith         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
74179566063dSJacob Faibussowitsch         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7418d0609cedSBarry Smith         PetscOptionsEnd();
7419abb89eb1SStefano Zampini       } else {
7420d0609cedSBarry Smith         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
74219566063dSJacob Faibussowitsch         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7422d0609cedSBarry Smith         PetscOptionsEnd();
7423abb89eb1SStefano Zampini       }
7424abb89eb1SStefano Zampini       break;
74259371c9d4SSatish Balay     default: break;
7426abb89eb1SStefano Zampini     }
7427abb89eb1SStefano Zampini     match = (PetscBool)!usecpu;
7428abb89eb1SStefano Zampini   }
74294e84afc0SStefano Zampini #endif
74304e84afc0SStefano Zampini   if (match) {
74314e84afc0SStefano Zampini     switch (product->type) {
74324e84afc0SStefano Zampini     case MATPRODUCT_AB:
74334e84afc0SStefano Zampini     case MATPRODUCT_AtB:
74349371c9d4SSatish Balay     case MATPRODUCT_PtAP: mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; break;
74359371c9d4SSatish Balay     default: break;
74364e84afc0SStefano Zampini     }
74374e84afc0SStefano Zampini   }
74384e84afc0SStefano Zampini   /* fallback to MPIAIJ ops */
74399566063dSJacob Faibussowitsch   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
74404e84afc0SStefano Zampini   PetscFunctionReturn(0);
744181824310SBarry Smith }
744298921bdaSJacob Faibussowitsch 
744398921bdaSJacob Faibussowitsch /*
744472833a62Smarkadams4    Produces a set of block column indices of the matrix row, one for each block represented in the original row
744572833a62Smarkadams4 
744672833a62Smarkadams4    n - the number of block indices in cc[]
744772833a62Smarkadams4    cc - the block indices (must be large enough to contain the indices)
744872833a62Smarkadams4 */
74499371c9d4SSatish Balay static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) {
745072833a62Smarkadams4   PetscInt        cnt = -1, nidx, j;
745172833a62Smarkadams4   const PetscInt *idx;
745272833a62Smarkadams4 
745372833a62Smarkadams4   PetscFunctionBegin;
745472833a62Smarkadams4   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
745572833a62Smarkadams4   if (nidx) {
745672833a62Smarkadams4     cnt     = 0;
745772833a62Smarkadams4     cc[cnt] = idx[0] / bs;
745872833a62Smarkadams4     for (j = 1; j < nidx; j++) {
745972833a62Smarkadams4       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
746072833a62Smarkadams4     }
746172833a62Smarkadams4   }
746272833a62Smarkadams4   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
746372833a62Smarkadams4   *n = cnt + 1;
746472833a62Smarkadams4   PetscFunctionReturn(0);
746572833a62Smarkadams4 }
746672833a62Smarkadams4 
746772833a62Smarkadams4 /*
746872833a62Smarkadams4     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
746972833a62Smarkadams4 
747072833a62Smarkadams4     ncollapsed - the number of block indices
747172833a62Smarkadams4     collapsed - the block indices (must be large enough to contain the indices)
747272833a62Smarkadams4 */
74739371c9d4SSatish Balay static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) {
747472833a62Smarkadams4   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
747572833a62Smarkadams4 
747672833a62Smarkadams4   PetscFunctionBegin;
747772833a62Smarkadams4   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
747872833a62Smarkadams4   for (i = start + 1; i < start + bs; i++) {
747972833a62Smarkadams4     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
748072833a62Smarkadams4     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
74819371c9d4SSatish Balay     cprevtmp = cprev;
74829371c9d4SSatish Balay     cprev    = merged;
74839371c9d4SSatish Balay     merged   = cprevtmp;
748472833a62Smarkadams4   }
748572833a62Smarkadams4   *ncollapsed = nprev;
748672833a62Smarkadams4   if (collapsed) *collapsed = cprev;
748772833a62Smarkadams4   PetscFunctionReturn(0);
748872833a62Smarkadams4 }
748972833a62Smarkadams4 
749072833a62Smarkadams4 /* -------------------------------------------------------------------------- */
749172833a62Smarkadams4 /*
749272833a62Smarkadams4  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
749372833a62Smarkadams4 
749472833a62Smarkadams4  Input Parameter:
749572833a62Smarkadams4  . Amat - matrix
749672833a62Smarkadams4  - symmetrize - make the result symmetric
749772833a62Smarkadams4  + scale - scale with diagonal
749872833a62Smarkadams4 
749972833a62Smarkadams4  Output Parameter:
750072833a62Smarkadams4  . a_Gmat - output scalar graph >= 0
750172833a62Smarkadams4 
750272833a62Smarkadams4  */
75039371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat) {
750472833a62Smarkadams4   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
750572833a62Smarkadams4   MPI_Comm  comm;
750672833a62Smarkadams4   Mat       Gmat;
750772833a62Smarkadams4   PetscBool ismpiaij, isseqaij;
750872833a62Smarkadams4   Mat       a, b, c;
750972833a62Smarkadams4   MatType   jtype;
751072833a62Smarkadams4 
751172833a62Smarkadams4   PetscFunctionBegin;
751272833a62Smarkadams4   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
751372833a62Smarkadams4   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
751472833a62Smarkadams4   PetscCall(MatGetSize(Amat, &MM, &NN));
751572833a62Smarkadams4   PetscCall(MatGetBlockSize(Amat, &bs));
751672833a62Smarkadams4   nloc = (Iend - Istart) / bs;
751772833a62Smarkadams4 
751872833a62Smarkadams4   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
751972833a62Smarkadams4   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
752072833a62Smarkadams4   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
752172833a62Smarkadams4 
752272833a62Smarkadams4   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
752372833a62Smarkadams4   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
752472833a62Smarkadams4      implementation */
752572833a62Smarkadams4   if (bs > 1) {
752672833a62Smarkadams4     PetscCall(MatGetType(Amat, &jtype));
752772833a62Smarkadams4     PetscCall(MatCreate(comm, &Gmat));
752872833a62Smarkadams4     PetscCall(MatSetType(Gmat, jtype));
752972833a62Smarkadams4     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
753072833a62Smarkadams4     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
753172833a62Smarkadams4     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
753272833a62Smarkadams4       PetscInt  *d_nnz, *o_nnz;
753372833a62Smarkadams4       MatScalar *aa, val, AA[4096];
753472833a62Smarkadams4       PetscInt  *aj, *ai, AJ[4096], nc;
75359371c9d4SSatish Balay       if (isseqaij) {
75369371c9d4SSatish Balay         a = Amat;
75379371c9d4SSatish Balay         b = NULL;
75389371c9d4SSatish Balay       } else {
753972833a62Smarkadams4         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
75409371c9d4SSatish Balay         a             = d->A;
75419371c9d4SSatish Balay         b             = d->B;
754272833a62Smarkadams4       }
754372833a62Smarkadams4       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
754472833a62Smarkadams4       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
754572833a62Smarkadams4       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
754672833a62Smarkadams4         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz, nmax = 0;
754772833a62Smarkadams4         const PetscInt *cols;
754872833a62Smarkadams4         for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows
754972833a62Smarkadams4           PetscCall(MatGetRow(c, brow, &jj, &cols, NULL));
755072833a62Smarkadams4           nnz[brow / bs] = jj / bs;
755172833a62Smarkadams4           if (jj % bs) ok = 0;
755272833a62Smarkadams4           if (cols) j0 = cols[0];
755372833a62Smarkadams4           else j0 = -1;
755472833a62Smarkadams4           PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL));
755572833a62Smarkadams4           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
755672833a62Smarkadams4           for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks
755772833a62Smarkadams4             PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL));
755872833a62Smarkadams4             if (jj % bs) ok = 0;
755972833a62Smarkadams4             if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
756072833a62Smarkadams4             if (nnz[brow / bs] != jj / bs) ok = 0;
756172833a62Smarkadams4             PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL));
756272833a62Smarkadams4           }
756372833a62Smarkadams4           if (!ok) {
756472833a62Smarkadams4             PetscCall(PetscFree2(d_nnz, o_nnz));
756572833a62Smarkadams4             goto old_bs;
756672833a62Smarkadams4           }
756772833a62Smarkadams4         }
756872833a62Smarkadams4         PetscCheck(nmax < 4096, PETSC_COMM_SELF, PETSC_ERR_USER, "Buffer %" PetscInt_FMT " too small 4096.", nmax);
756972833a62Smarkadams4       }
757072833a62Smarkadams4       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
757172833a62Smarkadams4       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
757272833a62Smarkadams4       PetscCall(PetscFree2(d_nnz, o_nnz));
757372833a62Smarkadams4       // diag
757472833a62Smarkadams4       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
757572833a62Smarkadams4         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
757672833a62Smarkadams4         ai               = aseq->i;
757772833a62Smarkadams4         n                = ai[brow + 1] - ai[brow];
757872833a62Smarkadams4         aj               = aseq->j + ai[brow];
757972833a62Smarkadams4         for (int k = 0; k < n; k += bs) {        // block columns
758072833a62Smarkadams4           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
758172833a62Smarkadams4           val        = 0;
758272833a62Smarkadams4           for (int ii = 0; ii < bs; ii++) { // rows in block
758372833a62Smarkadams4             aa = aseq->a + ai[brow + ii] + k;
758472833a62Smarkadams4             for (int jj = 0; jj < bs; jj++) {         // columns in block
758572833a62Smarkadams4               val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
758672833a62Smarkadams4             }
758772833a62Smarkadams4           }
758872833a62Smarkadams4           AA[k / bs] = val;
758972833a62Smarkadams4         }
759072833a62Smarkadams4         grow = Istart / bs + brow / bs;
759172833a62Smarkadams4         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
759272833a62Smarkadams4       }
759372833a62Smarkadams4       // off-diag
759472833a62Smarkadams4       if (ismpiaij) {
759572833a62Smarkadams4         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
759672833a62Smarkadams4         const PetscScalar *vals;
759772833a62Smarkadams4         const PetscInt    *cols, *garray = aij->garray;
759872833a62Smarkadams4         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
759972833a62Smarkadams4         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
760072833a62Smarkadams4           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
760172833a62Smarkadams4           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
760272833a62Smarkadams4             AA[k / bs] = 0;
760372833a62Smarkadams4             AJ[cidx]   = garray[cols[k]] / bs;
760472833a62Smarkadams4           }
760572833a62Smarkadams4           nc = ncols / bs;
760672833a62Smarkadams4           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
760772833a62Smarkadams4           for (int ii = 0; ii < bs; ii++) { // rows in block
760872833a62Smarkadams4             PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
760972833a62Smarkadams4             for (int k = 0; k < ncols; k += bs) {
761072833a62Smarkadams4               for (int jj = 0; jj < bs; jj++) { // cols in block
761172833a62Smarkadams4                 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
761272833a62Smarkadams4               }
761372833a62Smarkadams4             }
761472833a62Smarkadams4             PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
761572833a62Smarkadams4           }
761672833a62Smarkadams4           grow = Istart / bs + brow / bs;
761772833a62Smarkadams4           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
761872833a62Smarkadams4         }
761972833a62Smarkadams4       }
762072833a62Smarkadams4       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
762172833a62Smarkadams4       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
762272833a62Smarkadams4     } else {
762372833a62Smarkadams4       const PetscScalar *vals;
762472833a62Smarkadams4       const PetscInt    *idx;
762572833a62Smarkadams4       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
762672833a62Smarkadams4     old_bs:
762772833a62Smarkadams4       /*
762872833a62Smarkadams4        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
762972833a62Smarkadams4        */
763072833a62Smarkadams4       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
763172833a62Smarkadams4       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
763272833a62Smarkadams4       if (isseqaij) {
763372833a62Smarkadams4         PetscInt max_d_nnz;
763472833a62Smarkadams4         /*
763572833a62Smarkadams4          Determine exact preallocation count for (sequential) scalar matrix
763672833a62Smarkadams4          */
763772833a62Smarkadams4         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
763872833a62Smarkadams4         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
763972833a62Smarkadams4         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
764048a46eb9SPierre Jolivet         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
764172833a62Smarkadams4         PetscCall(PetscFree3(w0, w1, w2));
764272833a62Smarkadams4       } else if (ismpiaij) {
764372833a62Smarkadams4         Mat             Daij, Oaij;
764472833a62Smarkadams4         const PetscInt *garray;
764572833a62Smarkadams4         PetscInt        max_d_nnz;
764672833a62Smarkadams4         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
764772833a62Smarkadams4         /*
764872833a62Smarkadams4          Determine exact preallocation count for diagonal block portion of scalar matrix
764972833a62Smarkadams4          */
765072833a62Smarkadams4         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
765172833a62Smarkadams4         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
765272833a62Smarkadams4         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
765348a46eb9SPierre Jolivet         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
765472833a62Smarkadams4         PetscCall(PetscFree3(w0, w1, w2));
765572833a62Smarkadams4         /*
765672833a62Smarkadams4          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
765772833a62Smarkadams4          */
765872833a62Smarkadams4         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
765972833a62Smarkadams4           o_nnz[jj] = 0;
766072833a62Smarkadams4           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
766172833a62Smarkadams4             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
766272833a62Smarkadams4             o_nnz[jj] += ncols;
766372833a62Smarkadams4             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
766472833a62Smarkadams4           }
766572833a62Smarkadams4           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
766672833a62Smarkadams4         }
766772833a62Smarkadams4       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
766872833a62Smarkadams4       /* get scalar copy (norms) of matrix */
766972833a62Smarkadams4       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
767072833a62Smarkadams4       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
767172833a62Smarkadams4       PetscCall(PetscFree2(d_nnz, o_nnz));
767272833a62Smarkadams4       for (Ii = Istart; Ii < Iend; Ii++) {
767372833a62Smarkadams4         PetscInt dest_row = Ii / bs;
767472833a62Smarkadams4         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
767572833a62Smarkadams4         for (jj = 0; jj < ncols; jj++) {
767672833a62Smarkadams4           PetscInt    dest_col = idx[jj] / bs;
767772833a62Smarkadams4           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
767872833a62Smarkadams4           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
767972833a62Smarkadams4         }
768072833a62Smarkadams4         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
768172833a62Smarkadams4       }
768272833a62Smarkadams4       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
768372833a62Smarkadams4       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
768472833a62Smarkadams4     }
768572833a62Smarkadams4   } else {
768672833a62Smarkadams4     /* TODO GPU: optimization proposal, each class provides fast implementation of this
768772833a62Smarkadams4      procedure via MatAbs API */
768872833a62Smarkadams4     /* just copy scalar matrix & abs() */
768972833a62Smarkadams4     PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
76909371c9d4SSatish Balay     if (isseqaij) {
76919371c9d4SSatish Balay       a = Gmat;
76929371c9d4SSatish Balay       b = NULL;
76939371c9d4SSatish Balay     } else {
769472833a62Smarkadams4       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
76959371c9d4SSatish Balay       a             = d->A;
76969371c9d4SSatish Balay       b             = d->B;
769772833a62Smarkadams4     }
769872833a62Smarkadams4     /* abs */
769972833a62Smarkadams4     for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
770072833a62Smarkadams4       MatInfo      info;
770172833a62Smarkadams4       PetscScalar *avals;
770272833a62Smarkadams4       PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
770372833a62Smarkadams4       PetscCall(MatSeqAIJGetArray(c, &avals));
770472833a62Smarkadams4       for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
770572833a62Smarkadams4       PetscCall(MatSeqAIJRestoreArray(c, &avals));
770672833a62Smarkadams4     }
770772833a62Smarkadams4   }
770872833a62Smarkadams4   if (symmetrize) {
7709b94d7dedSBarry Smith     PetscBool isset, issym;
7710b94d7dedSBarry Smith     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
7711b94d7dedSBarry Smith     if (!isset || !issym) {
771272833a62Smarkadams4       Mat matTrans;
771372833a62Smarkadams4       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
77141fcb517eSBarry Smith       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
771572833a62Smarkadams4       PetscCall(MatDestroy(&matTrans));
771672833a62Smarkadams4     }
771772833a62Smarkadams4     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
771872833a62Smarkadams4   } else {
771972833a62Smarkadams4     PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
772072833a62Smarkadams4   }
772172833a62Smarkadams4   if (scale) {
772272833a62Smarkadams4     /* scale c for all diagonal values = 1 or -1 */
772372833a62Smarkadams4     Vec diag;
772472833a62Smarkadams4     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
772572833a62Smarkadams4     PetscCall(MatGetDiagonal(Gmat, diag));
772672833a62Smarkadams4     PetscCall(VecReciprocal(diag));
772772833a62Smarkadams4     PetscCall(VecSqrtAbs(diag));
772872833a62Smarkadams4     PetscCall(MatDiagonalScale(Gmat, diag, diag));
772972833a62Smarkadams4     PetscCall(VecDestroy(&diag));
773072833a62Smarkadams4   }
773172833a62Smarkadams4   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
773272833a62Smarkadams4   *a_Gmat = Gmat;
773372833a62Smarkadams4   PetscFunctionReturn(0);
773472833a62Smarkadams4 }
773572833a62Smarkadams4 
77369371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG) {
773772833a62Smarkadams4   PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
773872833a62Smarkadams4   Mat                tGmat;
773972833a62Smarkadams4   MPI_Comm           comm;
774072833a62Smarkadams4   const PetscScalar *vals;
774172833a62Smarkadams4   const PetscInt    *idx;
774272833a62Smarkadams4   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
774372833a62Smarkadams4   MatScalar         *AA; // this is checked in graph
774472833a62Smarkadams4   PetscBool          isseqaij;
774572833a62Smarkadams4   Mat                a, b, c;
774672833a62Smarkadams4   MatType            jtype;
774772833a62Smarkadams4 
774872833a62Smarkadams4   PetscFunctionBegin;
774972833a62Smarkadams4   PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
775072833a62Smarkadams4   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
775172833a62Smarkadams4   PetscCall(MatGetType(Gmat, &jtype));
775272833a62Smarkadams4   PetscCall(MatCreate(comm, &tGmat));
775372833a62Smarkadams4   PetscCall(MatSetType(tGmat, jtype));
775472833a62Smarkadams4 
775572833a62Smarkadams4   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
775672833a62Smarkadams4                Also, if the matrix is symmetric, can we skip this
775772833a62Smarkadams4                operation? It can be very expensive on large matrices. */
775872833a62Smarkadams4 
775972833a62Smarkadams4   // global sizes
776072833a62Smarkadams4   PetscCall(MatGetSize(Gmat, &MM, &NN));
776172833a62Smarkadams4   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
776272833a62Smarkadams4   nloc = Iend - Istart;
776372833a62Smarkadams4   PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
77649371c9d4SSatish Balay   if (isseqaij) {
77659371c9d4SSatish Balay     a = Gmat;
77669371c9d4SSatish Balay     b = NULL;
77679371c9d4SSatish Balay   } else {
776872833a62Smarkadams4     Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
77699371c9d4SSatish Balay     a             = d->A;
77709371c9d4SSatish Balay     b             = d->B;
777172833a62Smarkadams4     garray        = d->garray;
777272833a62Smarkadams4   }
777372833a62Smarkadams4   /* Determine upper bound on non-zeros needed in new filtered matrix */
777472833a62Smarkadams4   for (PetscInt row = 0; row < nloc; row++) {
777572833a62Smarkadams4     PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
777672833a62Smarkadams4     d_nnz[row] = ncols;
777772833a62Smarkadams4     if (ncols > maxcols) maxcols = ncols;
777872833a62Smarkadams4     PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
777972833a62Smarkadams4   }
778072833a62Smarkadams4   if (b) {
778172833a62Smarkadams4     for (PetscInt row = 0; row < nloc; row++) {
778272833a62Smarkadams4       PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
778372833a62Smarkadams4       o_nnz[row] = ncols;
778472833a62Smarkadams4       if (ncols > maxcols) maxcols = ncols;
778572833a62Smarkadams4       PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
778672833a62Smarkadams4     }
778772833a62Smarkadams4   }
778872833a62Smarkadams4   PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
778972833a62Smarkadams4   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
779072833a62Smarkadams4   PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
779172833a62Smarkadams4   PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
779272833a62Smarkadams4   PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
779372833a62Smarkadams4   PetscCall(PetscFree2(d_nnz, o_nnz));
779472833a62Smarkadams4   //
779572833a62Smarkadams4   PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
779672833a62Smarkadams4   nnz0 = nnz1 = 0;
779772833a62Smarkadams4   for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
779872833a62Smarkadams4     for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
779972833a62Smarkadams4       PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
780072833a62Smarkadams4       for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
780172833a62Smarkadams4         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
780272833a62Smarkadams4         if (PetscRealPart(sv) > vfilter) {
780372833a62Smarkadams4           nnz1++;
780472833a62Smarkadams4           PetscInt cid = idx[jj] + Istart; //diag
780572833a62Smarkadams4           if (c != a) cid = garray[idx[jj]];
780672833a62Smarkadams4           AA[ncol_row] = vals[jj];
780772833a62Smarkadams4           AJ[ncol_row] = cid;
780872833a62Smarkadams4           ncol_row++;
780972833a62Smarkadams4         }
781072833a62Smarkadams4       }
781172833a62Smarkadams4       PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
781272833a62Smarkadams4       PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
781372833a62Smarkadams4     }
781472833a62Smarkadams4   }
781572833a62Smarkadams4   PetscCall(PetscFree2(AA, AJ));
781672833a62Smarkadams4   PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
781772833a62Smarkadams4   PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
781872833a62Smarkadams4   PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */
781972833a62Smarkadams4 
78209371c9d4SSatish Balay   PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));
782172833a62Smarkadams4 
782272833a62Smarkadams4   *filteredG = tGmat;
782372833a62Smarkadams4   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
782472833a62Smarkadams4   PetscFunctionReturn(0);
782572833a62Smarkadams4 }
782672833a62Smarkadams4 
782772833a62Smarkadams4 /*
782898921bdaSJacob Faibussowitsch     Special version for direct calls from Fortran
782998921bdaSJacob Faibussowitsch */
783098921bdaSJacob Faibussowitsch #include <petsc/private/fortranimpl.h>
783198921bdaSJacob Faibussowitsch 
783298921bdaSJacob Faibussowitsch /* Change these macros so can be used in void function */
78339566063dSJacob Faibussowitsch /* Identical to PetscCallVoid, except it assigns to *_ierr */
/*
   PetscCall override: evaluates its argument once into a local error code. When that code is nonzero,
   the value returned by PetscError(..., PETSC_ERROR_REPEAT, ...) is stored in *_ierr and the enclosing
   function returns without a value. The macro therefore needs a pointer named _ierr in scope at every
   use. The override remains active for the code that follows, unless it is redefined again later.
*/
78349566063dSJacob Faibussowitsch #undef PetscCall
78359371c9d4SSatish Balay #define PetscCall(...) \
78369371c9d4SSatish Balay   do { \
78375f80ce2aSJacob Faibussowitsch     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
783898921bdaSJacob Faibussowitsch     if (PetscUnlikely(ierr_msv_mpiaij)) { \
783998921bdaSJacob Faibussowitsch       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
784098921bdaSJacob Faibussowitsch       return; \
784198921bdaSJacob Faibussowitsch     } \
784298921bdaSJacob Faibussowitsch   } while (0)
784398921bdaSJacob Faibussowitsch 
/*
   SETERRQ override: unconditionally stores the value returned by
   PetscError(comm, ..., ierr, PETSC_ERROR_INITIAL, <message and arguments>) in *_ierr and returns
   without a value. Like the PetscCall override above it needs a pointer named _ierr in scope.
*/
784498921bdaSJacob Faibussowitsch #undef SETERRQ
78459371c9d4SSatish Balay #define SETERRQ(comm, ierr, ...) \
78469371c9d4SSatish Balay   do { \
784798921bdaSJacob Faibussowitsch     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
784898921bdaSJacob Faibussowitsch     return; \
784998921bdaSJacob Faibussowitsch   } while (0)
785098921bdaSJacob Faibussowitsch 
785198921bdaSJacob Faibussowitsch #if defined(PETSC_HAVE_FORTRAN_CAPS)
785298921bdaSJacob Faibussowitsch #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
785398921bdaSJacob Faibussowitsch #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
785498921bdaSJacob Faibussowitsch #define matsetvaluesmpiaij_ matsetvaluesmpiaij
785598921bdaSJacob Faibussowitsch #else
785698921bdaSJacob Faibussowitsch #endif
78579371c9d4SSatish Balay PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) {
785898921bdaSJacob Faibussowitsch   Mat         mat = *mmat;
785998921bdaSJacob Faibussowitsch   PetscInt    m = *mm, n = *mn;
786098921bdaSJacob Faibussowitsch   InsertMode  addv = *maddv;
786198921bdaSJacob Faibussowitsch   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
786298921bdaSJacob Faibussowitsch   PetscScalar value;
786398921bdaSJacob Faibussowitsch 
786498921bdaSJacob Faibussowitsch   MatCheckPreallocated(mat, 1);
786598921bdaSJacob Faibussowitsch   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
78665f80ce2aSJacob Faibussowitsch   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
786798921bdaSJacob Faibussowitsch   {
786898921bdaSJacob Faibussowitsch     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
786998921bdaSJacob Faibussowitsch     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
787098921bdaSJacob Faibussowitsch     PetscBool roworiented = aij->roworiented;
787198921bdaSJacob Faibussowitsch 
787298921bdaSJacob Faibussowitsch     /* Some Variables required in the macro */
787398921bdaSJacob Faibussowitsch     Mat                    A     = aij->A;
787498921bdaSJacob Faibussowitsch     Mat_SeqAIJ            *a     = (Mat_SeqAIJ *)A->data;
787598921bdaSJacob Faibussowitsch     PetscInt              *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
787698921bdaSJacob Faibussowitsch     MatScalar             *aa;
787798921bdaSJacob Faibussowitsch     PetscBool              ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
787898921bdaSJacob Faibussowitsch     Mat                    B                 = aij->B;
787998921bdaSJacob Faibussowitsch     Mat_SeqAIJ            *b                 = (Mat_SeqAIJ *)B->data;
788098921bdaSJacob Faibussowitsch     PetscInt              *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
788198921bdaSJacob Faibussowitsch     MatScalar             *ba;
788298921bdaSJacob Faibussowitsch     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
788398921bdaSJacob Faibussowitsch      * cannot use "#if defined" inside a macro. */
788498921bdaSJacob Faibussowitsch     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
788598921bdaSJacob Faibussowitsch 
788698921bdaSJacob Faibussowitsch     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
788798921bdaSJacob Faibussowitsch     PetscInt   nonew = a->nonew;
788898921bdaSJacob Faibussowitsch     MatScalar *ap1, *ap2;
788998921bdaSJacob Faibussowitsch 
789098921bdaSJacob Faibussowitsch     PetscFunctionBegin;
78919566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArray(A, &aa));
78929566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArray(B, &ba));
789398921bdaSJacob Faibussowitsch     for (i = 0; i < m; i++) {
789498921bdaSJacob Faibussowitsch       if (im[i] < 0) continue;
78956bdcaf15SBarry Smith       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
789698921bdaSJacob Faibussowitsch       if (im[i] >= rstart && im[i] < rend) {
789798921bdaSJacob Faibussowitsch         row      = im[i] - rstart;
789898921bdaSJacob Faibussowitsch         lastcol1 = -1;
789998921bdaSJacob Faibussowitsch         rp1      = aj + ai[row];
790098921bdaSJacob Faibussowitsch         ap1      = aa + ai[row];
790198921bdaSJacob Faibussowitsch         rmax1    = aimax[row];
790298921bdaSJacob Faibussowitsch         nrow1    = ailen[row];
790398921bdaSJacob Faibussowitsch         low1     = 0;
790498921bdaSJacob Faibussowitsch         high1    = nrow1;
790598921bdaSJacob Faibussowitsch         lastcol2 = -1;
790698921bdaSJacob Faibussowitsch         rp2      = bj + bi[row];
790798921bdaSJacob Faibussowitsch         ap2      = ba + bi[row];
790898921bdaSJacob Faibussowitsch         rmax2    = bimax[row];
790998921bdaSJacob Faibussowitsch         nrow2    = bilen[row];
791098921bdaSJacob Faibussowitsch         low2     = 0;
791198921bdaSJacob Faibussowitsch         high2    = nrow2;
791298921bdaSJacob Faibussowitsch 
791398921bdaSJacob Faibussowitsch         for (j = 0; j < n; j++) {
791498921bdaSJacob Faibussowitsch           if (roworiented) value = v[i * n + j];
791598921bdaSJacob Faibussowitsch           else value = v[i + j * m];
791698921bdaSJacob Faibussowitsch           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
791798921bdaSJacob Faibussowitsch           if (in[j] >= cstart && in[j] < cend) {
791898921bdaSJacob Faibussowitsch             col = in[j] - cstart;
791998921bdaSJacob Faibussowitsch             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
792098921bdaSJacob Faibussowitsch           } else if (in[j] < 0) continue;
792198921bdaSJacob Faibussowitsch           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
792298921bdaSJacob Faibussowitsch             /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
792363a3b9bcSJacob Faibussowitsch             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
792498921bdaSJacob Faibussowitsch           } else {
792598921bdaSJacob Faibussowitsch             if (mat->was_assembled) {
792648a46eb9SPierre Jolivet               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
792798921bdaSJacob Faibussowitsch #if defined(PETSC_USE_CTABLE)
79289566063dSJacob Faibussowitsch               PetscCall(PetscTableFind(aij->colmap, in[j] + 1, &col));
792998921bdaSJacob Faibussowitsch               col--;
793098921bdaSJacob Faibussowitsch #else
793198921bdaSJacob Faibussowitsch               col = aij->colmap[in[j]] - 1;
793298921bdaSJacob Faibussowitsch #endif
793398921bdaSJacob Faibussowitsch               if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
79349566063dSJacob Faibussowitsch                 PetscCall(MatDisAssemble_MPIAIJ(mat));
793598921bdaSJacob Faibussowitsch                 col      = in[j];
793698921bdaSJacob Faibussowitsch                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
793798921bdaSJacob Faibussowitsch                 B        = aij->B;
793898921bdaSJacob Faibussowitsch                 b        = (Mat_SeqAIJ *)B->data;
79399371c9d4SSatish Balay                 bimax    = b->imax;
79409371c9d4SSatish Balay                 bi       = b->i;
79419371c9d4SSatish Balay                 bilen    = b->ilen;
79429371c9d4SSatish Balay                 bj       = b->j;
794398921bdaSJacob Faibussowitsch                 rp2      = bj + bi[row];
794498921bdaSJacob Faibussowitsch                 ap2      = ba + bi[row];
794598921bdaSJacob Faibussowitsch                 rmax2    = bimax[row];
794698921bdaSJacob Faibussowitsch                 nrow2    = bilen[row];
794798921bdaSJacob Faibussowitsch                 low2     = 0;
794898921bdaSJacob Faibussowitsch                 high2    = nrow2;
794998921bdaSJacob Faibussowitsch                 bm       = aij->B->rmap->n;
795098921bdaSJacob Faibussowitsch                 ba       = b->a;
795198921bdaSJacob Faibussowitsch                 inserted = PETSC_FALSE;
795298921bdaSJacob Faibussowitsch               }
795398921bdaSJacob Faibussowitsch             } else col = in[j];
795498921bdaSJacob Faibussowitsch             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
795598921bdaSJacob Faibussowitsch           }
795698921bdaSJacob Faibussowitsch         }
795798921bdaSJacob Faibussowitsch       } else if (!aij->donotstash) {
795898921bdaSJacob Faibussowitsch         if (roworiented) {
79599566063dSJacob Faibussowitsch           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
796098921bdaSJacob Faibussowitsch         } else {
79619566063dSJacob Faibussowitsch           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
796298921bdaSJacob Faibussowitsch         }
796398921bdaSJacob Faibussowitsch       }
796498921bdaSJacob Faibussowitsch     }
79659566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArray(A, &aa));
79669566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArray(B, &ba));
796798921bdaSJacob Faibussowitsch   }
796898921bdaSJacob Faibussowitsch   PetscFunctionReturnVoid();
796998921bdaSJacob Faibussowitsch }
797072833a62Smarkadams4 
797198921bdaSJacob Faibussowitsch /* Undefining these here since they were redefined from their original definition above! No
797298921bdaSJacob Faibussowitsch  * other PETSc functions should be defined past this point, as it is impossible to recover the
797398921bdaSJacob Faibussowitsch  * original definitions */
79749566063dSJacob Faibussowitsch #undef PetscCall
797598921bdaSJacob Faibussowitsch #undef SETERRQ
7976