xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 674b392b83cf8035b957a991cb868619d727fa0c)
1c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2af0996ceSBarry Smith #include <petsc/private/vecimpl.h>
397929ea7SJunchao Zhang #include <petsc/private/sfimpl.h>
4af0996ceSBarry Smith #include <petsc/private/isimpl.h>
5c6db04a5SJed Brown #include <petscblaslapack.h>
60c312b8eSJed Brown #include <petscsf.h>
7bc8e477aSFande Kong #include <petsc/private/hashmapi.h>
88a729477SBarry Smith 
9*674b392bSAlexander /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
10*674b392bSAlexander #define TYPE AIJ
11*674b392bSAlexander #define TYPE_AIJ
12*674b392bSAlexander #include "../src/mat/impls/aij/mpi/mpihashmat.h"
13*674b392bSAlexander #undef TYPE
14*674b392bSAlexander #undef TYPE_AIJ
15*674b392bSAlexander 
16*674b392bSAlexander static PetscErrorCode MatReset_MPIAIJ(Mat mat)
1726cec326SBarry Smith {
1826cec326SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1926cec326SBarry Smith 
2026cec326SBarry Smith   PetscFunctionBegin;
2126cec326SBarry Smith   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
2226cec326SBarry Smith   PetscCall(MatStashDestroy_Private(&mat->stash));
2326cec326SBarry Smith   PetscCall(VecDestroy(&aij->diag));
2426cec326SBarry Smith   PetscCall(MatDestroy(&aij->A));
2526cec326SBarry Smith   PetscCall(MatDestroy(&aij->B));
2626cec326SBarry Smith #if defined(PETSC_USE_CTABLE)
2726cec326SBarry Smith   PetscCall(PetscHMapIDestroy(&aij->colmap));
2826cec326SBarry Smith #else
2926cec326SBarry Smith   PetscCall(PetscFree(aij->colmap));
3026cec326SBarry Smith #endif
3126cec326SBarry Smith   PetscCall(PetscFree(aij->garray));
3226cec326SBarry Smith   PetscCall(VecDestroy(&aij->lvec));
3326cec326SBarry Smith   PetscCall(VecScatterDestroy(&aij->Mvctx));
3426cec326SBarry Smith   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
3526cec326SBarry Smith   PetscCall(PetscFree(aij->ld));
36*674b392bSAlexander   PetscFunctionReturn(PETSC_SUCCESS);
37*674b392bSAlexander }
38*674b392bSAlexander 
39*674b392bSAlexander static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
40*674b392bSAlexander {
41*674b392bSAlexander   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
42*674b392bSAlexander   /* Save the nonzero states of the component matrices because those are what are used to determine
43*674b392bSAlexander     the nonzero state of mat */
44*674b392bSAlexander   PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;
45*674b392bSAlexander 
46*674b392bSAlexander   PetscFunctionBegin;
47*674b392bSAlexander   PetscCall(MatReset_MPIAIJ(mat));
48*674b392bSAlexander   PetscCall(MatSetUp_MPI_Hash(mat));
49*674b392bSAlexander   aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
50*674b392bSAlexander   PetscFunctionReturn(PETSC_SUCCESS);
51*674b392bSAlexander }
52*674b392bSAlexander 
53*674b392bSAlexander PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
54*674b392bSAlexander {
55*674b392bSAlexander   PetscFunctionBegin;
56*674b392bSAlexander   PetscCall(MatReset_MPIAIJ(mat));
5726cec326SBarry Smith 
5826cec326SBarry Smith   PetscCall(PetscFree(mat->data));
5926cec326SBarry Smith 
6026cec326SBarry Smith   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
6126cec326SBarry Smith   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
6226cec326SBarry Smith 
6326cec326SBarry Smith   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
6426cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
6526cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
6626cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
6726cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
6826cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
69*674b392bSAlexander   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
7026cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
7126cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
7226cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
7326cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
7426cec326SBarry Smith #if defined(PETSC_HAVE_CUDA)
7526cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
7626cec326SBarry Smith #endif
7726cec326SBarry Smith #if defined(PETSC_HAVE_HIP)
7826cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
7926cec326SBarry Smith #endif
8026cec326SBarry Smith #if defined(PETSC_HAVE_KOKKOS_KERNELS)
8126cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
8226cec326SBarry Smith #endif
8326cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
8426cec326SBarry Smith #if defined(PETSC_HAVE_ELEMENTAL)
8526cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
8626cec326SBarry Smith #endif
8726cec326SBarry Smith #if defined(PETSC_HAVE_SCALAPACK)
8826cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
8926cec326SBarry Smith #endif
9026cec326SBarry Smith #if defined(PETSC_HAVE_HYPRE)
9126cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
9226cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
9326cec326SBarry Smith #endif
9426cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
9526cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
9626cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
9726cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
9826cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
9926cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
10026cec326SBarry Smith #if defined(PETSC_HAVE_MKL_SPARSE)
10126cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
10226cec326SBarry Smith #endif
10326cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
10426cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
10526cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
10626cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
10726cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
10826cec326SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
10926cec326SBarry Smith }
11026cec326SBarry Smith 
111ba38deedSJacob Faibussowitsch static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
112d71ae5a4SJacob Faibussowitsch {
1138a9c020eSBarry Smith   Mat B;
1148a9c020eSBarry Smith 
1158a9c020eSBarry Smith   PetscFunctionBegin;
1168a9c020eSBarry Smith   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
1178a9c020eSBarry Smith   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
1188a9c020eSBarry Smith   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
119501b8e33SLisandro Dalcin   PetscCall(MatDestroy(&B));
1203ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1218a9c020eSBarry Smith }
1228a9c020eSBarry Smith 
123ba38deedSJacob Faibussowitsch static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
124d71ae5a4SJacob Faibussowitsch {
1258a9c020eSBarry Smith   Mat B;
1268a9c020eSBarry Smith 
1278a9c020eSBarry Smith   PetscFunctionBegin;
1288a9c020eSBarry Smith   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
1298a9c020eSBarry Smith   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
130501b8e33SLisandro Dalcin   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
1313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1328a9c020eSBarry Smith }
1338a9c020eSBarry Smith 
13401bebe75SBarry Smith /*MC
13501bebe75SBarry Smith    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
13601bebe75SBarry Smith 
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.
14201bebe75SBarry Smith 
14327430b45SBarry Smith    Options Database Key:
14411a5261eSBarry Smith . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
14501bebe75SBarry Smith 
   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`. The type also automatically switches over to use inodes when
   enough exist.
15101bebe75SBarry Smith 
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
15301bebe75SBarry Smith M*/
15401bebe75SBarry Smith 
15501bebe75SBarry Smith /*MC
15601bebe75SBarry Smith    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
15701bebe75SBarry Smith 
15811a5261eSBarry Smith    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
15911a5261eSBarry Smith    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
16011a5261eSBarry Smith    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
16101bebe75SBarry Smith   for communicators controlling multiple processes.  It is recommended that you call both of
16201bebe75SBarry Smith   the above preallocation routines for simplicity.
16301bebe75SBarry Smith 
16427430b45SBarry Smith    Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`
16601bebe75SBarry Smith 
16701bebe75SBarry Smith   Level: beginner
16801bebe75SBarry Smith 
.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
17001bebe75SBarry Smith M*/
17101bebe75SBarry Smith 
172d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
173d71ae5a4SJacob Faibussowitsch {
174f74ef234SStefano Zampini   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
175f74ef234SStefano Zampini 
176f74ef234SStefano Zampini   PetscFunctionBegin;
177d5e393b6SSuyash Tandon #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
178b470e4b4SRichard Tran Mills   A->boundtocpu = flg;
179f74ef234SStefano Zampini #endif
1801baa6e33SBarry Smith   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
1811baa6e33SBarry Smith   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
1823120d049SRichard Tran Mills 
1833120d049SRichard Tran Mills   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
1843120d049SRichard Tran Mills    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
1853120d049SRichard Tran Mills    * to differ from the parent matrix. */
1861baa6e33SBarry Smith   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
1871baa6e33SBarry Smith   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
1883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
189f74ef234SStefano Zampini }
190f74ef234SStefano Zampini 
191ba38deedSJacob Faibussowitsch static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
192d71ae5a4SJacob Faibussowitsch {
19326bda2c4Sstefano_zampini   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
19426bda2c4Sstefano_zampini 
19526bda2c4Sstefano_zampini   PetscFunctionBegin;
19646533700Sstefano_zampini   if (mat->A) {
1979566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
1989566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
19946533700Sstefano_zampini   }
2003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
20126bda2c4Sstefano_zampini }
20226bda2c4Sstefano_zampini 
203ba38deedSJacob Faibussowitsch static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
204d71ae5a4SJacob Faibussowitsch {
20527d4218bSShri Abhyankar   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
20627d4218bSShri Abhyankar   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
20727d4218bSShri Abhyankar   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
20827d4218bSShri Abhyankar   const PetscInt  *ia, *ib;
209ce496241SStefano Zampini   const MatScalar *aa, *bb, *aav, *bav;
21027d4218bSShri Abhyankar   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
21127d4218bSShri Abhyankar   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
21227d4218bSShri Abhyankar 
21327d4218bSShri Abhyankar   PetscFunctionBegin;
214f4259b30SLisandro Dalcin   *keptrows = NULL;
215ce496241SStefano Zampini 
21627d4218bSShri Abhyankar   ia = a->i;
21727d4218bSShri Abhyankar   ib = b->i;
2189566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
2199566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
22027d4218bSShri Abhyankar   for (i = 0; i < m; i++) {
22127d4218bSShri Abhyankar     na = ia[i + 1] - ia[i];
22227d4218bSShri Abhyankar     nb = ib[i + 1] - ib[i];
22327d4218bSShri Abhyankar     if (!na && !nb) {
22427d4218bSShri Abhyankar       cnt++;
22527d4218bSShri Abhyankar       goto ok1;
22627d4218bSShri Abhyankar     }
227ce496241SStefano Zampini     aa = aav + ia[i];
22827d4218bSShri Abhyankar     for (j = 0; j < na; j++) {
22927d4218bSShri Abhyankar       if (aa[j] != 0.0) goto ok1;
23027d4218bSShri Abhyankar     }
2318e3a54c0SPierre Jolivet     bb = PetscSafePointerPlusOffset(bav, ib[i]);
23227d4218bSShri Abhyankar     for (j = 0; j < nb; j++) {
23327d4218bSShri Abhyankar       if (bb[j] != 0.0) goto ok1;
23427d4218bSShri Abhyankar     }
23527d4218bSShri Abhyankar     cnt++;
23627d4218bSShri Abhyankar   ok1:;
23727d4218bSShri Abhyankar   }
238462c564dSBarry Smith   PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
239ce496241SStefano Zampini   if (!n0rows) {
2409566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
2419566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
2423ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
243ce496241SStefano Zampini   }
2449566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
24527d4218bSShri Abhyankar   cnt = 0;
24627d4218bSShri Abhyankar   for (i = 0; i < m; i++) {
24727d4218bSShri Abhyankar     na = ia[i + 1] - ia[i];
24827d4218bSShri Abhyankar     nb = ib[i + 1] - ib[i];
24927d4218bSShri Abhyankar     if (!na && !nb) continue;
250ce496241SStefano Zampini     aa = aav + ia[i];
25127d4218bSShri Abhyankar     for (j = 0; j < na; j++) {
25227d4218bSShri Abhyankar       if (aa[j] != 0.0) {
25327d4218bSShri Abhyankar         rows[cnt++] = rstart + i;
25427d4218bSShri Abhyankar         goto ok2;
25527d4218bSShri Abhyankar       }
25627d4218bSShri Abhyankar     }
2578e3a54c0SPierre Jolivet     bb = PetscSafePointerPlusOffset(bav, ib[i]);
25827d4218bSShri Abhyankar     for (j = 0; j < nb; j++) {
25927d4218bSShri Abhyankar       if (bb[j] != 0.0) {
26027d4218bSShri Abhyankar         rows[cnt++] = rstart + i;
26127d4218bSShri Abhyankar         goto ok2;
26227d4218bSShri Abhyankar       }
26327d4218bSShri Abhyankar     }
26427d4218bSShri Abhyankar   ok2:;
26527d4218bSShri Abhyankar   }
2669566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
2679566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
2689566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
2693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
27027d4218bSShri Abhyankar }
27127d4218bSShri Abhyankar 
272ba38deedSJacob Faibussowitsch static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
273d71ae5a4SJacob Faibussowitsch {
27499e65526SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
27594342113SStefano Zampini   PetscBool   cong;
27699e65526SBarry Smith 
27799e65526SBarry Smith   PetscFunctionBegin;
2789566063dSJacob Faibussowitsch   PetscCall(MatHasCongruentLayouts(Y, &cong));
27994342113SStefano Zampini   if (Y->assembled && cong) {
2809566063dSJacob Faibussowitsch     PetscCall(MatDiagonalSet(aij->A, D, is));
28199e65526SBarry Smith   } else {
2829566063dSJacob Faibussowitsch     PetscCall(MatDiagonalSet_Default(Y, D, is));
28399e65526SBarry Smith   }
2843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28599e65526SBarry Smith }
28699e65526SBarry Smith 
287ba38deedSJacob Faibussowitsch static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
288d71ae5a4SJacob Faibussowitsch {
289f1f41ecbSJed Brown   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
290f1f41ecbSJed Brown   PetscInt    i, rstart, nrows, *rows;
291f1f41ecbSJed Brown 
292f1f41ecbSJed Brown   PetscFunctionBegin;
2930298fd71SBarry Smith   *zrows = NULL;
2949566063dSJacob Faibussowitsch   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
2959566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
296f1f41ecbSJed Brown   for (i = 0; i < nrows; i++) rows[i] += rstart;
2979566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
2983ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
299f1f41ecbSJed Brown }
300f1f41ecbSJed Brown 
301ba38deedSJacob Faibussowitsch static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
302d71ae5a4SJacob Faibussowitsch {
3030716a85fSBarry Smith   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
304a873a8cdSSam Reynolds   PetscInt           i, m, n, *garray = aij->garray;
3050716a85fSBarry Smith   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
3060716a85fSBarry Smith   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
3070716a85fSBarry Smith   PetscReal         *work;
308ce496241SStefano Zampini   const PetscScalar *dummy;
3096497c311SBarry Smith   PetscMPIInt        in;
3100716a85fSBarry Smith 
3110716a85fSBarry Smith   PetscFunctionBegin;
3129566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, &m, &n));
3139566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n, &work));
3149566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
3159566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
3169566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
3179566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
318857cbf51SRichard Tran Mills   if (type == NORM_2) {
319ad540459SPierre Jolivet     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
320ad540459SPierre Jolivet     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
321857cbf51SRichard Tran Mills   } else if (type == NORM_1) {
322ad540459SPierre Jolivet     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
323ad540459SPierre Jolivet     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
324857cbf51SRichard Tran Mills   } else if (type == NORM_INFINITY) {
325ad540459SPierre Jolivet     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
326ad540459SPierre Jolivet     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
327857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
328ad540459SPierre Jolivet     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
329ad540459SPierre Jolivet     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
330857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
331ad540459SPierre Jolivet     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
332ad540459SPierre Jolivet     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
333857cbf51SRichard Tran Mills   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
3346497c311SBarry Smith   PetscCall(PetscMPIIntCast(n, &in));
335857cbf51SRichard Tran Mills   if (type == NORM_INFINITY) {
336462c564dSBarry Smith     PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
3370716a85fSBarry Smith   } else {
338462c564dSBarry Smith     PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
3390716a85fSBarry Smith   }
3409566063dSJacob Faibussowitsch   PetscCall(PetscFree(work));
341857cbf51SRichard Tran Mills   if (type == NORM_2) {
342a873a8cdSSam Reynolds     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
343857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
344a873a8cdSSam Reynolds     for (i = 0; i < n; i++) reductions[i] /= m;
3450716a85fSBarry Smith   }
3463ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3470716a85fSBarry Smith }
3480716a85fSBarry Smith 
349ba38deedSJacob Faibussowitsch static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
350d71ae5a4SJacob Faibussowitsch {
351e52d2c62SBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
352e52d2c62SBarry Smith   IS              sis, gis;
353e52d2c62SBarry Smith   const PetscInt *isis, *igis;
354e52d2c62SBarry Smith   PetscInt        n, *iis, nsis, ngis, rstart, i;
355e52d2c62SBarry Smith 
356e52d2c62SBarry Smith   PetscFunctionBegin;
3579566063dSJacob Faibussowitsch   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
3589566063dSJacob Faibussowitsch   PetscCall(MatFindNonzeroRows(a->B, &gis));
3599566063dSJacob Faibussowitsch   PetscCall(ISGetSize(gis, &ngis));
3609566063dSJacob Faibussowitsch   PetscCall(ISGetSize(sis, &nsis));
3619566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(sis, &isis));
3629566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(gis, &igis));
363e52d2c62SBarry Smith 
3649566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ngis + nsis, &iis));
3659566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(iis, igis, ngis));
3669566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
367e52d2c62SBarry Smith   n = ngis + nsis;
3689566063dSJacob Faibussowitsch   PetscCall(PetscSortRemoveDupsInt(&n, iis));
3699566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
370e52d2c62SBarry Smith   for (i = 0; i < n; i++) iis[i] += rstart;
3719566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
372e52d2c62SBarry Smith 
3739566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(sis, &isis));
3749566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(gis, &igis));
3759566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&sis));
3769566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&gis));
3773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
378e52d2c62SBarry Smith }
379e52d2c62SBarry Smith 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each processor
has an order N integer array) but is fast to access.
*/
387d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
388d71ae5a4SJacob Faibussowitsch {
38944a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
390d0f46423SBarry Smith   PetscInt    n   = aij->B->cmap->n, i;
391dbb450caSBarry Smith 
3923a40ed3dSBarry Smith   PetscFunctionBegin;
39308401ef6SPierre Jolivet   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
394aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
395eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
396c76ffc5fSJacob Faibussowitsch   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
397b1fc9764SSatish Balay #else
3989566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
399905e6a2fSBarry Smith   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
400b1fc9764SSatish Balay #endif
4013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4029e25ed09SBarry Smith }
4039e25ed09SBarry Smith 
/*
  MatSetValues_SeqAIJ_A_Private - Inserts or adds one value into one row, using the "1" family of
  row variables.

  Macro arguments:
+ row   - row index, used to index ailen[] and compared with col in the zero-skip test
. col   - column index searched for in rp1[]
. value - the value to insert or add
. addv  - ADD_VALUES adds to an existing entry; any other mode overwrites it
. orow  - row index printed in the error message
- ocol  - column index printed in the error message

  Variables taken from the expanding scope (they are not macro arguments):
  low1, high1, lastcol1, nrow1, rp1, ap1, rmax1, t, _i, N, a, A, am, aa, ai, aj, aimax, ailen,
  nonew and ignorezeroentries.

  Steps:
  1. Reuse the previous search window: if col <= lastcol1 the lower bound is reset, otherwise the
     upper bound is reset.
  2. Bisect until the window [low1, high1) holds at most 5 entries, then scan it linearly.
  3. If col is found, update the stored value and finish.
  4. If col is not found, do nothing when the value is zero, ignorezeroentries is set and
     row != col, or when nonew == 1; raise an error when nonew == -1.
  5. Otherwise call MatSeqXAIJReallocateAIJ(), shift the later entries of the row up by one and
     store the new column and value at position _i.

  Note:
  The macro defines the label a_noinsert, so it can be expanded at most once per function.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)
4480a198c4cSBarry Smith 
/*
  MatSetValues_SeqAIJ_B_Private - Inserts or adds one value into one row, using the "2" family of
  row variables.

  Macro arguments:
+ row   - row index, used to index bilen[]
. col   - column index searched for in rp2[]
. value - the value to insert or add
. addv  - ADD_VALUES adds to an existing entry; any other mode overwrites it
. orow  - row index printed in the error message
- ocol  - column index printed in the error message

  Variables taken from the expanding scope (they are not macro arguments):
  low2, high2, lastcol2, nrow2, rp2, ap2, rmax2, t, _i, N, b, B, bm, ba, bi, bj, bimax, bilen,
  nonew and ignorezeroentries.

  Steps:
  1. Reuse the previous search window: if col <= lastcol2 the lower bound is reset, otherwise the
     upper bound is reset.
  2. Bisect until the window [low2, high2) holds at most 5 entries, then scan it linearly.
  3. If col is found, update the stored value and finish.
  4. If col is not found, do nothing when the value is zero and ignorezeroentries is set (there is
     no row != col exception here), or when nonew == 1; raise an error when nonew == -1.
  5. Otherwise call MatSeqXAIJReallocateAIJ(), shift the later entries of the row up by one and
     store the new column and value at position _i.

  Note:
  The macro defines the label b_noinsert, so it can be expanded at most once per function.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)
49230770e4dSSatish Balay 
493ba38deedSJacob Faibussowitsch static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
494d71ae5a4SJacob Faibussowitsch {
4952fd7e33dSBarry Smith   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
4962fd7e33dSBarry Smith   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
4972fd7e33dSBarry Smith   PetscInt     l, *garray                         = mat->garray, diag;
498fff043a9SJunchao Zhang   PetscScalar *aa, *ba;
4992fd7e33dSBarry Smith 
5002fd7e33dSBarry Smith   PetscFunctionBegin;
5012fd7e33dSBarry Smith   /* code only works for square matrices A */
5022fd7e33dSBarry Smith 
5032fd7e33dSBarry Smith   /* find size of row to the left of the diagonal part */
5049566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
5052fd7e33dSBarry Smith   row = row - diag;
5062fd7e33dSBarry Smith   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
5072fd7e33dSBarry Smith     if (garray[b->j[b->i[row] + l]] > diag) break;
5082fd7e33dSBarry Smith   }
509fff043a9SJunchao Zhang   if (l) {
5109566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
5119566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
5129566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
513fff043a9SJunchao Zhang   }
5142fd7e33dSBarry Smith 
5152fd7e33dSBarry Smith   /* diagonal part */
516fff043a9SJunchao Zhang   if (a->i[row + 1] - a->i[row]) {
5179566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
51857508eceSPierre Jolivet     PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
5199566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
520fff043a9SJunchao Zhang   }
5212fd7e33dSBarry Smith 
5222fd7e33dSBarry Smith   /* right of diagonal part */
523fff043a9SJunchao Zhang   if (b->i[row + 1] - b->i[row] - l) {
5249566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
5259566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
5269566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
527fff043a9SJunchao Zhang   }
5283ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
5292fd7e33dSBarry Smith }
5302fd7e33dSBarry Smith 
/*
   MatSetValues_MPIAIJ - Inserts or adds an m x n block of values given by global row indices
   im[] and global column indices in[].

   v[] is read as v[i * n + j] when aij->roworiented is set and as v[i + j * m] otherwise;
   when v is NULL the value 0.0 is used for every entry.

   Rows:
     - negative row indices are skipped, indices >= mat->rmap->N raise an error;
     - rows in [rstart, rend) are handled here, entry by entry;
     - all other rows raise an error if mat->nooffprocentries is set; otherwise, unless
       aij->donotstash is set, the whole row is put into mat->stash and mat->assembled is cleared.

   Columns of a locally owned row:
     - columns in [cstart, cend) go to the diagonal block aij->A with local column index;
     - negative columns are skipped, columns >= mat->cmap->N raise an error;
     - all other columns go to the off-diagonal block aij->B. If mat->was_assembled is set the
       global column is first translated through aij->colmap; otherwise the global index is used.

   The local variables declared under "Some Variables required in the macro" are referenced by
   name from MatSetValues_SeqAIJ_A_Private()/MatSetValues_SeqAIJ_B_Private() and must keep
   these exact names.
*/
531d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
532d71ae5a4SJacob Faibussowitsch {
53344a69424SLois Curfman McInnes   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
534071fcb05SBarry Smith   PetscScalar value = 0.0;
535d0f46423SBarry Smith   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
536d0f46423SBarry Smith   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
537ace3abfcSBarry Smith   PetscBool   roworiented = aij->roworiented;
5388a729477SBarry Smith 
5390520107fSSatish Balay   /* Some Variables required in the macro */
5404ee7247eSSatish Balay   Mat         A     = aij->A;
5414ee7247eSSatish Balay   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
54257809a77SBarry Smith   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
543ace3abfcSBarry Smith   PetscBool   ignorezeroentries = a->ignorezeroentries;
54430770e4dSSatish Balay   Mat         B                 = aij->B;
54530770e4dSSatish Balay   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
546d0f46423SBarry Smith   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
547ce496241SStefano Zampini   MatScalar  *aa, *ba;
548fd3458f5SBarry Smith   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
5498d76821aSHong Zhang   PetscInt    nonew;
550a77337e4SBarry Smith   MatScalar  *ap1, *ap2;
5514ee7247eSSatish Balay 
5523a40ed3dSBarry Smith   PetscFunctionBegin;
5539566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(A, &aa));
5549566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(B, &ba));
5558a729477SBarry Smith   for (i = 0; i < m; i++) {
5565ef9f2a5SBarry Smith     if (im[i] < 0) continue;
55708401ef6SPierre Jolivet     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
5584b0e389bSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row state used by the A (suffix 1) and B (suffix 2) macros */
5594b0e389bSBarry Smith       row      = im[i] - rstart;
560fd3458f5SBarry Smith       lastcol1 = -1;
5618e3a54c0SPierre Jolivet       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
5628e3a54c0SPierre Jolivet       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
563fd3458f5SBarry Smith       rmax1    = aimax[row];
564fd3458f5SBarry Smith       nrow1    = ailen[row];
565fd3458f5SBarry Smith       low1     = 0;
566fd3458f5SBarry Smith       high1    = nrow1;
567fd3458f5SBarry Smith       lastcol2 = -1;
5688e3a54c0SPierre Jolivet       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
5698e3a54c0SPierre Jolivet       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
570fd3458f5SBarry Smith       rmax2    = bimax[row];
571d498b1e9SBarry Smith       nrow2    = bilen[row];
572fd3458f5SBarry Smith       low2     = 0;
573fd3458f5SBarry Smith       high2    = nrow2;
574fd3458f5SBarry Smith 
5751eb62cbbSBarry Smith       for (j = 0; j < n; j++) {
576071fcb05SBarry Smith         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        /* when adding with ignorezeroentries set, zeros are dropped early except on the global diagonal (im[i] == in[j]) */
577c80a64e6SBarry Smith         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
578fd3458f5SBarry Smith         if (in[j] >= cstart && in[j] < cend) {
579fd3458f5SBarry Smith           col   = in[j] - cstart;
5808d76821aSHong Zhang           nonew = a->nonew;
581d40312a9SBarry Smith           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
582f7d195e4SLawrence Mitchell         } else if (in[j] < 0) {
583f7d195e4SLawrence Mitchell           continue;
584f7d195e4SLawrence Mitchell         } else {
585f7d195e4SLawrence Mitchell           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
586227d817aSBarry Smith           if (mat->was_assembled) {
58748a46eb9SPierre Jolivet             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
            /* colmap stores (local column + 1), so a lookup result of 0 becomes col == -1, i.e. "not present" */
588aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
589eec179cfSJacob Faibussowitsch             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
590fa46199cSSatish Balay             col--;
591b1fc9764SSatish Balay #else
592905e6a2fSBarry Smith             col = aij->colmap[in[j]] - 1;
593b1fc9764SSatish Balay #endif
594f4f49eeaSPierre Jolivet             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
5959566063dSJacob Faibussowitsch               PetscCall(MatDisAssemble_MPIAIJ(mat));               /* Change aij->B from reduced/local format to expanded/global format */
5964b0e389bSBarry Smith               col = in[j];
5979bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
598f9508a3cSSatish Balay               B     = aij->B;
599f9508a3cSSatish Balay               b     = (Mat_SeqAIJ *)B->data;
6009371c9d4SSatish Balay               bimax = b->imax;
6019371c9d4SSatish Balay               bi    = b->i;
6029371c9d4SSatish Balay               bilen = b->ilen;
6039371c9d4SSatish Balay               bj    = b->j;
6049371c9d4SSatish Balay               ba    = b->a;
605cff58d65SJunchao Zhang               rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
606cff58d65SJunchao Zhang               ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
607d498b1e9SBarry Smith               rmax2 = bimax[row];
608d498b1e9SBarry Smith               nrow2 = bilen[row];
609d498b1e9SBarry Smith               low2  = 0;
610d498b1e9SBarry Smith               high2 = nrow2;
611d0f46423SBarry Smith               bm    = aij->B->rmap->n;
              /* NOTE(review): ba was already set to b->a a few lines above; this second assignment is redundant */
612f9508a3cSSatish Balay               ba    = b->a;
613d707bf6cSMatthew Knepley             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new off-diagonal location while B's nonew is nonzero: only nonew == 1 is tolerated (with an info message) */
614f4f49eeaSPierre Jolivet               if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
6159566063dSJacob Faibussowitsch                 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
61698921bdaSJacob Faibussowitsch               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
6170587a0fcSBarry Smith             }
618c48de900SBarry Smith           } else col = in[j];
          /* col can still be negative here; the macro's ignorezeroentries and nonew == 1 checks then skip the insertion */
6198d76821aSHong Zhang           nonew = b->nonew;
620d40312a9SBarry Smith           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
6211eb62cbbSBarry Smith         }
6221eb62cbbSBarry Smith       }
6235ef9f2a5SBarry Smith     } else {
      /* row owned by another process: stash it unless stashing is disabled */
62428b400f6SJacob Faibussowitsch       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
62590f02eecSBarry Smith       if (!aij->donotstash) {
6265080c13bSMatthew G Knepley         mat->assembled = PETSC_FALSE;
627d36fbae8SSatish Balay         if (roworiented) {
6288e3a54c0SPierre Jolivet           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
629d36fbae8SSatish Balay         } else {
6308e3a54c0SPierre Jolivet           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
6314b0e389bSBarry Smith         }
6321eb62cbbSBarry Smith       }
6338a729477SBarry Smith     }
63490f02eecSBarry Smith   }
6355519a089SJose E. Roman   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above. But we don't access them here */
6369566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArray(B, &ba));
6373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6388a729477SBarry Smith }
6398a729477SBarry Smith 
6402b08fdbeSandi selinger /*
641904d1e70Sandi selinger     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
6422b08fdbeSandi selinger     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
643904d1e70Sandi selinger     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
6442b08fdbeSandi selinger */
645d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
646d71ae5a4SJacob Faibussowitsch {
647904d1e70Sandi selinger   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
648904d1e70Sandi selinger   Mat         A      = aij->A; /* diagonal part of the matrix */
6494cf0e950SBarry Smith   Mat         B      = aij->B; /* off-diagonal part of the matrix */
650904d1e70Sandi selinger   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
651904d1e70Sandi selinger   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
652904d1e70Sandi selinger   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
653904d1e70Sandi selinger   PetscInt   *ailen = a->ilen, *aj = a->j;
654904d1e70Sandi selinger   PetscInt   *bilen = b->ilen, *bj = b->j;
6556dc1ffa3Sandi selinger   PetscInt    am          = aij->A->rmap->n, j;
656904d1e70Sandi selinger   PetscInt    diag_so_far = 0, dnz;
657904d1e70Sandi selinger   PetscInt    offd_so_far = 0, onz;
658904d1e70Sandi selinger 
659904d1e70Sandi selinger   PetscFunctionBegin;
660904d1e70Sandi selinger   /* Iterate over all rows of the matrix */
661904d1e70Sandi selinger   for (j = 0; j < am; j++) {
662904d1e70Sandi selinger     dnz = onz = 0;
663904d1e70Sandi selinger     /*  Iterate over all non-zero columns of the current row */
6646dc1ffa3Sandi selinger     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
665904d1e70Sandi selinger       /* If column is in the diagonal */
666904d1e70Sandi selinger       if (mat_j[col] >= cstart && mat_j[col] < cend) {
667904d1e70Sandi selinger         aj[diag_so_far++] = mat_j[col] - cstart;
668904d1e70Sandi selinger         dnz++;
669904d1e70Sandi selinger       } else { /* off-diagonal entries */
670904d1e70Sandi selinger         bj[offd_so_far++] = mat_j[col];
671904d1e70Sandi selinger         onz++;
672904d1e70Sandi selinger       }
673904d1e70Sandi selinger     }
674904d1e70Sandi selinger     ailen[j] = dnz;
675904d1e70Sandi selinger     bilen[j] = onz;
676904d1e70Sandi selinger   }
6773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
678904d1e70Sandi selinger }
679904d1e70Sandi selinger 
680904d1e70Sandi selinger /*
681904d1e70Sandi selinger     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
682904d1e70Sandi selinger     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
6831de21080Sandi selinger     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
6841de21080Sandi selinger     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
6851de21080Sandi selinger     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
686904d1e70Sandi selinger */
687d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
688d71ae5a4SJacob Faibussowitsch {
6893a063d27Sandi selinger   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
6903a063d27Sandi selinger   Mat          A    = aij->A; /* diagonal part of the matrix */
6914cf0e950SBarry Smith   Mat          B    = aij->B; /* off-diagonal part of the matrix */
692f4f49eeaSPierre Jolivet   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
6933a063d27Sandi selinger   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
6943a063d27Sandi selinger   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
6953a063d27Sandi selinger   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
6963a063d27Sandi selinger   PetscInt    *ailen = a->ilen, *aj = a->j;
6973a063d27Sandi selinger   PetscInt    *bilen = b->ilen, *bj = b->j;
6986dc1ffa3Sandi selinger   PetscInt     am          = aij->A->rmap->n, j;
6991de21080Sandi selinger   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
700904d1e70Sandi selinger   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
701904d1e70Sandi selinger   PetscScalar *aa = a->a, *ba = b->a;
7023a063d27Sandi selinger 
7033a063d27Sandi selinger   PetscFunctionBegin;
7043a063d27Sandi selinger   /* Iterate over all rows of the matrix */
7053a063d27Sandi selinger   for (j = 0; j < am; j++) {
706904d1e70Sandi selinger     dnz_row = onz_row = 0;
707904d1e70Sandi selinger     rowstart_offd     = full_offd_i[j];
708904d1e70Sandi selinger     rowstart_diag     = full_diag_i[j];
709e9ede7d0Sandi selinger     /*  Iterate over all non-zero columns of the current row */
710e9ede7d0Sandi selinger     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
711ae8e66a0Sandi selinger       /* If column is in the diagonal */
7123a063d27Sandi selinger       if (mat_j[col] >= cstart && mat_j[col] < cend) {
713904d1e70Sandi selinger         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
714904d1e70Sandi selinger         aa[rowstart_diag + dnz_row] = mat_a[col];
715904d1e70Sandi selinger         dnz_row++;
716ae8e66a0Sandi selinger       } else { /* off-diagonal entries */
717904d1e70Sandi selinger         bj[rowstart_offd + onz_row] = mat_j[col];
718904d1e70Sandi selinger         ba[rowstart_offd + onz_row] = mat_a[col];
719904d1e70Sandi selinger         onz_row++;
7203a063d27Sandi selinger       }
7213a063d27Sandi selinger     }
722904d1e70Sandi selinger     ailen[j] = dnz_row;
723904d1e70Sandi selinger     bilen[j] = onz_row;
7243a063d27Sandi selinger   }
7253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
7263a063d27Sandi selinger }
7273a063d27Sandi selinger 
728ba38deedSJacob Faibussowitsch static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
729d71ae5a4SJacob Faibussowitsch {
730b49de8d1SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
731d0f46423SBarry Smith   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
732d0f46423SBarry Smith   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
733b49de8d1SLois Curfman McInnes 
7343a40ed3dSBarry Smith   PetscFunctionBegin;
735b49de8d1SLois Curfman McInnes   for (i = 0; i < m; i++) {
73654c59aa7SJacob Faibussowitsch     if (idxm[i] < 0) continue; /* negative row */
73754c59aa7SJacob Faibussowitsch     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
73885835d77SBarry Smith     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
739b49de8d1SLois Curfman McInnes     row = idxm[i] - rstart;
740b49de8d1SLois Curfman McInnes     for (j = 0; j < n; j++) {
74154c59aa7SJacob Faibussowitsch       if (idxn[j] < 0) continue; /* negative column */
74254c59aa7SJacob Faibussowitsch       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
743b49de8d1SLois Curfman McInnes       if (idxn[j] >= cstart && idxn[j] < cend) {
744b49de8d1SLois Curfman McInnes         col = idxn[j] - cstart;
7459566063dSJacob Faibussowitsch         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
746fa852ad4SSatish Balay       } else {
74748a46eb9SPierre Jolivet         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
748aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
749eec179cfSJacob Faibussowitsch         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
750fa46199cSSatish Balay         col--;
751b1fc9764SSatish Balay #else
752905e6a2fSBarry Smith         col = aij->colmap[idxn[j]] - 1;
753b1fc9764SSatish Balay #endif
754e60e1c95SSatish Balay         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
75548a46eb9SPierre Jolivet         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
756b49de8d1SLois Curfman McInnes       }
757b49de8d1SLois Curfman McInnes     }
758b49de8d1SLois Curfman McInnes   }
7593ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
760b49de8d1SLois Curfman McInnes }
761bc5ccf88SSatish Balay 
762ba38deedSJacob Faibussowitsch static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
763d71ae5a4SJacob Faibussowitsch {
764bc5ccf88SSatish Balay   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
765b1d57f15SBarry Smith   PetscInt    nstash, reallocs;
766bc5ccf88SSatish Balay 
767bc5ccf88SSatish Balay   PetscFunctionBegin;
7683ba16761SJacob Faibussowitsch   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
769bc5ccf88SSatish Balay 
7709566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
7719566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
7729566063dSJacob Faibussowitsch   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
7733ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
774bc5ccf88SSatish Balay }
775bc5ccf88SSatish Balay 
776d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
777d71ae5a4SJacob Faibussowitsch {
778bc5ccf88SSatish Balay   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
779b1d57f15SBarry Smith   PetscMPIInt  n;
780b1d57f15SBarry Smith   PetscInt     i, j, rstart, ncols, flg;
781e44c0bd4SBarry Smith   PetscInt    *row, *col;
782ace3abfcSBarry Smith   PetscBool    other_disassembled;
78387828ca2SBarry Smith   PetscScalar *val;
784bc5ccf88SSatish Balay 
78591c97fd4SSatish Balay   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
7866e111a19SKarl Rupp 
787bc5ccf88SSatish Balay   PetscFunctionBegin;
7884cb17eb5SBarry Smith   if (!aij->donotstash && !mat->nooffprocentries) {
789a2d1c673SSatish Balay     while (1) {
7909566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
791a2d1c673SSatish Balay       if (!flg) break;
792a2d1c673SSatish Balay 
793bc5ccf88SSatish Balay       for (i = 0; i < n;) {
794bc5ccf88SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
7952205254eSKarl Rupp         for (j = i, rstart = row[j]; j < n; j++) {
7962205254eSKarl Rupp           if (row[j] != rstart) break;
7972205254eSKarl Rupp         }
798bc5ccf88SSatish Balay         if (j < n) ncols = j - i;
799bc5ccf88SSatish Balay         else ncols = n - i;
800bc5ccf88SSatish Balay         /* Now assemble all these values with a single function call */
8019566063dSJacob Faibussowitsch         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
802bc5ccf88SSatish Balay         i = j;
803bc5ccf88SSatish Balay       }
804bc5ccf88SSatish Balay     }
8059566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->stash));
806bc5ccf88SSatish Balay   }
8078c3ff71bSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
808c70f7ee4SJunchao Zhang   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
8099ecce9b1SRichard Tran Mills   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
8109ecce9b1SRichard Tran Mills   if (mat->boundtocpu) {
8119566063dSJacob Faibussowitsch     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
8129566063dSJacob Faibussowitsch     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
8139ecce9b1SRichard Tran Mills   }
814e2cf4d64SStefano Zampini #endif
8159566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(aij->A, mode));
8169566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(aij->A, mode));
817bc5ccf88SSatish Balay 
818bc5ccf88SSatish Balay   /* determine if any processor has disassembled, if so we must
819071fcb05SBarry Smith      also disassemble ourself, in order that we may reassemble. */
820bc5ccf88SSatish Balay   /*
821bc5ccf88SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
822bc5ccf88SSatish Balay      no processor disassembled thus we can skip this stuff
823bc5ccf88SSatish Balay   */
824bc5ccf88SSatish Balay   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
825462c564dSBarry Smith     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
82635cb6cd3SPierre Jolivet     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
8279566063dSJacob Faibussowitsch       PetscCall(MatDisAssemble_MPIAIJ(mat));
828ad59fb31SSatish Balay     }
829ad59fb31SSatish Balay   }
83048a46eb9SPierre Jolivet   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
8319566063dSJacob Faibussowitsch   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
8328c3ff71bSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
833c70f7ee4SJunchao Zhang   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
834e2cf4d64SStefano Zampini #endif
8359566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(aij->B, mode));
8369566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(aij->B, mode));
837bc5ccf88SSatish Balay 
8389566063dSJacob Faibussowitsch   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
8392205254eSKarl Rupp 
840f4259b30SLisandro Dalcin   aij->rowvalues = NULL;
841a30b2313SHong Zhang 
8429566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&aij->diag));
843e56f5c9eSBarry Smith 
8444f9cfa9eSBarry Smith   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
845f4f49eeaSPierre Jolivet   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
846e56f5c9eSBarry Smith     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
847462c564dSBarry Smith     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
848e56f5c9eSBarry Smith   }
8498c3ff71bSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
850c70f7ee4SJunchao Zhang   mat->offloadmask = PETSC_OFFLOAD_BOTH;
851e2cf4d64SStefano Zampini #endif
8523ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
853bc5ccf88SSatish Balay }
854bc5ccf88SSatish Balay 
855ba38deedSJacob Faibussowitsch static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
856d71ae5a4SJacob Faibussowitsch {
85744a69424SLois Curfman McInnes   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
8583a40ed3dSBarry Smith 
8593a40ed3dSBarry Smith   PetscFunctionBegin;
8609566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->A));
8619566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->B));
8623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
8631eb62cbbSBarry Smith }
8641eb62cbbSBarry Smith 
865ba38deedSJacob Faibussowitsch static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
866d71ae5a4SJacob Faibussowitsch {
8671b1dd7adSMatthew G. Knepley   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
8681b1dd7adSMatthew G. Knepley   PetscInt   *lrows;
8696e520ac8SStefano Zampini   PetscInt    r, len;
8709939a2d1SBarry Smith   PetscBool   cong;
8711eb62cbbSBarry Smith 
8723a40ed3dSBarry Smith   PetscFunctionBegin;
8736e520ac8SStefano Zampini   /* get locally owned rows */
8749566063dSJacob Faibussowitsch   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
8759566063dSJacob Faibussowitsch   PetscCall(MatHasCongruentLayouts(A, &cong));
876dd8e379bSPierre Jolivet   /* fix right-hand side if needed */
87797b48c8fSBarry Smith   if (x && b) {
8781b1dd7adSMatthew G. Knepley     const PetscScalar *xx;
8791b1dd7adSMatthew G. Knepley     PetscScalar       *bb;
8801b1dd7adSMatthew G. Knepley 
88128b400f6SJacob Faibussowitsch     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
8829566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
8839566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
8841b1dd7adSMatthew G. Knepley     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
8859566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
8869566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
88797b48c8fSBarry Smith   }
888a92ad425SStefano Zampini 
889a92ad425SStefano Zampini   if (diag != 0.0 && cong) {
8909566063dSJacob Faibussowitsch     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
8919566063dSJacob Faibussowitsch     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
892a92ad425SStefano Zampini   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
893a92ad425SStefano Zampini     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
894a92ad425SStefano Zampini     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
895a92ad425SStefano Zampini     PetscInt    nnwA, nnwB;
896a92ad425SStefano Zampini     PetscBool   nnzA, nnzB;
897a92ad425SStefano Zampini 
898a92ad425SStefano Zampini     nnwA = aijA->nonew;
899a92ad425SStefano Zampini     nnwB = aijB->nonew;
900a92ad425SStefano Zampini     nnzA = aijA->keepnonzeropattern;
901a92ad425SStefano Zampini     nnzB = aijB->keepnonzeropattern;
902a92ad425SStefano Zampini     if (!nnzA) {
9039566063dSJacob Faibussowitsch       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
904a92ad425SStefano Zampini       aijA->nonew = 0;
905a92ad425SStefano Zampini     }
906a92ad425SStefano Zampini     if (!nnzB) {
9079566063dSJacob Faibussowitsch       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
908a92ad425SStefano Zampini       aijB->nonew = 0;
909a92ad425SStefano Zampini     }
910a92ad425SStefano Zampini     /* Must zero here before the next loop */
9119566063dSJacob Faibussowitsch     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
9129566063dSJacob Faibussowitsch     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
9131b1dd7adSMatthew G. Knepley     for (r = 0; r < len; ++r) {
9141b1dd7adSMatthew G. Knepley       const PetscInt row = lrows[r] + A->rmap->rstart;
915a92ad425SStefano Zampini       if (row >= A->cmap->N) continue;
9169566063dSJacob Faibussowitsch       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
917e2d53e46SBarry Smith     }
918a92ad425SStefano Zampini     aijA->nonew = nnwA;
919a92ad425SStefano Zampini     aijB->nonew = nnwB;
9206eb55b6aSBarry Smith   } else {
9219566063dSJacob Faibussowitsch     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
9229566063dSJacob Faibussowitsch     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
9236eb55b6aSBarry Smith   }
9249566063dSJacob Faibussowitsch   PetscCall(PetscFree(lrows));
9259566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
9269566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
9274f9cfa9eSBarry Smith 
9289939a2d1SBarry Smith   /* only change matrix nonzero state if pattern was allowed to be changed */
929f4f49eeaSPierre Jolivet   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
9309939a2d1SBarry Smith     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
931462c564dSBarry Smith     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
9329939a2d1SBarry Smith   }
9333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9341eb62cbbSBarry Smith }
9351eb62cbbSBarry Smith 
936ba38deedSJacob Faibussowitsch static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
937d71ae5a4SJacob Faibussowitsch {
9389c7c4993SBarry Smith   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
9396497c311SBarry Smith   PetscInt           n = A->rmap->n;
940131c27b5Sprj-   PetscInt           i, j, r, m, len = 0;
94154bd4135SMatthew G. Knepley   PetscInt          *lrows, *owners = A->rmap->range;
942131c27b5Sprj-   PetscMPIInt        p = 0;
94354bd4135SMatthew G. Knepley   PetscSFNode       *rrows;
94454bd4135SMatthew G. Knepley   PetscSF            sf;
9459c7c4993SBarry Smith   const PetscScalar *xx;
946fff043a9SJunchao Zhang   PetscScalar       *bb, *mask, *aij_a;
947564f14d6SBarry Smith   Vec                xmask, lmask;
948564f14d6SBarry Smith   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
949564f14d6SBarry Smith   const PetscInt    *aj, *ii, *ridx;
950564f14d6SBarry Smith   PetscScalar       *aa;
9519c7c4993SBarry Smith 
9529c7c4993SBarry Smith   PetscFunctionBegin;
95354bd4135SMatthew G. Knepley   /* Create SF where leaves are input rows and roots are owned rows */
9549566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &lrows));
95554bd4135SMatthew G. Knepley   for (r = 0; r < n; ++r) lrows[r] = -1;
9569566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(N, &rrows));
95754bd4135SMatthew G. Knepley   for (r = 0; r < N; ++r) {
95854bd4135SMatthew G. Knepley     const PetscInt idx = rows[r];
959aed4548fSBarry Smith     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
9605ba17502SJed Brown     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
9619566063dSJacob Faibussowitsch       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
9625ba17502SJed Brown     }
96354bd4135SMatthew G. Knepley     rrows[r].rank  = p;
96454bd4135SMatthew G. Knepley     rrows[r].index = rows[r] - owners[p];
9659c7c4993SBarry Smith   }
9669566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
9679566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
96854bd4135SMatthew G. Knepley   /* Collect flags for rows to be zeroed */
9699566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
9709566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
9719566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf));
97254bd4135SMatthew G. Knepley   /* Compress and put in row numbers */
9739371c9d4SSatish Balay   for (r = 0; r < n; ++r)
9749371c9d4SSatish Balay     if (lrows[r] >= 0) lrows[len++] = r;
975564f14d6SBarry Smith   /* zero diagonal part of matrix */
9769566063dSJacob Faibussowitsch   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
9774cf0e950SBarry Smith   /* handle off-diagonal part of matrix */
9789566063dSJacob Faibussowitsch   PetscCall(MatCreateVecs(A, &xmask, NULL));
9799566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(l->lvec, &lmask));
9809566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xmask, &bb));
98154bd4135SMatthew G. Knepley   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
9829566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xmask, &bb));
9839566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
9849566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
9859566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&xmask));
986a92ad425SStefano Zampini   if (x && b) { /* this code is buggy when the row and column layout don't match */
987a92ad425SStefano Zampini     PetscBool cong;
988a92ad425SStefano Zampini 
9899566063dSJacob Faibussowitsch     PetscCall(MatHasCongruentLayouts(A, &cong));
99028b400f6SJacob Faibussowitsch     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
9919566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
9929566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
9939566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(l->lvec, &xx));
9949566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
995377aa5a1SBarry Smith   }
9969566063dSJacob Faibussowitsch   PetscCall(VecGetArray(lmask, &mask));
9974cf0e950SBarry Smith   /* remove zeroed rows of off-diagonal matrix */
9989566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
999564f14d6SBarry Smith   ii = aij->i;
10008e3a54c0SPierre Jolivet   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
1001564f14d6SBarry Smith   /* loop over all elements of off process part of matrix zeroing removed columns*/
1002564f14d6SBarry Smith   if (aij->compressedrow.use) {
1003564f14d6SBarry Smith     m    = aij->compressedrow.nrows;
1004564f14d6SBarry Smith     ii   = aij->compressedrow.i;
1005564f14d6SBarry Smith     ridx = aij->compressedrow.rindex;
1006564f14d6SBarry Smith     for (i = 0; i < m; i++) {
1007564f14d6SBarry Smith       n  = ii[i + 1] - ii[i];
1008564f14d6SBarry Smith       aj = aij->j + ii[i];
1009fff043a9SJunchao Zhang       aa = aij_a + ii[i];
1010564f14d6SBarry Smith 
1011564f14d6SBarry Smith       for (j = 0; j < n; j++) {
101225266a92SSatish Balay         if (PetscAbsScalar(mask[*aj])) {
1013377aa5a1SBarry Smith           if (b) bb[*ridx] -= *aa * xx[*aj];
1014564f14d6SBarry Smith           *aa = 0.0;
1015564f14d6SBarry Smith         }
1016564f14d6SBarry Smith         aa++;
1017564f14d6SBarry Smith         aj++;
1018564f14d6SBarry Smith       }
1019564f14d6SBarry Smith       ridx++;
1020564f14d6SBarry Smith     }
1021564f14d6SBarry Smith   } else { /* do not use compressed row format */
1022564f14d6SBarry Smith     m = l->B->rmap->n;
1023564f14d6SBarry Smith     for (i = 0; i < m; i++) {
1024564f14d6SBarry Smith       n  = ii[i + 1] - ii[i];
1025564f14d6SBarry Smith       aj = aij->j + ii[i];
1026fff043a9SJunchao Zhang       aa = aij_a + ii[i];
1027564f14d6SBarry Smith       for (j = 0; j < n; j++) {
102825266a92SSatish Balay         if (PetscAbsScalar(mask[*aj])) {
1029377aa5a1SBarry Smith           if (b) bb[i] -= *aa * xx[*aj];
1030564f14d6SBarry Smith           *aa = 0.0;
1031564f14d6SBarry Smith         }
1032564f14d6SBarry Smith         aa++;
1033564f14d6SBarry Smith         aj++;
1034564f14d6SBarry Smith       }
1035564f14d6SBarry Smith     }
1036564f14d6SBarry Smith   }
1037a92ad425SStefano Zampini   if (x && b) {
10389566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
10399566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1040377aa5a1SBarry Smith   }
10419566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
10429566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(lmask, &mask));
10439566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&lmask));
10449566063dSJacob Faibussowitsch   PetscCall(PetscFree(lrows));
10454f9cfa9eSBarry Smith 
10464f9cfa9eSBarry Smith   /* only change matrix nonzero state if pattern was allowed to be changed */
1047f4f49eeaSPierre Jolivet   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
10484f9cfa9eSBarry Smith     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1049462c564dSBarry Smith     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
10504f9cfa9eSBarry Smith   }
10513ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
10529c7c4993SBarry Smith }
10539c7c4993SBarry Smith 
1054ba38deedSJacob Faibussowitsch static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1055d71ae5a4SJacob Faibussowitsch {
1056416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1057b1d57f15SBarry Smith   PetscInt    nt;
105819b3b6edSHong Zhang   VecScatter  Mvctx = a->Mvctx;
1059416022c9SBarry Smith 
10603a40ed3dSBarry Smith   PetscFunctionBegin;
10619566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(xx, &nt));
106208401ef6SPierre Jolivet   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
10639566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064296d8154SBarry Smith   PetscUseTypeMethod(a->A, mult, xx, yy);
10659566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1066296d8154SBarry Smith   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
10673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
10681eb62cbbSBarry Smith }
10691eb62cbbSBarry Smith 
1070ba38deedSJacob Faibussowitsch static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1071d71ae5a4SJacob Faibussowitsch {
1072bd0c2dcbSBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1073bd0c2dcbSBarry Smith 
1074bd0c2dcbSBarry Smith   PetscFunctionBegin;
10759566063dSJacob Faibussowitsch   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
10763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1077bd0c2dcbSBarry Smith }
1078bd0c2dcbSBarry Smith 
1079ba38deedSJacob Faibussowitsch static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1080d71ae5a4SJacob Faibussowitsch {
1081416022c9SBarry Smith   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
108201ad2aeeSHong Zhang   VecScatter  Mvctx = a->Mvctx;
10833a40ed3dSBarry Smith 
10843a40ed3dSBarry Smith   PetscFunctionBegin;
10859566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
10869566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
10879566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
10889566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
10893ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1090da3a660dSBarry Smith }
1091da3a660dSBarry Smith 
1092ba38deedSJacob Faibussowitsch static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1093d71ae5a4SJacob Faibussowitsch {
1094416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1095da3a660dSBarry Smith 
10963a40ed3dSBarry Smith   PetscFunctionBegin;
1097da3a660dSBarry Smith   /* do nondiagonal part */
10989566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1099da3a660dSBarry Smith   /* do local part */
11009566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
11019613dc34SJunchao Zhang   /* add partial results together */
11029566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
11039566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
11043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1105da3a660dSBarry Smith }
1106da3a660dSBarry Smith 
1107ba38deedSJacob Faibussowitsch static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1108d71ae5a4SJacob Faibussowitsch {
11094f423910Svictorle   MPI_Comm    comm;
1110ad79cf63SBarry Smith   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1111ad79cf63SBarry Smith   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1112cd0d46ebSvictorle   IS          Me, Notme;
1113b1d57f15SBarry Smith   PetscInt    M, N, first, last, *notme, i;
111454d735aeSStefano Zampini   PetscBool   lf;
1115b1d57f15SBarry Smith   PetscMPIInt size;
1116cd0d46ebSvictorle 
1117cd0d46ebSvictorle   PetscFunctionBegin;
111842e5f5b4Svictorle   /* Easy test: symmetric diagonal block */
11199566063dSJacob Faibussowitsch   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1120462c564dSBarry Smith   PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
11213ba16761SJacob Faibussowitsch   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
11229566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
11239566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
11243ba16761SJacob Faibussowitsch   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
112542e5f5b4Svictorle 
11267dae84e0SHong Zhang   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
11279566063dSJacob Faibussowitsch   PetscCall(MatGetSize(Amat, &M, &N));
11289566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
11299566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(N - last + first, &notme));
1130cd0d46ebSvictorle   for (i = 0; i < first; i++) notme[i] = i;
1131cd0d46ebSvictorle   for (i = last; i < M; i++) notme[i - last + first] = i;
11329566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
11339566063dSJacob Faibussowitsch   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
11349566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
113566501d38Svictorle   Aoff = Aoffs[0];
11369566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
113766501d38Svictorle   Boff = Boffs[0];
11389566063dSJacob Faibussowitsch   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
11399566063dSJacob Faibussowitsch   PetscCall(MatDestroyMatrices(1, &Aoffs));
11409566063dSJacob Faibussowitsch   PetscCall(MatDestroyMatrices(1, &Boffs));
11419566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&Me));
11429566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&Notme));
11439566063dSJacob Faibussowitsch   PetscCall(PetscFree(notme));
11443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1145cd0d46ebSvictorle }
1146cd0d46ebSvictorle 
1147ba38deedSJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1148d71ae5a4SJacob Faibussowitsch {
1149416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1150da3a660dSBarry Smith 
11513a40ed3dSBarry Smith   PetscFunctionBegin;
1152da3a660dSBarry Smith   /* do nondiagonal part */
11539566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1154da3a660dSBarry Smith   /* do local part */
11559566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
11569613dc34SJunchao Zhang   /* add partial results together */
11579566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
11589566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
11593ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1160da3a660dSBarry Smith }
1161da3a660dSBarry Smith 
11621eb62cbbSBarry Smith /*
11631eb62cbbSBarry Smith   This only works correctly for square matrices where the subblock A->A is the
11641eb62cbbSBarry Smith    diagonal block
11651eb62cbbSBarry Smith */
1166ba38deedSJacob Faibussowitsch static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1167d71ae5a4SJacob Faibussowitsch {
1168416022c9SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
11693a40ed3dSBarry Smith 
11703a40ed3dSBarry Smith   PetscFunctionBegin;
117108401ef6SPierre Jolivet   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1172aed4548fSBarry Smith   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
11739566063dSJacob Faibussowitsch   PetscCall(MatGetDiagonal(a->A, v));
11743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
11751eb62cbbSBarry Smith }
11761eb62cbbSBarry Smith 
1177ba38deedSJacob Faibussowitsch static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1178d71ae5a4SJacob Faibussowitsch {
1179052efed2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
11803a40ed3dSBarry Smith 
11813a40ed3dSBarry Smith   PetscFunctionBegin;
11829566063dSJacob Faibussowitsch   PetscCall(MatScale(a->A, aa));
11839566063dSJacob Faibussowitsch   PetscCall(MatScale(a->B, aa));
11843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1185052efed2SBarry Smith }
1186052efed2SBarry Smith 
1187ba38deedSJacob Faibussowitsch static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1188d71ae5a4SJacob Faibussowitsch {
11898e2fed03SBarry Smith   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
11908e2fed03SBarry Smith   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
11918e2fed03SBarry Smith   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
11923ea6fe3dSLisandro Dalcin   const PetscInt    *garray = aij->garray;
11932e5835c6SStefano Zampini   const PetscScalar *aa, *ba;
119417a3732bSBarry Smith   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
119517a3732bSBarry Smith   PetscInt64         nz, hnz;
11963ea6fe3dSLisandro Dalcin   PetscInt          *rowlens;
11973ea6fe3dSLisandro Dalcin   PetscInt          *colidxs;
11983ea6fe3dSLisandro Dalcin   PetscScalar       *matvals;
119917a3732bSBarry Smith   PetscMPIInt        rank;
12008e2fed03SBarry Smith 
12018e2fed03SBarry Smith   PetscFunctionBegin;
12029566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
12033ea6fe3dSLisandro Dalcin 
12043ea6fe3dSLisandro Dalcin   M  = mat->rmap->N;
12053ea6fe3dSLisandro Dalcin   N  = mat->cmap->N;
12063ea6fe3dSLisandro Dalcin   m  = mat->rmap->n;
12073ea6fe3dSLisandro Dalcin   rs = mat->rmap->rstart;
12083ea6fe3dSLisandro Dalcin   cs = mat->cmap->rstart;
12098e2fed03SBarry Smith   nz = A->nz + B->nz;
12103ea6fe3dSLisandro Dalcin 
12113ea6fe3dSLisandro Dalcin   /* write matrix header */
12120700a824SBarry Smith   header[0] = MAT_FILE_CLASSID;
12139371c9d4SSatish Balay   header[1] = M;
12149371c9d4SSatish Balay   header[2] = N;
121517a3732bSBarry Smith   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
121617a3732bSBarry Smith   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1217835f2295SStefano Zampini   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
12189566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
12198e2fed03SBarry Smith 
12203ea6fe3dSLisandro Dalcin   /* fill in and store row lengths  */
12219566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &rowlens));
12223ea6fe3dSLisandro Dalcin   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
12239566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
12249566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowlens));
12258e2fed03SBarry Smith 
12263ea6fe3dSLisandro Dalcin   /* fill in and store column indices */
12279566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &colidxs));
12283ea6fe3dSLisandro Dalcin   for (cnt = 0, i = 0; i < m; i++) {
12293ea6fe3dSLisandro Dalcin     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
12303ea6fe3dSLisandro Dalcin       if (garray[B->j[jb]] > cs) break;
12313ea6fe3dSLisandro Dalcin       colidxs[cnt++] = garray[B->j[jb]];
12328e2fed03SBarry Smith     }
12339371c9d4SSatish Balay     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
12349371c9d4SSatish Balay     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
12358e2fed03SBarry Smith   }
123617a3732bSBarry Smith   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
12379566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
12389566063dSJacob Faibussowitsch   PetscCall(PetscFree(colidxs));
12398e2fed03SBarry Smith 
12403ea6fe3dSLisandro Dalcin   /* fill in and store nonzero values */
12419566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
12429566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
12439566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &matvals));
12443ea6fe3dSLisandro Dalcin   for (cnt = 0, i = 0; i < m; i++) {
12453ea6fe3dSLisandro Dalcin     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
12463ea6fe3dSLisandro Dalcin       if (garray[B->j[jb]] > cs) break;
12472e5835c6SStefano Zampini       matvals[cnt++] = ba[jb];
12488e2fed03SBarry Smith     }
12499371c9d4SSatish Balay     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
12509371c9d4SSatish Balay     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
12518e2fed03SBarry Smith   }
12529566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
12539566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
125417a3732bSBarry Smith   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
12559566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
12569566063dSJacob Faibussowitsch   PetscCall(PetscFree(matvals));
12578e2fed03SBarry Smith 
12583ea6fe3dSLisandro Dalcin   /* write block size option to the viewer's .info file */
12599566063dSJacob Faibussowitsch   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
12603ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
12618e2fed03SBarry Smith }
12628e2fed03SBarry Smith 
12639804daf3SBarry Smith #include <petscdraw.h>
1264ba38deedSJacob Faibussowitsch static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1265d71ae5a4SJacob Faibussowitsch {
126644a69424SLois Curfman McInnes   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
126732dcc486SBarry Smith   PetscMPIInt       rank = aij->rank, size = aij->size;
1268ace3abfcSBarry Smith   PetscBool         isdraw, iascii, isbinary;
1269b0a32e0cSBarry Smith   PetscViewer       sviewer;
1270f3ef73ceSBarry Smith   PetscViewerFormat format;
1271416022c9SBarry Smith 
12723a40ed3dSBarry Smith   PetscFunctionBegin;
12739566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
12749566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
12759566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
127632077d6dSBarry Smith   if (iascii) {
12779566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
1278ef5fdb51SBarry Smith     if (format == PETSC_VIEWER_LOAD_BALANCE) {
12791690c2aeSBarry Smith       PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
12809566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(size, &nz));
12819566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1282835f2295SStefano Zampini       for (i = 0; i < size; i++) {
1283ef5fdb51SBarry Smith         nmax = PetscMax(nmax, nz[i]);
1284ef5fdb51SBarry Smith         nmin = PetscMin(nmin, nz[i]);
1285ef5fdb51SBarry Smith         navg += nz[i];
1286ef5fdb51SBarry Smith       }
12879566063dSJacob Faibussowitsch       PetscCall(PetscFree(nz));
1288ef5fdb51SBarry Smith       navg = navg / size;
12899566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
12903ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
1291ef5fdb51SBarry Smith     }
12929566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
1293456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
12944e220ebcSLois Curfman McInnes       MatInfo   info;
12956335e310SSatish Balay       PetscInt *inodes = NULL;
1296923f20ffSKris Buschelman 
12979566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
12989566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
12999566063dSJacob Faibussowitsch       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
13009566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1301923f20ffSKris Buschelman       if (!inodes) {
13029371c9d4SSatish Balay         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1303835f2295SStefano Zampini                                                      info.memory));
13046831982aSBarry Smith       } else {
1305835f2295SStefano Zampini         PetscCall(
1306835f2295SStefano Zampini           PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
13076831982aSBarry Smith       }
13089566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
13099566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
13109566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
13119566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
13129566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
13139566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
13149566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
13159566063dSJacob Faibussowitsch       PetscCall(VecScatterView(aij->Mvctx, viewer));
13163ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
1317fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1318923f20ffSKris Buschelman       PetscInt inodecount, inodelimit, *inodes;
13199566063dSJacob Faibussowitsch       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1320923f20ffSKris Buschelman       if (inodes) {
13219566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1322d38fa0fbSBarry Smith       } else {
13239566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1324d38fa0fbSBarry Smith       }
13253ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
13264aedb280SBarry Smith     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
13273ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
132808480c60SBarry Smith     }
13298e2fed03SBarry Smith   } else if (isbinary) {
13308e2fed03SBarry Smith     if (size == 1) {
13319566063dSJacob Faibussowitsch       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
13329566063dSJacob Faibussowitsch       PetscCall(MatView(aij->A, viewer));
13338e2fed03SBarry Smith     } else {
13349566063dSJacob Faibussowitsch       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
13358e2fed03SBarry Smith     }
13363ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
133771e56450SStefano Zampini   } else if (iascii && size == 1) {
13389566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
13399566063dSJacob Faibussowitsch     PetscCall(MatView(aij->A, viewer));
13403ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
13410f5bd95cSBarry Smith   } else if (isdraw) {
1342b0a32e0cSBarry Smith     PetscDraw draw;
1343ace3abfcSBarry Smith     PetscBool isnull;
13449566063dSJacob Faibussowitsch     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
13459566063dSJacob Faibussowitsch     PetscCall(PetscDrawIsNull(draw, &isnull));
13463ba16761SJacob Faibussowitsch     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
134719bcc07fSBarry Smith   }
134819bcc07fSBarry Smith 
134971e56450SStefano Zampini   { /* assemble the entire matrix onto first processor */
135071e56450SStefano Zampini     Mat A = NULL, Av;
135171e56450SStefano Zampini     IS  isrow, iscol;
13522ee70a88SLois Curfman McInnes 
13539566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
13549566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
13559566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
13569566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
135771e56450SStefano Zampini     /*  The commented code uses MatCreateSubMatrices instead */
135871e56450SStefano Zampini     /*
135971e56450SStefano Zampini     Mat *AA, A = NULL, Av;
136071e56450SStefano Zampini     IS  isrow,iscol;
136171e56450SStefano Zampini 
13629566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
13639566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
13649566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1365dd400576SPatrick Sanan     if (rank == 0) {
13669566063dSJacob Faibussowitsch        PetscCall(PetscObjectReference((PetscObject)AA[0]));
136771e56450SStefano Zampini        A    = AA[0];
136871e56450SStefano Zampini        Av   = AA[0];
136995373324SBarry Smith     }
13709566063dSJacob Faibussowitsch     PetscCall(MatDestroySubMatrices(1,&AA));
137171e56450SStefano Zampini */
13729566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol));
13739566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&isrow));
137455843e3eSBarry Smith     /*
137555843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
1376b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
137755843e3eSBarry Smith     */
13789566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1379dd400576SPatrick Sanan     if (rank == 0) {
138048a46eb9SPierre Jolivet       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
13819566063dSJacob Faibussowitsch       PetscCall(MatView_SeqAIJ(Av, sviewer));
138295373324SBarry Smith     }
13839566063dSJacob Faibussowitsch     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
13849566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&A));
138595373324SBarry Smith   }
13863ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
13871eb62cbbSBarry Smith }
13881eb62cbbSBarry Smith 
1389d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1390d71ae5a4SJacob Faibussowitsch {
1391ace3abfcSBarry Smith   PetscBool iascii, isdraw, issocket, isbinary;
1392416022c9SBarry Smith 
13933a40ed3dSBarry Smith   PetscFunctionBegin;
13949566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
13959566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
13969566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
13979566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
139848a46eb9SPierre Jolivet   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
13993ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1400416022c9SBarry Smith }
1401416022c9SBarry Smith 
1402ba38deedSJacob Faibussowitsch static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1403d71ae5a4SJacob Faibussowitsch {
140444a69424SLois Curfman McInnes   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1405f4259b30SLisandro Dalcin   Vec         bb1 = NULL;
1406ace3abfcSBarry Smith   PetscBool   hasop;
14078a729477SBarry Smith 
14083a40ed3dSBarry Smith   PetscFunctionBegin;
1409a2b30743SBarry Smith   if (flag == SOR_APPLY_UPPER) {
14109566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
14113ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
1412a2b30743SBarry Smith   }
1413a2b30743SBarry Smith 
141448a46eb9SPierre Jolivet   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
14154e980039SJed Brown 
1416c16cb8f2SBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1417da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
14189566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
14192798e883SHong Zhang       its--;
1420da3a660dSBarry Smith     }
14212798e883SHong Zhang 
14222798e883SHong Zhang     while (its--) {
14239566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
14249566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
14252798e883SHong Zhang 
1426c14dc6b6SHong Zhang       /* update rhs: bb1 = bb - B*x */
14279566063dSJacob Faibussowitsch       PetscCall(VecScale(mat->lvec, -1.0));
14289566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
14292798e883SHong Zhang 
1430c14dc6b6SHong Zhang       /* local sweep */
14319566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
14322798e883SHong Zhang     }
14333a40ed3dSBarry Smith   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1434da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
14359566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
14362798e883SHong Zhang       its--;
1437da3a660dSBarry Smith     }
14382798e883SHong Zhang     while (its--) {
14399566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
14409566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
14412798e883SHong Zhang 
1442c14dc6b6SHong Zhang       /* update rhs: bb1 = bb - B*x */
14439566063dSJacob Faibussowitsch       PetscCall(VecScale(mat->lvec, -1.0));
14449566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1445c14dc6b6SHong Zhang 
1446c14dc6b6SHong Zhang       /* local sweep */
14479566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
14482798e883SHong Zhang     }
14493a40ed3dSBarry Smith   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1450da3a660dSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
14519566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
14522798e883SHong Zhang       its--;
1453da3a660dSBarry Smith     }
14542798e883SHong Zhang     while (its--) {
14559566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
14569566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
14572798e883SHong Zhang 
1458c14dc6b6SHong Zhang       /* update rhs: bb1 = bb - B*x */
14599566063dSJacob Faibussowitsch       PetscCall(VecScale(mat->lvec, -1.0));
14609566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
14612798e883SHong Zhang 
1462c14dc6b6SHong Zhang       /* local sweep */
14639566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
14642798e883SHong Zhang     }
1465a7420bb7SBarry Smith   } else if (flag & SOR_EISENSTAT) {
1466a7420bb7SBarry Smith     Vec xx1;
1467a7420bb7SBarry Smith 
14689566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(bb, &xx1));
14699566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1470a7420bb7SBarry Smith 
14719566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
14729566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1473a7420bb7SBarry Smith     if (!mat->diag) {
14749566063dSJacob Faibussowitsch       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
14759566063dSJacob Faibussowitsch       PetscCall(MatGetDiagonal(matin, mat->diag));
1476a7420bb7SBarry Smith     }
14779566063dSJacob Faibussowitsch     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1478bd0c2dcbSBarry Smith     if (hasop) {
14799566063dSJacob Faibussowitsch       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1480bd0c2dcbSBarry Smith     } else {
14819566063dSJacob Faibussowitsch       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1482bd0c2dcbSBarry Smith     }
14839566063dSJacob Faibussowitsch     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1484887ee2caSBarry Smith 
14859566063dSJacob Faibussowitsch     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1486a7420bb7SBarry Smith 
1487a7420bb7SBarry Smith     /* local sweep */
14889566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
14899566063dSJacob Faibussowitsch     PetscCall(VecAXPY(xx, 1.0, xx1));
14909566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&xx1));
1491ce94432eSBarry Smith   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1492c14dc6b6SHong Zhang 
14939566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&bb1));
1494a0808db4SHong Zhang 
14957b6c816cSBarry Smith   matin->factorerrortype = mat->A->factorerrortype;
14963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14978a729477SBarry Smith }
1498a66be287SLois Curfman McInnes 
1499ba38deedSJacob Faibussowitsch static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1500d71ae5a4SJacob Faibussowitsch {
150172e6a0cfSJed Brown   Mat             aA, aB, Aperm;
150272e6a0cfSJed Brown   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
150372e6a0cfSJed Brown   PetscScalar    *aa, *ba;
150472e6a0cfSJed Brown   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
150572e6a0cfSJed Brown   PetscSF         rowsf, sf;
15060298fd71SBarry Smith   IS              parcolp = NULL;
150772e6a0cfSJed Brown   PetscBool       done;
150842e855d1Svictor 
150942e855d1Svictor   PetscFunctionBegin;
15109566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(A, &m, &n));
15119566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(rowp, &rwant));
15129566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(colp, &cwant));
15139566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
151472e6a0cfSJed Brown 
151572e6a0cfSJed Brown   /* Invert row permutation to find out where my rows should go */
15169566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
15179566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
15189566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(rowsf));
151972e6a0cfSJed Brown   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
15209566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
15219566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
152272e6a0cfSJed Brown 
152372e6a0cfSJed Brown   /* Invert column permutation to find out where my columns should go */
15249566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
15259566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
15269566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(sf));
152772e6a0cfSJed Brown   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
15289566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
15299566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
15309566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf));
153172e6a0cfSJed Brown 
15329566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(rowp, &rwant));
15339566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(colp, &cwant));
15349566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
153572e6a0cfSJed Brown 
153672e6a0cfSJed Brown   /* Find out where my gcols should go */
15379566063dSJacob Faibussowitsch   PetscCall(MatGetSize(aB, NULL, &ng));
15389566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ng, &gcdest));
15399566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
15409566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
15419566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(sf));
15429566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
15439566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
15449566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf));
154572e6a0cfSJed Brown 
15469566063dSJacob Faibussowitsch   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
15479566063dSJacob Faibussowitsch   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
15489566063dSJacob Faibussowitsch   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
154972e6a0cfSJed Brown   for (i = 0; i < m; i++) {
1550131c27b5Sprj-     PetscInt    row = rdest[i];
1551131c27b5Sprj-     PetscMPIInt rowner;
15529566063dSJacob Faibussowitsch     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
155372e6a0cfSJed Brown     for (j = ai[i]; j < ai[i + 1]; j++) {
1554131c27b5Sprj-       PetscInt    col = cdest[aj[j]];
1555131c27b5Sprj-       PetscMPIInt cowner;
15569566063dSJacob Faibussowitsch       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
155772e6a0cfSJed Brown       if (rowner == cowner) dnnz[i]++;
155872e6a0cfSJed Brown       else onnz[i]++;
155972e6a0cfSJed Brown     }
156072e6a0cfSJed Brown     for (j = bi[i]; j < bi[i + 1]; j++) {
1561131c27b5Sprj-       PetscInt    col = gcdest[bj[j]];
1562131c27b5Sprj-       PetscMPIInt cowner;
15639566063dSJacob Faibussowitsch       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
156472e6a0cfSJed Brown       if (rowner == cowner) dnnz[i]++;
156572e6a0cfSJed Brown       else onnz[i]++;
156672e6a0cfSJed Brown     }
156772e6a0cfSJed Brown   }
15689566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
15699566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
15709566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
15719566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
15729566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&rowsf));
157372e6a0cfSJed Brown 
15749566063dSJacob Faibussowitsch   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
15759566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(aA, &aa));
15769566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(aB, &ba));
157772e6a0cfSJed Brown   for (i = 0; i < m; i++) {
157872e6a0cfSJed Brown     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1579970468b0SJed Brown     PetscInt  j0, rowlen;
158072e6a0cfSJed Brown     rowlen = ai[i + 1] - ai[i];
1581970468b0SJed Brown     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
1582970468b0SJed Brown       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
15839566063dSJacob Faibussowitsch       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1584970468b0SJed Brown     }
158572e6a0cfSJed Brown     rowlen = bi[i + 1] - bi[i];
1586970468b0SJed Brown     for (j0 = j = 0; j < rowlen; j0 = j) {
1587970468b0SJed Brown       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
15889566063dSJacob Faibussowitsch       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1589970468b0SJed Brown     }
159072e6a0cfSJed Brown   }
15919566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
15929566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
15939566063dSJacob Faibussowitsch   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
15949566063dSJacob Faibussowitsch   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
15959566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
15969566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
15979566063dSJacob Faibussowitsch   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
15989566063dSJacob Faibussowitsch   PetscCall(PetscFree3(work, rdest, cdest));
15999566063dSJacob Faibussowitsch   PetscCall(PetscFree(gcdest));
16009566063dSJacob Faibussowitsch   if (parcolp) PetscCall(ISDestroy(&colp));
160172e6a0cfSJed Brown   *B = Aperm;
16023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
160342e855d1Svictor }
160442e855d1Svictor 
1605ba38deedSJacob Faibussowitsch static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1606d71ae5a4SJacob Faibussowitsch {
1607c5e4d11fSDmitry Karpeev   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1608c5e4d11fSDmitry Karpeev 
1609c5e4d11fSDmitry Karpeev   PetscFunctionBegin;
16109566063dSJacob Faibussowitsch   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1611c5e4d11fSDmitry Karpeev   if (ghosts) *ghosts = aij->garray;
16123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1613c5e4d11fSDmitry Karpeev }
1614c5e4d11fSDmitry Karpeev 
1615ba38deedSJacob Faibussowitsch static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1616d71ae5a4SJacob Faibussowitsch {
1617a66be287SLois Curfman McInnes   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1618a66be287SLois Curfman McInnes   Mat            A = mat->A, B = mat->B;
16193966268fSBarry Smith   PetscLogDouble isend[5], irecv[5];
1620a66be287SLois Curfman McInnes 
16213a40ed3dSBarry Smith   PetscFunctionBegin;
16224e220ebcSLois Curfman McInnes   info->block_size = 1.0;
16239566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
16242205254eSKarl Rupp 
16259371c9d4SSatish Balay   isend[0] = info->nz_used;
16269371c9d4SSatish Balay   isend[1] = info->nz_allocated;
16279371c9d4SSatish Balay   isend[2] = info->nz_unneeded;
16289371c9d4SSatish Balay   isend[3] = info->memory;
16299371c9d4SSatish Balay   isend[4] = info->mallocs;
16302205254eSKarl Rupp 
16319566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
16322205254eSKarl Rupp 
16339371c9d4SSatish Balay   isend[0] += info->nz_used;
16349371c9d4SSatish Balay   isend[1] += info->nz_allocated;
16359371c9d4SSatish Balay   isend[2] += info->nz_unneeded;
16369371c9d4SSatish Balay   isend[3] += info->memory;
16379371c9d4SSatish Balay   isend[4] += info->mallocs;
1638a66be287SLois Curfman McInnes   if (flag == MAT_LOCAL) {
16394e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
16404e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
16414e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
16424e220ebcSLois Curfman McInnes     info->memory       = isend[3];
16434e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
1644a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
1645462c564dSBarry Smith     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
16462205254eSKarl Rupp 
16474e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
16484e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
16494e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
16504e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
16514e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1652a66be287SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
1653462c564dSBarry Smith     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
16542205254eSKarl Rupp 
16554e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
16564e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
16574e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
16584e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
16594e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1660a66be287SLois Curfman McInnes   }
16614e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
16624e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
16634e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
16643ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1665a66be287SLois Curfman McInnes }
1666a66be287SLois Curfman McInnes 
1667d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1668d71ae5a4SJacob Faibussowitsch {
1669c0bbcb79SLois Curfman McInnes   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1670c74985f6SBarry Smith 
16713a40ed3dSBarry Smith   PetscFunctionBegin;
167212c028f9SKris Buschelman   switch (op) {
1673512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
167412c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
167528b2fa4aSMatthew Knepley   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1676a9817697SBarry Smith   case MAT_KEEP_NONZERO_PATTERN:
167712c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
16780ad02fcaSStefano Zampini   case MAT_USE_INODES:
167912c028f9SKris Buschelman   case MAT_IGNORE_ZERO_ENTRIES:
16801a2c6b5cSJunchao Zhang   case MAT_FORM_EXPLICIT_TRANSPOSE:
1681fa1f0d2cSMatthew G Knepley     MatCheckPreallocated(A, 1);
16829566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
16839566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->B, op, flg));
168412c028f9SKris Buschelman     break;
168512c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
168643674050SBarry Smith     MatCheckPreallocated(A, 1);
16874e0d8c25SBarry Smith     a->roworiented = flg;
16882205254eSKarl Rupp 
16899566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
16909566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->B, op, flg));
169112c028f9SKris Buschelman     break;
16928c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
1693d71ae5a4SJacob Faibussowitsch   case MAT_SORTED_FULL:
1694d71ae5a4SJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1695d71ae5a4SJacob Faibussowitsch     break;
1696d71ae5a4SJacob Faibussowitsch   case MAT_IGNORE_OFF_PROC_ENTRIES:
1697d71ae5a4SJacob Faibussowitsch     a->donotstash = flg;
1698d71ae5a4SJacob Faibussowitsch     break;
1699c8ca1fbcSVaclav Hapla   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1700ffa07934SHong Zhang   case MAT_SPD:
170177e54ba9SKris Buschelman   case MAT_SYMMETRIC:
170277e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
1703bf108f30SBarry Smith   case MAT_HERMITIAN:
1704bf108f30SBarry Smith   case MAT_SYMMETRY_ETERNAL:
1705b94d7dedSBarry Smith   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1706b94d7dedSBarry Smith   case MAT_SPD_ETERNAL:
1707b94d7dedSBarry Smith     /* if the diagonal matrix is square it inherits some of the properties above */
170877e54ba9SKris Buschelman     break;
1709d71ae5a4SJacob Faibussowitsch   case MAT_SUBMAT_SINGLEIS:
1710d71ae5a4SJacob Faibussowitsch     A->submat_singleis = flg;
1711d71ae5a4SJacob Faibussowitsch     break;
1712957cac9fSHong Zhang   case MAT_STRUCTURE_ONLY:
1713957cac9fSHong Zhang     /* The option is handled directly by MatSetOption() */
1714957cac9fSHong Zhang     break;
1715d71ae5a4SJacob Faibussowitsch   default:
1716d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
17173a40ed3dSBarry Smith   }
17183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1719c74985f6SBarry Smith }
1720c74985f6SBarry Smith 
1721d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1722d71ae5a4SJacob Faibussowitsch {
1723154123eaSLois Curfman McInnes   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
172487828ca2SBarry Smith   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1725d0f46423SBarry Smith   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1726d0f46423SBarry Smith   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1727b1d57f15SBarry Smith   PetscInt    *cmap, *idx_p;
172839e00950SLois Curfman McInnes 
17293a40ed3dSBarry Smith   PetscFunctionBegin;
173028b400f6SJacob Faibussowitsch   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
17317a0afa10SBarry Smith   mat->getrowactive = PETSC_TRUE;
17327a0afa10SBarry Smith 
173370f0671dSBarry Smith   if (!mat->rowvalues && (idx || v)) {
17347a0afa10SBarry Smith     /*
17357a0afa10SBarry Smith         allocate enough space to hold information from the longest row.
17367a0afa10SBarry Smith     */
17377a0afa10SBarry Smith     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1738b1d57f15SBarry Smith     PetscInt    max = 1, tmp;
1739d0f46423SBarry Smith     for (i = 0; i < matin->rmap->n; i++) {
17407a0afa10SBarry Smith       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
17412205254eSKarl Rupp       if (max < tmp) max = tmp;
17427a0afa10SBarry Smith     }
17439566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
17447a0afa10SBarry Smith   }
17457a0afa10SBarry Smith 
1746aed4548fSBarry Smith   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1747abc0e9e4SLois Curfman McInnes   lrow = row - rstart;
174839e00950SLois Curfman McInnes 
17499371c9d4SSatish Balay   pvA = &vworkA;
17509371c9d4SSatish Balay   pcA = &cworkA;
17519371c9d4SSatish Balay   pvB = &vworkB;
17529371c9d4SSatish Balay   pcB = &cworkB;
17539371c9d4SSatish Balay   if (!v) {
17549371c9d4SSatish Balay     pvA = NULL;
17559371c9d4SSatish Balay     pvB = NULL;
17569371c9d4SSatish Balay   }
17579371c9d4SSatish Balay   if (!idx) {
17589371c9d4SSatish Balay     pcA = NULL;
17599371c9d4SSatish Balay     if (!v) pcB = NULL;
17609371c9d4SSatish Balay   }
17619566063dSJacob Faibussowitsch   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
17629566063dSJacob Faibussowitsch   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1763154123eaSLois Curfman McInnes   nztot = nzA + nzB;
1764154123eaSLois Curfman McInnes 
176570f0671dSBarry Smith   cmap = mat->garray;
1766154123eaSLois Curfman McInnes   if (v || idx) {
1767154123eaSLois Curfman McInnes     if (nztot) {
1768154123eaSLois Curfman McInnes       /* Sort by increasing column numbers, assuming A and B already sorted */
1769b1d57f15SBarry Smith       PetscInt imark = -1;
1770154123eaSLois Curfman McInnes       if (v) {
177170f0671dSBarry Smith         *v = v_p = mat->rowvalues;
177239e00950SLois Curfman McInnes         for (i = 0; i < nzB; i++) {
177370f0671dSBarry Smith           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1774154123eaSLois Curfman McInnes           else break;
1775154123eaSLois Curfman McInnes         }
1776154123eaSLois Curfman McInnes         imark = i;
177770f0671dSBarry Smith         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
177870f0671dSBarry Smith         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1779154123eaSLois Curfman McInnes       }
1780154123eaSLois Curfman McInnes       if (idx) {
178170f0671dSBarry Smith         *idx = idx_p = mat->rowindices;
178270f0671dSBarry Smith         if (imark > -1) {
1783ad540459SPierre Jolivet           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
178470f0671dSBarry Smith         } else {
1785154123eaSLois Curfman McInnes           for (i = 0; i < nzB; i++) {
178670f0671dSBarry Smith             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1787154123eaSLois Curfman McInnes             else break;
1788154123eaSLois Curfman McInnes           }
1789154123eaSLois Curfman McInnes           imark = i;
179070f0671dSBarry Smith         }
179170f0671dSBarry Smith         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
179270f0671dSBarry Smith         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
179339e00950SLois Curfman McInnes       }
17943f97c4b0SBarry Smith     } else {
1795f4259b30SLisandro Dalcin       if (idx) *idx = NULL;
1796f4259b30SLisandro Dalcin       if (v) *v = NULL;
17971ca473b0SSatish Balay     }
1798154123eaSLois Curfman McInnes   }
179939e00950SLois Curfman McInnes   *nz = nztot;
18009566063dSJacob Faibussowitsch   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
18019566063dSJacob Faibussowitsch   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
18023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
180339e00950SLois Curfman McInnes }
180439e00950SLois Curfman McInnes 
1805d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1806d71ae5a4SJacob Faibussowitsch {
18077a0afa10SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
18083a40ed3dSBarry Smith 
18093a40ed3dSBarry Smith   PetscFunctionBegin;
181028b400f6SJacob Faibussowitsch   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
18117a0afa10SBarry Smith   aij->getrowactive = PETSC_FALSE;
18123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
181339e00950SLois Curfman McInnes }
181439e00950SLois Curfman McInnes 
1815ba38deedSJacob Faibussowitsch static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1816d71ae5a4SJacob Faibussowitsch {
1817855ac2c5SLois Curfman McInnes   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1818ec8511deSBarry Smith   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1819d0f46423SBarry Smith   PetscInt         i, j, cstart = mat->cmap->rstart;
1820329f5518SBarry Smith   PetscReal        sum = 0.0;
1821fff043a9SJunchao Zhang   const MatScalar *v, *amata, *bmata;
18226497c311SBarry Smith   PetscMPIInt      iN;
182304ca555eSLois Curfman McInnes 
18243a40ed3dSBarry Smith   PetscFunctionBegin;
182517699dbbSLois Curfman McInnes   if (aij->size == 1) {
18269566063dSJacob Faibussowitsch     PetscCall(MatNorm(aij->A, type, norm));
182737fa93a5SLois Curfman McInnes   } else {
18289566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
18299566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
183004ca555eSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
1831fff043a9SJunchao Zhang       v = amata;
183204ca555eSLois Curfman McInnes       for (i = 0; i < amat->nz; i++) {
18339371c9d4SSatish Balay         sum += PetscRealPart(PetscConj(*v) * (*v));
18349371c9d4SSatish Balay         v++;
183504ca555eSLois Curfman McInnes       }
1836fff043a9SJunchao Zhang       v = bmata;
183704ca555eSLois Curfman McInnes       for (i = 0; i < bmat->nz; i++) {
18389371c9d4SSatish Balay         sum += PetscRealPart(PetscConj(*v) * (*v));
18399371c9d4SSatish Balay         v++;
184004ca555eSLois Curfman McInnes       }
1841462c564dSBarry Smith       PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
18428f1a2a5eSBarry Smith       *norm = PetscSqrtReal(*norm);
18439566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
18443a40ed3dSBarry Smith     } else if (type == NORM_1) { /* max column norm */
1845329f5518SBarry Smith       PetscReal *tmp, *tmp2;
1846b1d57f15SBarry Smith       PetscInt  *jj, *garray = aij->garray;
18479566063dSJacob Faibussowitsch       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
18489566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
184904ca555eSLois Curfman McInnes       *norm = 0.0;
18509371c9d4SSatish Balay       v     = amata;
18519371c9d4SSatish Balay       jj    = amat->j;
185204ca555eSLois Curfman McInnes       for (j = 0; j < amat->nz; j++) {
18539371c9d4SSatish Balay         tmp[cstart + *jj++] += PetscAbsScalar(*v);
18549371c9d4SSatish Balay         v++;
185504ca555eSLois Curfman McInnes       }
18569371c9d4SSatish Balay       v  = bmata;
18579371c9d4SSatish Balay       jj = bmat->j;
185804ca555eSLois Curfman McInnes       for (j = 0; j < bmat->nz; j++) {
18599371c9d4SSatish Balay         tmp[garray[*jj++]] += PetscAbsScalar(*v);
18609371c9d4SSatish Balay         v++;
186104ca555eSLois Curfman McInnes       }
18626497c311SBarry Smith       PetscCall(PetscMPIIntCast(mat->cmap->N, &iN));
1863462c564dSBarry Smith       PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1864d0f46423SBarry Smith       for (j = 0; j < mat->cmap->N; j++) {
186504ca555eSLois Curfman McInnes         if (tmp2[j] > *norm) *norm = tmp2[j];
186604ca555eSLois Curfman McInnes       }
18679566063dSJacob Faibussowitsch       PetscCall(PetscFree(tmp));
18689566063dSJacob Faibussowitsch       PetscCall(PetscFree(tmp2));
18699566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
18703a40ed3dSBarry Smith     } else if (type == NORM_INFINITY) { /* max row norm */
1871329f5518SBarry Smith       PetscReal ntemp = 0.0;
1872d0f46423SBarry Smith       for (j = 0; j < aij->A->rmap->n; j++) {
18738e3a54c0SPierre Jolivet         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
187404ca555eSLois Curfman McInnes         sum = 0.0;
187504ca555eSLois Curfman McInnes         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
18769371c9d4SSatish Balay           sum += PetscAbsScalar(*v);
18779371c9d4SSatish Balay           v++;
187804ca555eSLois Curfman McInnes         }
18798e3a54c0SPierre Jolivet         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
188004ca555eSLois Curfman McInnes         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
18819371c9d4SSatish Balay           sum += PetscAbsScalar(*v);
18829371c9d4SSatish Balay           v++;
188304ca555eSLois Curfman McInnes         }
1884515d9167SLois Curfman McInnes         if (sum > ntemp) ntemp = sum;
188504ca555eSLois Curfman McInnes       }
1886462c564dSBarry Smith       PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
18879566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1888ce94432eSBarry Smith     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
18899566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
18909566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
189137fa93a5SLois Curfman McInnes   }
18923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1893855ac2c5SLois Curfman McInnes }
1894855ac2c5SLois Curfman McInnes 
1895ba38deedSJacob Faibussowitsch static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1896d71ae5a4SJacob Faibussowitsch {
1897a8661f62Sandi selinger   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1898a8661f62Sandi selinger   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1899071fcb05SBarry Smith   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1900071fcb05SBarry Smith   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1901a8661f62Sandi selinger   Mat              B, A_diag, *B_diag;
1902ce496241SStefano Zampini   const MatScalar *pbv, *bv;
1903b7c46309SBarry Smith 
19043a40ed3dSBarry Smith   PetscFunctionBegin;
19057fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
19069371c9d4SSatish Balay   ma = A->rmap->n;
19079371c9d4SSatish Balay   na = A->cmap->n;
19089371c9d4SSatish Balay   mb = a->B->rmap->n;
19099371c9d4SSatish Balay   nb = a->B->cmap->n;
19109371c9d4SSatish Balay   ai = Aloc->i;
19119371c9d4SSatish Balay   aj = Aloc->j;
19129371c9d4SSatish Balay   bi = Bloc->i;
19139371c9d4SSatish Balay   bj = Bloc->j;
1914fc73b1b3SBarry Smith   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
191580bcc5a1SJed Brown     PetscInt            *d_nnz, *g_nnz, *o_nnz;
191680bcc5a1SJed Brown     PetscSFNode         *oloc;
1917713c93b4SJed Brown     PETSC_UNUSED PetscSF sf;
191880bcc5a1SJed Brown 
19199566063dSJacob Faibussowitsch     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
192080bcc5a1SJed Brown     /* compute d_nnz for preallocation */
19219566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(d_nnz, na));
1922cbc6b225SStefano Zampini     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
192380bcc5a1SJed Brown     /* compute local off-diagonal contributions */
19249566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(g_nnz, nb));
192580bcc5a1SJed Brown     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
192680bcc5a1SJed Brown     /* map those to global */
19279566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
19289566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
19299566063dSJacob Faibussowitsch     PetscCall(PetscSFSetFromOptions(sf));
19309566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(o_nnz, na));
193157168dbeSPierre Jolivet     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
193257168dbeSPierre Jolivet     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
19339566063dSJacob Faibussowitsch     PetscCall(PetscSFDestroy(&sf));
1934d4bb536fSBarry Smith 
19359566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
19369566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
19379566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
19389566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
19399566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
19409566063dSJacob Faibussowitsch     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1941fc4dec0aSBarry Smith   } else {
1942fc4dec0aSBarry Smith     B = *matout;
19439566063dSJacob Faibussowitsch     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1944fc4dec0aSBarry Smith   }
1945b7c46309SBarry Smith 
1946f79cb1a0Sandi selinger   b           = (Mat_MPIAIJ *)B->data;
1947a8661f62Sandi selinger   A_diag      = a->A;
1948a8661f62Sandi selinger   B_diag      = &b->A;
1949a8661f62Sandi selinger   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1950a8661f62Sandi selinger   A_diag_ncol = A_diag->cmap->N;
1951a8661f62Sandi selinger   B_diag_ilen = sub_B_diag->ilen;
1952a8661f62Sandi selinger   B_diag_i    = sub_B_diag->i;
1953f79cb1a0Sandi selinger 
1954f79cb1a0Sandi selinger   /* Set ilen for diagonal of B */
1955ad540459SPierre Jolivet   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1956f79cb1a0Sandi selinger 
19574cf0e950SBarry Smith   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1958a8661f62Sandi selinger   very quickly (=without using MatSetValues), because all writes are local. */
19597fb60732SBarry Smith   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
19609566063dSJacob Faibussowitsch   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1961f79cb1a0Sandi selinger 
1962b7c46309SBarry Smith   /* copy over the B part */
19639566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[mb], &cols));
19649566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1965ce496241SStefano Zampini   pbv = bv;
1966d0f46423SBarry Smith   row = A->rmap->rstart;
19672205254eSKarl Rupp   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
196861a2fbbaSHong Zhang   cols_tmp = cols;
1969da668accSHong Zhang   for (i = 0; i < mb; i++) {
1970da668accSHong Zhang     ncol = bi[i + 1] - bi[i];
19719566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
19722205254eSKarl Rupp     row++;
1973720a2405SPierre Jolivet     if (pbv) pbv += ncol;
1974720a2405SPierre Jolivet     if (cols_tmp) cols_tmp += ncol;
1975b7c46309SBarry Smith   }
19769566063dSJacob Faibussowitsch   PetscCall(PetscFree(cols));
19779566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1978fc73b1b3SBarry Smith 
19799566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
19809566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1981cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
19820de55854SLois Curfman McInnes     *matout = B;
19830de55854SLois Curfman McInnes   } else {
19849566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(A, &B));
19850de55854SLois Curfman McInnes   }
19863ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1987b7c46309SBarry Smith }
1988b7c46309SBarry Smith 
1989ba38deedSJacob Faibussowitsch static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1990d71ae5a4SJacob Faibussowitsch {
19914b967eb1SSatish Balay   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
19924b967eb1SSatish Balay   Mat         a = aij->A, b = aij->B;
1993b1d57f15SBarry Smith   PetscInt    s1, s2, s3;
1994a008b906SSatish Balay 
19953a40ed3dSBarry Smith   PetscFunctionBegin;
19969566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &s2, &s3));
19974b967eb1SSatish Balay   if (rr) {
19989566063dSJacob Faibussowitsch     PetscCall(VecGetLocalSize(rr, &s1));
199908401ef6SPierre Jolivet     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
20004b967eb1SSatish Balay     /* Overlap communication with computation. */
20019566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2002a008b906SSatish Balay   }
20034b967eb1SSatish Balay   if (ll) {
20049566063dSJacob Faibussowitsch     PetscCall(VecGetLocalSize(ll, &s1));
200508401ef6SPierre Jolivet     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
2006dbbe0bcdSBarry Smith     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
20074b967eb1SSatish Balay   }
20084b967eb1SSatish Balay   /* scale  the diagonal block */
2009dbbe0bcdSBarry Smith   PetscUseTypeMethod(a, diagonalscale, ll, rr);
20104b967eb1SSatish Balay 
20114b967eb1SSatish Balay   if (rr) {
20124b967eb1SSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
20139566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2014dbbe0bcdSBarry Smith     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
20154b967eb1SSatish Balay   }
20163ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2017a008b906SSatish Balay }
2018a008b906SSatish Balay 
2019ba38deedSJacob Faibussowitsch static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2020d71ae5a4SJacob Faibussowitsch {
2021bb5a7306SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
20223a40ed3dSBarry Smith 
20233a40ed3dSBarry Smith   PetscFunctionBegin;
20249566063dSJacob Faibussowitsch   PetscCall(MatSetUnfactored(a->A));
20253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2026bb5a7306SBarry Smith }
2027bb5a7306SBarry Smith 
2028ba38deedSJacob Faibussowitsch static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2029d71ae5a4SJacob Faibussowitsch {
2030d4bb536fSBarry Smith   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2031d4bb536fSBarry Smith   Mat         a, b, c, d;
2032ace3abfcSBarry Smith   PetscBool   flg;
2033d4bb536fSBarry Smith 
20343a40ed3dSBarry Smith   PetscFunctionBegin;
20359371c9d4SSatish Balay   a = matA->A;
20369371c9d4SSatish Balay   b = matA->B;
20379371c9d4SSatish Balay   c = matB->A;
20389371c9d4SSatish Balay   d = matB->B;
2039d4bb536fSBarry Smith 
20409566063dSJacob Faibussowitsch   PetscCall(MatEqual(a, c, &flg));
204148a46eb9SPierre Jolivet   if (flg) PetscCall(MatEqual(b, d, &flg));
2042462c564dSBarry Smith   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
20433ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2044d4bb536fSBarry Smith }
2045d4bb536fSBarry Smith 
2046ba38deedSJacob Faibussowitsch static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2047d71ae5a4SJacob Faibussowitsch {
2048cb5b572fSBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2049cb5b572fSBarry Smith   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2050cb5b572fSBarry Smith 
2051cb5b572fSBarry Smith   PetscFunctionBegin;
205233f4a19fSKris Buschelman   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
205333f4a19fSKris Buschelman   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2054cb5b572fSBarry Smith     /* because of the column compression in the off-processor part of the matrix a->B,
2055cb5b572fSBarry Smith        the number of columns in a->B and b->B may be different, hence we cannot call
2056cb5b572fSBarry Smith        the MatCopy() directly on the two parts. If need be, we can provide a more
2057cb5b572fSBarry Smith        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2058cb5b572fSBarry Smith        then copying the submatrices */
20599566063dSJacob Faibussowitsch     PetscCall(MatCopy_Basic(A, B, str));
2060cb5b572fSBarry Smith   } else {
20619566063dSJacob Faibussowitsch     PetscCall(MatCopy(a->A, b->A, str));
20629566063dSJacob Faibussowitsch     PetscCall(MatCopy(a->B, b->B, str));
2063cb5b572fSBarry Smith   }
20649566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)B));
20653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2066cb5b572fSBarry Smith }
2067cb5b572fSBarry Smith 
2068001ddc4fSHong Zhang /*
2069001ddc4fSHong Zhang    Computes the number of nonzeros per row needed for preallocation when X and Y
2070001ddc4fSHong Zhang    have different nonzero structure.
2071001ddc4fSHong Zhang */
2072d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2073d71ae5a4SJacob Faibussowitsch {
2074001ddc4fSHong Zhang   PetscInt i, j, k, nzx, nzy;
207595b7e79eSJed Brown 
207695b7e79eSJed Brown   PetscFunctionBegin;
207795b7e79eSJed Brown   /* Set the number of nonzeros in the new matrix */
207895b7e79eSJed Brown   for (i = 0; i < m; i++) {
20798e3a54c0SPierre Jolivet     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2080001ddc4fSHong Zhang     nzx    = xi[i + 1] - xi[i];
2081001ddc4fSHong Zhang     nzy    = yi[i + 1] - yi[i];
208295b7e79eSJed Brown     nnz[i] = 0;
208395b7e79eSJed Brown     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2084001ddc4fSHong Zhang       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2085001ddc4fSHong Zhang       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
208695b7e79eSJed Brown       nnz[i]++;
208795b7e79eSJed Brown     }
208895b7e79eSJed Brown     for (; k < nzy; k++) nnz[i]++;
208995b7e79eSJed Brown   }
20903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
209195b7e79eSJed Brown }
209295b7e79eSJed Brown 
2093001ddc4fSHong Zhang /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2094d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2095d71ae5a4SJacob Faibussowitsch {
2096001ddc4fSHong Zhang   PetscInt    m = Y->rmap->N;
2097001ddc4fSHong Zhang   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2098001ddc4fSHong Zhang   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2099001ddc4fSHong Zhang 
2100001ddc4fSHong Zhang   PetscFunctionBegin;
21019566063dSJacob Faibussowitsch   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
21023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2103001ddc4fSHong Zhang }
2104001ddc4fSHong Zhang 
2105ba38deedSJacob Faibussowitsch static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2106d71ae5a4SJacob Faibussowitsch {
2107ac90fabeSBarry Smith   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2108ac90fabeSBarry Smith 
2109ac90fabeSBarry Smith   PetscFunctionBegin;
2110ac90fabeSBarry Smith   if (str == SAME_NONZERO_PATTERN) {
21119566063dSJacob Faibussowitsch     PetscCall(MatAXPY(yy->A, a, xx->A, str));
21129566063dSJacob Faibussowitsch     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2113ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
21149566063dSJacob Faibussowitsch     PetscCall(MatAXPY_Basic(Y, a, X, str));
2115ac90fabeSBarry Smith   } else {
21169f5f6813SShri Abhyankar     Mat       B;
21179f5f6813SShri Abhyankar     PetscInt *nnz_d, *nnz_o;
2118d9d719b4SStefano Zampini 
21199566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
21209566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
21219566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
21229566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
21239566063dSJacob Faibussowitsch     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
21249566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
21259566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
21269566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
21279566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
21289566063dSJacob Faibussowitsch     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
21299566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(Y, &B));
21309566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz_d));
21319566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz_o));
2132ac90fabeSBarry Smith   }
21333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2134ac90fabeSBarry Smith }
2135ac90fabeSBarry Smith 
21362726fb6dSPierre Jolivet PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2137354c94deSBarry Smith 
2138ba38deedSJacob Faibussowitsch static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2139d71ae5a4SJacob Faibussowitsch {
21405f80ce2aSJacob Faibussowitsch   PetscFunctionBegin;
21415f80ce2aSJacob Faibussowitsch   if (PetscDefined(USE_COMPLEX)) {
2142354c94deSBarry Smith     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2143354c94deSBarry Smith 
21449566063dSJacob Faibussowitsch     PetscCall(MatConjugate_SeqAIJ(aij->A));
21459566063dSJacob Faibussowitsch     PetscCall(MatConjugate_SeqAIJ(aij->B));
21465f80ce2aSJacob Faibussowitsch   }
21473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2148354c94deSBarry Smith }
2149354c94deSBarry Smith 
2150ba38deedSJacob Faibussowitsch static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2151d71ae5a4SJacob Faibussowitsch {
215299cafbc1SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
215399cafbc1SBarry Smith 
215499cafbc1SBarry Smith   PetscFunctionBegin;
21559566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->A));
21569566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->B));
21573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
215899cafbc1SBarry Smith }
215999cafbc1SBarry Smith 
2160ba38deedSJacob Faibussowitsch static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2161d71ae5a4SJacob Faibussowitsch {
216299cafbc1SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
216399cafbc1SBarry Smith 
216499cafbc1SBarry Smith   PetscFunctionBegin;
21659566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->A));
21669566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->B));
21673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
216899cafbc1SBarry Smith }
216999cafbc1SBarry Smith 
2170ba38deedSJacob Faibussowitsch static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2171d71ae5a4SJacob Faibussowitsch {
2172c91732d9SHong Zhang   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2173475b8b61SHong Zhang   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2174475b8b61SHong Zhang   PetscScalar       *va, *vv;
2175475b8b61SHong Zhang   Vec                vB, vA;
2176475b8b61SHong Zhang   const PetscScalar *vb;
2177c91732d9SHong Zhang 
2178c91732d9SHong Zhang   PetscFunctionBegin;
2179c7b600bfSPierre Jolivet   PetscCall(MatCreateVecs(a->A, NULL, &vA));
21809566063dSJacob Faibussowitsch   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2181475b8b61SHong Zhang 
21829566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(vA, &va));
2183c91732d9SHong Zhang   if (idx) {
2184475b8b61SHong Zhang     for (i = 0; i < m; i++) {
2185d0f46423SBarry Smith       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2186c91732d9SHong Zhang     }
2187c91732d9SHong Zhang   }
2188c91732d9SHong Zhang 
2189c7b600bfSPierre Jolivet   PetscCall(MatCreateVecs(a->B, NULL, &vB));
21909566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &idxb));
21919566063dSJacob Faibussowitsch   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2192c91732d9SHong Zhang 
21939566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(v, &vv));
21949566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(vB, &vb));
2195475b8b61SHong Zhang   for (i = 0; i < m; i++) {
2196c91732d9SHong Zhang     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2197475b8b61SHong Zhang       vv[i] = vb[i];
2198c91732d9SHong Zhang       if (idx) idx[i] = a->garray[idxb[i]];
2199475b8b61SHong Zhang     } else {
2200475b8b61SHong Zhang       vv[i] = va[i];
22019371c9d4SSatish Balay       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2202c91732d9SHong Zhang     }
2203c91732d9SHong Zhang   }
22049566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(vA, &vv));
22059566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(vA, &va));
22069566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(vB, &vb));
22079566063dSJacob Faibussowitsch   PetscCall(PetscFree(idxb));
22089566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&vA));
22099566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&vB));
22103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2211c91732d9SHong Zhang }
2212c91732d9SHong Zhang 
2213eede4a3fSMark Adams static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2214eede4a3fSMark Adams {
2215eede4a3fSMark Adams   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2216eede4a3fSMark Adams   Vec         vB, vA;
2217eede4a3fSMark Adams 
2218eede4a3fSMark Adams   PetscFunctionBegin;
2219c7b600bfSPierre Jolivet   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2220eede4a3fSMark Adams   PetscCall(MatGetRowSumAbs(a->A, vA));
2221c7b600bfSPierre Jolivet   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2222eede4a3fSMark Adams   PetscCall(MatGetRowSumAbs(a->B, vB));
2223eede4a3fSMark Adams   PetscCall(VecAXPY(vA, 1.0, vB));
2224eede4a3fSMark Adams   PetscCall(VecDestroy(&vB));
2225eede4a3fSMark Adams   PetscCall(VecCopy(vA, v));
2226eede4a3fSMark Adams   PetscCall(VecDestroy(&vA));
2227eede4a3fSMark Adams   PetscFunctionReturn(PETSC_SUCCESS);
2228eede4a3fSMark Adams }
2229eede4a3fSMark Adams 
2230ba38deedSJacob Faibussowitsch static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2231d71ae5a4SJacob Faibussowitsch {
2232f07e67edSHong Zhang   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2233f07e67edSHong Zhang   PetscInt           m = A->rmap->n, n = A->cmap->n;
2234f07e67edSHong Zhang   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2235f07e67edSHong Zhang   PetscInt          *cmap = mat->garray;
2236f07e67edSHong Zhang   PetscInt          *diagIdx, *offdiagIdx;
2237f07e67edSHong Zhang   Vec                diagV, offdiagV;
2238ce496241SStefano Zampini   PetscScalar       *a, *diagA, *offdiagA;
2239ce496241SStefano Zampini   const PetscScalar *ba, *bav;
2240f07e67edSHong Zhang   PetscInt           r, j, col, ncols, *bi, *bj;
2241f07e67edSHong Zhang   Mat                B = mat->B;
2242f07e67edSHong Zhang   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2243c87e5d42SMatthew Knepley 
2244c87e5d42SMatthew Knepley   PetscFunctionBegin;
2245f07e67edSHong Zhang   /* When a process holds entire A and other processes have no entry */
2246f07e67edSHong Zhang   if (A->cmap->N == n) {
22479566063dSJacob Faibussowitsch     PetscCall(VecGetArrayWrite(v, &diagA));
22489566063dSJacob Faibussowitsch     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
22499566063dSJacob Faibussowitsch     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
22509566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&diagV));
22519566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayWrite(v, &diagA));
22523ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2253f07e67edSHong Zhang   } else if (n == 0) {
2254f07e67edSHong Zhang     if (m) {
22559566063dSJacob Faibussowitsch       PetscCall(VecGetArrayWrite(v, &a));
22569371c9d4SSatish Balay       for (r = 0; r < m; r++) {
22579371c9d4SSatish Balay         a[r] = 0.0;
22589371c9d4SSatish Balay         if (idx) idx[r] = -1;
22599371c9d4SSatish Balay       }
22609566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayWrite(v, &a));
2261f07e67edSHong Zhang     }
22623ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2263f07e67edSHong Zhang   }
2264f07e67edSHong Zhang 
22659566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
22669566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
22679566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
22689566063dSJacob Faibussowitsch   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2269f07e67edSHong Zhang 
2270f07e67edSHong Zhang   /* Get offdiagIdx[] for implicit 0.0 */
22719566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2272ce496241SStefano Zampini   ba = bav;
2273f07e67edSHong Zhang   bi = b->i;
2274f07e67edSHong Zhang   bj = b->j;
22759566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2276f07e67edSHong Zhang   for (r = 0; r < m; r++) {
2277f07e67edSHong Zhang     ncols = bi[r + 1] - bi[r];
2278f07e67edSHong Zhang     if (ncols == A->cmap->N - n) { /* Brow is dense */
22799371c9d4SSatish Balay       offdiagA[r]   = *ba;
22809371c9d4SSatish Balay       offdiagIdx[r] = cmap[0];
2281f07e67edSHong Zhang     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2282f07e67edSHong Zhang       offdiagA[r] = 0.0;
2283f07e67edSHong Zhang 
2284f07e67edSHong Zhang       /* Find first hole in the cmap */
2285f07e67edSHong Zhang       for (j = 0; j < ncols; j++) {
2286f07e67edSHong Zhang         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2287f07e67edSHong Zhang         if (col > j && j < cstart) {
2288f07e67edSHong Zhang           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2289f07e67edSHong Zhang           break;
2290f07e67edSHong Zhang         } else if (col > j + n && j >= cstart) {
2291f07e67edSHong Zhang           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2292f07e67edSHong Zhang           break;
2293f07e67edSHong Zhang         }
2294f07e67edSHong Zhang       }
22954e879edeSHong Zhang       if (j == ncols && ncols < A->cmap->N - n) {
2296f07e67edSHong Zhang         /* a hole is outside compressed Bcols */
2297f07e67edSHong Zhang         if (ncols == 0) {
2298f07e67edSHong Zhang           if (cstart) {
2299f07e67edSHong Zhang             offdiagIdx[r] = 0;
2300f07e67edSHong Zhang           } else offdiagIdx[r] = cend;
2301f07e67edSHong Zhang         } else { /* ncols > 0 */
2302f07e67edSHong Zhang           offdiagIdx[r] = cmap[ncols - 1] + 1;
2303f07e67edSHong Zhang           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2304f07e67edSHong Zhang         }
2305f07e67edSHong Zhang       }
2306f07e67edSHong Zhang     }
2307f07e67edSHong Zhang 
2308f07e67edSHong Zhang     for (j = 0; j < ncols; j++) {
23099371c9d4SSatish Balay       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
23109371c9d4SSatish Balay         offdiagA[r]   = *ba;
23119371c9d4SSatish Balay         offdiagIdx[r] = cmap[*bj];
23129371c9d4SSatish Balay       }
23139371c9d4SSatish Balay       ba++;
23149371c9d4SSatish Balay       bj++;
2315f07e67edSHong Zhang     }
2316f07e67edSHong Zhang   }
2317f07e67edSHong Zhang 
23189566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(v, &a));
23199566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2320f07e67edSHong Zhang   for (r = 0; r < m; ++r) {
2321f07e67edSHong Zhang     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2322f07e67edSHong Zhang       a[r] = diagA[r];
2323f07e67edSHong Zhang       if (idx) idx[r] = cstart + diagIdx[r];
2324f07e67edSHong Zhang     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2325f07e67edSHong Zhang       a[r] = diagA[r];
2326c87e5d42SMatthew Knepley       if (idx) {
2327f07e67edSHong Zhang         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2328f07e67edSHong Zhang           idx[r] = cstart + diagIdx[r];
2329f07e67edSHong Zhang         } else idx[r] = offdiagIdx[r];
2330f07e67edSHong Zhang       }
2331f07e67edSHong Zhang     } else {
2332f07e67edSHong Zhang       a[r] = offdiagA[r];
2333f07e67edSHong Zhang       if (idx) idx[r] = offdiagIdx[r];
2334c87e5d42SMatthew Knepley     }
2335c87e5d42SMatthew Knepley   }
23369566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
23379566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(v, &a));
23389566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
23399566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
23409566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&diagV));
23419566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&offdiagV));
23429566063dSJacob Faibussowitsch   PetscCall(PetscFree2(diagIdx, offdiagIdx));
23433ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2344c87e5d42SMatthew Knepley }
2345c87e5d42SMatthew Knepley 
2346ba38deedSJacob Faibussowitsch static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2347d71ae5a4SJacob Faibussowitsch {
234803bc72f1SMatthew Knepley   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2349fa213d2fSHong Zhang   PetscInt           m = A->rmap->n, n = A->cmap->n;
2350fa213d2fSHong Zhang   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
235103bc72f1SMatthew Knepley   PetscInt          *cmap = mat->garray;
235203bc72f1SMatthew Knepley   PetscInt          *diagIdx, *offdiagIdx;
235303bc72f1SMatthew Knepley   Vec                diagV, offdiagV;
2354ce496241SStefano Zampini   PetscScalar       *a, *diagA, *offdiagA;
2355ce496241SStefano Zampini   const PetscScalar *ba, *bav;
2356fa213d2fSHong Zhang   PetscInt           r, j, col, ncols, *bi, *bj;
2357fa213d2fSHong Zhang   Mat                B = mat->B;
2358fa213d2fSHong Zhang   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
235903bc72f1SMatthew Knepley 
236003bc72f1SMatthew Knepley   PetscFunctionBegin;
2361fa213d2fSHong Zhang   /* When a process holds entire A and other processes have no entry */
2362fa213d2fSHong Zhang   if (A->cmap->N == n) {
23639566063dSJacob Faibussowitsch     PetscCall(VecGetArrayWrite(v, &diagA));
23649566063dSJacob Faibussowitsch     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
23659566063dSJacob Faibussowitsch     PetscCall(MatGetRowMin(mat->A, diagV, idx));
23669566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&diagV));
23679566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayWrite(v, &diagA));
23683ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2369fa213d2fSHong Zhang   } else if (n == 0) {
2370fa213d2fSHong Zhang     if (m) {
23719566063dSJacob Faibussowitsch       PetscCall(VecGetArrayWrite(v, &a));
23729371c9d4SSatish Balay       for (r = 0; r < m; r++) {
23739371c9d4SSatish Balay         a[r] = PETSC_MAX_REAL;
23749371c9d4SSatish Balay         if (idx) idx[r] = -1;
23759371c9d4SSatish Balay       }
23769566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayWrite(v, &a));
2377fa213d2fSHong Zhang     }
23783ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2379fa213d2fSHong Zhang   }
2380fa213d2fSHong Zhang 
23819566063dSJacob Faibussowitsch   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
23829566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
23839566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
23849566063dSJacob Faibussowitsch   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2385fa213d2fSHong Zhang 
2386fa213d2fSHong Zhang   /* Get offdiagIdx[] for implicit 0.0 */
23879566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2388ce496241SStefano Zampini   ba = bav;
2389fa213d2fSHong Zhang   bi = b->i;
2390fa213d2fSHong Zhang   bj = b->j;
23919566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2392fa213d2fSHong Zhang   for (r = 0; r < m; r++) {
2393fa213d2fSHong Zhang     ncols = bi[r + 1] - bi[r];
2394fa213d2fSHong Zhang     if (ncols == A->cmap->N - n) { /* Brow is dense */
23959371c9d4SSatish Balay       offdiagA[r]   = *ba;
23969371c9d4SSatish Balay       offdiagIdx[r] = cmap[0];
2397fa213d2fSHong Zhang     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2398fa213d2fSHong Zhang       offdiagA[r] = 0.0;
2399fa213d2fSHong Zhang 
2400fa213d2fSHong Zhang       /* Find first hole in the cmap */
2401fa213d2fSHong Zhang       for (j = 0; j < ncols; j++) {
2402fa213d2fSHong Zhang         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2403fa213d2fSHong Zhang         if (col > j && j < cstart) {
2404fa213d2fSHong Zhang           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2405fa213d2fSHong Zhang           break;
2406fa213d2fSHong Zhang         } else if (col > j + n && j >= cstart) {
2407fa213d2fSHong Zhang           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2408fa213d2fSHong Zhang           break;
2409fa213d2fSHong Zhang         }
2410fa213d2fSHong Zhang       }
24114e879edeSHong Zhang       if (j == ncols && ncols < A->cmap->N - n) {
2412fa213d2fSHong Zhang         /* a hole is outside compressed Bcols */
2413fa213d2fSHong Zhang         if (ncols == 0) {
2414fa213d2fSHong Zhang           if (cstart) {
2415fa213d2fSHong Zhang             offdiagIdx[r] = 0;
2416fa213d2fSHong Zhang           } else offdiagIdx[r] = cend;
2417fa213d2fSHong Zhang         } else { /* ncols > 0 */
2418fa213d2fSHong Zhang           offdiagIdx[r] = cmap[ncols - 1] + 1;
2419fa213d2fSHong Zhang           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2420fa213d2fSHong Zhang         }
2421fa213d2fSHong Zhang       }
2422fa213d2fSHong Zhang     }
2423fa213d2fSHong Zhang 
2424fa213d2fSHong Zhang     for (j = 0; j < ncols; j++) {
24259371c9d4SSatish Balay       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
24269371c9d4SSatish Balay         offdiagA[r]   = *ba;
24279371c9d4SSatish Balay         offdiagIdx[r] = cmap[*bj];
24289371c9d4SSatish Balay       }
24299371c9d4SSatish Balay       ba++;
24309371c9d4SSatish Balay       bj++;
2431fa213d2fSHong Zhang     }
2432fa213d2fSHong Zhang   }
2433fa213d2fSHong Zhang 
24349566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(v, &a));
24359566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2436fa213d2fSHong Zhang   for (r = 0; r < m; ++r) {
2437fa213d2fSHong Zhang     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
243803bc72f1SMatthew Knepley       a[r] = diagA[r];
2439fa213d2fSHong Zhang       if (idx) idx[r] = cstart + diagIdx[r];
2440fa213d2fSHong Zhang     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2441fa213d2fSHong Zhang       a[r] = diagA[r];
2442fa213d2fSHong Zhang       if (idx) {
2443fa213d2fSHong Zhang         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
244403bc72f1SMatthew Knepley           idx[r] = cstart + diagIdx[r];
2445fa213d2fSHong Zhang         } else idx[r] = offdiagIdx[r];
2446fa213d2fSHong Zhang       }
244703bc72f1SMatthew Knepley     } else {
244803bc72f1SMatthew Knepley       a[r] = offdiagA[r];
2449fa213d2fSHong Zhang       if (idx) idx[r] = offdiagIdx[r];
245003bc72f1SMatthew Knepley     }
245103bc72f1SMatthew Knepley   }
24529566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
24539566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(v, &a));
24549566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
24559566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
24569566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&diagV));
24579566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&offdiagV));
24589566063dSJacob Faibussowitsch   PetscCall(PetscFree2(diagIdx, offdiagIdx));
24593ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
246003bc72f1SMatthew Knepley }
246103bc72f1SMatthew Knepley 
2462ba38deedSJacob Faibussowitsch static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2463d71ae5a4SJacob Faibussowitsch {
2464c87e5d42SMatthew Knepley   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
24651a254869SHong Zhang   PetscInt           m = A->rmap->n, n = A->cmap->n;
24661a254869SHong Zhang   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2467c87e5d42SMatthew Knepley   PetscInt          *cmap = mat->garray;
2468c87e5d42SMatthew Knepley   PetscInt          *diagIdx, *offdiagIdx;
2469c87e5d42SMatthew Knepley   Vec                diagV, offdiagV;
2470ce496241SStefano Zampini   PetscScalar       *a, *diagA, *offdiagA;
2471ce496241SStefano Zampini   const PetscScalar *ba, *bav;
24721a254869SHong Zhang   PetscInt           r, j, col, ncols, *bi, *bj;
24731a254869SHong Zhang   Mat                B = mat->B;
24741a254869SHong Zhang   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2475c87e5d42SMatthew Knepley 
2476c87e5d42SMatthew Knepley   PetscFunctionBegin;
24771a254869SHong Zhang   /* When a process holds entire A and other processes have no entry */
24781a254869SHong Zhang   if (A->cmap->N == n) {
24799566063dSJacob Faibussowitsch     PetscCall(VecGetArrayWrite(v, &diagA));
24809566063dSJacob Faibussowitsch     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
24819566063dSJacob Faibussowitsch     PetscCall(MatGetRowMax(mat->A, diagV, idx));
24829566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&diagV));
24839566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayWrite(v, &diagA));
24843ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
24851a254869SHong Zhang   } else if (n == 0) {
24861a254869SHong Zhang     if (m) {
24879566063dSJacob Faibussowitsch       PetscCall(VecGetArrayWrite(v, &a));
24889371c9d4SSatish Balay       for (r = 0; r < m; r++) {
24899371c9d4SSatish Balay         a[r] = PETSC_MIN_REAL;
24909371c9d4SSatish Balay         if (idx) idx[r] = -1;
24919371c9d4SSatish Balay       }
24929566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayWrite(v, &a));
24931a254869SHong Zhang     }
24943ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
24951a254869SHong Zhang   }
24961a254869SHong Zhang 
24979566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
24989566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
24999566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
25009566063dSJacob Faibussowitsch   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
25011a254869SHong Zhang 
25021a254869SHong Zhang   /* Get offdiagIdx[] for implicit 0.0 */
25039566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2504ce496241SStefano Zampini   ba = bav;
25051a254869SHong Zhang   bi = b->i;
25061a254869SHong Zhang   bj = b->j;
25079566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
25081a254869SHong Zhang   for (r = 0; r < m; r++) {
25091a254869SHong Zhang     ncols = bi[r + 1] - bi[r];
25101a254869SHong Zhang     if (ncols == A->cmap->N - n) { /* Brow is dense */
25119371c9d4SSatish Balay       offdiagA[r]   = *ba;
25129371c9d4SSatish Balay       offdiagIdx[r] = cmap[0];
25131a254869SHong Zhang     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
25141a254869SHong Zhang       offdiagA[r] = 0.0;
25151a254869SHong Zhang 
25161a254869SHong Zhang       /* Find first hole in the cmap */
25171a254869SHong Zhang       for (j = 0; j < ncols; j++) {
25181a254869SHong Zhang         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
25191a254869SHong Zhang         if (col > j && j < cstart) {
25201a254869SHong Zhang           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
25211a254869SHong Zhang           break;
25221a254869SHong Zhang         } else if (col > j + n && j >= cstart) {
25231a254869SHong Zhang           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
25241a254869SHong Zhang           break;
25251a254869SHong Zhang         }
25261a254869SHong Zhang       }
25274e879edeSHong Zhang       if (j == ncols && ncols < A->cmap->N - n) {
25281a254869SHong Zhang         /* a hole is outside compressed Bcols */
25291a254869SHong Zhang         if (ncols == 0) {
25301a254869SHong Zhang           if (cstart) {
25311a254869SHong Zhang             offdiagIdx[r] = 0;
25321a254869SHong Zhang           } else offdiagIdx[r] = cend;
25331a254869SHong Zhang         } else { /* ncols > 0 */
25341a254869SHong Zhang           offdiagIdx[r] = cmap[ncols - 1] + 1;
25351a254869SHong Zhang           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
25361a254869SHong Zhang         }
25371a254869SHong Zhang       }
25381a254869SHong Zhang     }
25391a254869SHong Zhang 
25401a254869SHong Zhang     for (j = 0; j < ncols; j++) {
25419371c9d4SSatish Balay       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
25429371c9d4SSatish Balay         offdiagA[r]   = *ba;
25439371c9d4SSatish Balay         offdiagIdx[r] = cmap[*bj];
25449371c9d4SSatish Balay       }
25459371c9d4SSatish Balay       ba++;
25469371c9d4SSatish Balay       bj++;
25471a254869SHong Zhang     }
25481a254869SHong Zhang   }
25491a254869SHong Zhang 
25509566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(v, &a));
25519566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
25521a254869SHong Zhang   for (r = 0; r < m; ++r) {
25531a254869SHong Zhang     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2554c87e5d42SMatthew Knepley       a[r] = diagA[r];
25551a254869SHong Zhang       if (idx) idx[r] = cstart + diagIdx[r];
25561a254869SHong Zhang     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
25571a254869SHong Zhang       a[r] = diagA[r];
25581a254869SHong Zhang       if (idx) {
25591a254869SHong Zhang         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2560c87e5d42SMatthew Knepley           idx[r] = cstart + diagIdx[r];
25611a254869SHong Zhang         } else idx[r] = offdiagIdx[r];
25621a254869SHong Zhang       }
2563c87e5d42SMatthew Knepley     } else {
2564c87e5d42SMatthew Knepley       a[r] = offdiagA[r];
25651a254869SHong Zhang       if (idx) idx[r] = offdiagIdx[r];
2566c87e5d42SMatthew Knepley     }
2567c87e5d42SMatthew Knepley   }
25689566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
25699566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(v, &a));
25709566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
25719566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
25729566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&diagV));
25739566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&offdiagV));
25749566063dSJacob Faibussowitsch   PetscCall(PetscFree2(diagIdx, offdiagIdx));
25753ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2576c87e5d42SMatthew Knepley }
2577c87e5d42SMatthew Knepley 
2578d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2579d71ae5a4SJacob Faibussowitsch {
2580f6d58c54SBarry Smith   Mat *dummy;
25815494a064SHong Zhang 
25825494a064SHong Zhang   PetscFunctionBegin;
25839566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2584f6d58c54SBarry Smith   *newmat = *dummy;
25859566063dSJacob Faibussowitsch   PetscCall(PetscFree(dummy));
25863ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25875494a064SHong Zhang }
25885494a064SHong Zhang 
2589ba38deedSJacob Faibussowitsch static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2590d71ae5a4SJacob Faibussowitsch {
2591bbead8a2SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2592bbead8a2SBarry Smith 
2593bbead8a2SBarry Smith   PetscFunctionBegin;
25949566063dSJacob Faibussowitsch   PetscCall(MatInvertBlockDiagonal(a->A, values));
25957b6c816cSBarry Smith   A->factorerrortype = a->A->factorerrortype;
25963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2597bbead8a2SBarry Smith }
2598bbead8a2SBarry Smith 
2599d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2600d71ae5a4SJacob Faibussowitsch {
260173a71a0fSBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
260273a71a0fSBarry Smith 
260373a71a0fSBarry Smith   PetscFunctionBegin;
260408401ef6SPierre Jolivet   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
26059566063dSJacob Faibussowitsch   PetscCall(MatSetRandom(aij->A, rctx));
2606679944adSJunchao Zhang   if (x->assembled) {
26079566063dSJacob Faibussowitsch     PetscCall(MatSetRandom(aij->B, rctx));
2608679944adSJunchao Zhang   } else {
26099566063dSJacob Faibussowitsch     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2610679944adSJunchao Zhang   }
26119566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
26129566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
26133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
261473a71a0fSBarry Smith }
2615bbead8a2SBarry Smith 
2616ba38deedSJacob Faibussowitsch static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2617d71ae5a4SJacob Faibussowitsch {
2618b1b1104fSBarry Smith   PetscFunctionBegin;
2619b1b1104fSBarry Smith   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2620b1b1104fSBarry Smith   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
26213ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2622b1b1104fSBarry Smith }
2623b1b1104fSBarry Smith 
2624b1b1104fSBarry Smith /*@
2625f2afee66SBarry Smith   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2626f2afee66SBarry Smith 
26272ef1f0ffSBarry Smith   Not Collective
2628f2afee66SBarry Smith 
2629f2afee66SBarry Smith   Input Parameter:
2630f2afee66SBarry Smith . A - the matrix
2631f2afee66SBarry Smith 
2632f2afee66SBarry Smith   Output Parameter:
2633f2afee66SBarry Smith . nz - the number of nonzeros
2634f2afee66SBarry Smith 
2635f2afee66SBarry Smith   Level: advanced
2636f2afee66SBarry Smith 
2637fe59aa6dSJacob Faibussowitsch .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2638f2afee66SBarry Smith @*/
2639d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2640d71ae5a4SJacob Faibussowitsch {
2641f2afee66SBarry Smith   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2642f2afee66SBarry Smith   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2643dfebb78cSStefano Zampini   PetscBool   isaij;
2644f2afee66SBarry Smith 
2645f2afee66SBarry Smith   PetscFunctionBegin;
2646dfebb78cSStefano Zampini   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2647dfebb78cSStefano Zampini   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2648f2afee66SBarry Smith   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
26493ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2650f2afee66SBarry Smith }
2651f2afee66SBarry Smith 
2652f2afee66SBarry Smith /*@
2653b1b1104fSBarry Smith   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2654b1b1104fSBarry Smith 
2655c3339decSBarry Smith   Collective
2656b1b1104fSBarry Smith 
2657b1b1104fSBarry Smith   Input Parameters:
2658b1b1104fSBarry Smith + A  - the matrix
265911a5261eSBarry Smith - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2660b1b1104fSBarry Smith 
266196a0c994SBarry Smith   Level: advanced
266296a0c994SBarry Smith 
2663fe59aa6dSJacob Faibussowitsch .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2664b1b1104fSBarry Smith @*/
2665d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2666d71ae5a4SJacob Faibussowitsch {
2667b1b1104fSBarry Smith   PetscFunctionBegin;
2668cac4c232SBarry Smith   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
26693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2670b1b1104fSBarry Smith }
2671b1b1104fSBarry Smith 
2672d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2673d71ae5a4SJacob Faibussowitsch {
2674b1b1104fSBarry Smith   PetscBool sc = PETSC_FALSE, flg;
2675b1b1104fSBarry Smith 
2676b1b1104fSBarry Smith   PetscFunctionBegin;
2677d0609cedSBarry Smith   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2678b1b1104fSBarry Smith   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
26799566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
26801baa6e33SBarry Smith   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2681d0609cedSBarry Smith   PetscOptionsHeadEnd();
26823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2683b1b1104fSBarry Smith }
2684b1b1104fSBarry Smith 
2685ba38deedSJacob Faibussowitsch static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2686d71ae5a4SJacob Faibussowitsch {
26877d68702bSBarry Smith   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2688c5e4d11fSDmitry Karpeev   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
26897d68702bSBarry Smith 
26907d68702bSBarry Smith   PetscFunctionBegin;
2691c5e4d11fSDmitry Karpeev   if (!Y->preallocated) {
26929566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
26935519a089SJose E. Roman   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2694b83222d8SBarry Smith     PetscInt nonew = aij->nonew;
26959566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2696b83222d8SBarry Smith     aij->nonew = nonew;
26977d68702bSBarry Smith   }
26989566063dSJacob Faibussowitsch   PetscCall(MatShift_Basic(Y, a));
26993ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
27007d68702bSBarry Smith }
27017d68702bSBarry Smith 
2702ba38deedSJacob Faibussowitsch static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2703d71ae5a4SJacob Faibussowitsch {
27043b49f96aSBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
27053b49f96aSBarry Smith 
27063b49f96aSBarry Smith   PetscFunctionBegin;
270708401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
27089566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(a->A, missing, d));
27093b49f96aSBarry Smith   if (d) {
27103b49f96aSBarry Smith     PetscInt rstart;
27119566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
27123b49f96aSBarry Smith     *d += rstart;
27133b49f96aSBarry Smith   }
27143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
27153b49f96aSBarry Smith }
27163b49f96aSBarry Smith 
2717ba38deedSJacob Faibussowitsch static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2718d71ae5a4SJacob Faibussowitsch {
2719a8ee9fb5SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2720a8ee9fb5SBarry Smith 
2721a8ee9fb5SBarry Smith   PetscFunctionBegin;
27229566063dSJacob Faibussowitsch   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
27233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2724a8ee9fb5SBarry Smith }
27253b49f96aSBarry Smith 
272658c11ad4SPierre Jolivet static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2727dec0b466SHong Zhang {
2728dec0b466SHong Zhang   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2729dec0b466SHong Zhang 
2730dec0b466SHong Zhang   PetscFunctionBegin;
273158c11ad4SPierre Jolivet   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
273258c11ad4SPierre Jolivet   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
27333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2734dec0b466SHong Zhang }
2735dec0b466SHong Zhang 
2736cda55fadSBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2737cda55fadSBarry Smith                                        MatGetRow_MPIAIJ,
2738cda55fadSBarry Smith                                        MatRestoreRow_MPIAIJ,
2739cda55fadSBarry Smith                                        MatMult_MPIAIJ,
274097304618SKris Buschelman                                        /* 4*/ MatMultAdd_MPIAIJ,
27417c922b88SBarry Smith                                        MatMultTranspose_MPIAIJ,
27427c922b88SBarry Smith                                        MatMultTransposeAdd_MPIAIJ,
2743f4259b30SLisandro Dalcin                                        NULL,
2744f4259b30SLisandro Dalcin                                        NULL,
2745f4259b30SLisandro Dalcin                                        NULL,
2746f4259b30SLisandro Dalcin                                        /*10*/ NULL,
2747f4259b30SLisandro Dalcin                                        NULL,
2748f4259b30SLisandro Dalcin                                        NULL,
274941f059aeSBarry Smith                                        MatSOR_MPIAIJ,
2750b7c46309SBarry Smith                                        MatTranspose_MPIAIJ,
275197304618SKris Buschelman                                        /*15*/ MatGetInfo_MPIAIJ,
2752cda55fadSBarry Smith                                        MatEqual_MPIAIJ,
2753cda55fadSBarry Smith                                        MatGetDiagonal_MPIAIJ,
2754cda55fadSBarry Smith                                        MatDiagonalScale_MPIAIJ,
2755cda55fadSBarry Smith                                        MatNorm_MPIAIJ,
275697304618SKris Buschelman                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2757cda55fadSBarry Smith                                        MatAssemblyEnd_MPIAIJ,
2758cda55fadSBarry Smith                                        MatSetOption_MPIAIJ,
2759cda55fadSBarry Smith                                        MatZeroEntries_MPIAIJ,
2760d519adbfSMatthew Knepley                                        /*24*/ MatZeroRows_MPIAIJ,
2761f4259b30SLisandro Dalcin                                        NULL,
2762f4259b30SLisandro Dalcin                                        NULL,
2763f4259b30SLisandro Dalcin                                        NULL,
2764f4259b30SLisandro Dalcin                                        NULL,
276526cec326SBarry Smith                                        /*29*/ MatSetUp_MPI_Hash,
2766f4259b30SLisandro Dalcin                                        NULL,
2767f4259b30SLisandro Dalcin                                        NULL,
2768a5b7ff6bSBarry Smith                                        MatGetDiagonalBlock_MPIAIJ,
2769f4259b30SLisandro Dalcin                                        NULL,
2770d519adbfSMatthew Knepley                                        /*34*/ MatDuplicate_MPIAIJ,
2771f4259b30SLisandro Dalcin                                        NULL,
2772f4259b30SLisandro Dalcin                                        NULL,
2773f4259b30SLisandro Dalcin                                        NULL,
2774f4259b30SLisandro Dalcin                                        NULL,
2775d519adbfSMatthew Knepley                                        /*39*/ MatAXPY_MPIAIJ,
27767dae84e0SHong Zhang                                        MatCreateSubMatrices_MPIAIJ,
2777cda55fadSBarry Smith                                        MatIncreaseOverlap_MPIAIJ,
2778cda55fadSBarry Smith                                        MatGetValues_MPIAIJ,
2779cb5b572fSBarry Smith                                        MatCopy_MPIAIJ,
2780d519adbfSMatthew Knepley                                        /*44*/ MatGetRowMax_MPIAIJ,
2781cda55fadSBarry Smith                                        MatScale_MPIAIJ,
27827d68702bSBarry Smith                                        MatShift_MPIAIJ,
278399e65526SBarry Smith                                        MatDiagonalSet_MPIAIJ,
2784564f14d6SBarry Smith                                        MatZeroRowsColumns_MPIAIJ,
278573a71a0fSBarry Smith                                        /*49*/ MatSetRandom_MPIAIJ,
27868a9c020eSBarry Smith                                        MatGetRowIJ_MPIAIJ,
27878a9c020eSBarry Smith                                        MatRestoreRowIJ_MPIAIJ,
2788f4259b30SLisandro Dalcin                                        NULL,
2789f4259b30SLisandro Dalcin                                        NULL,
279093dfae19SHong Zhang                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2791f4259b30SLisandro Dalcin                                        NULL,
2792cda55fadSBarry Smith                                        MatSetUnfactored_MPIAIJ,
279372e6a0cfSJed Brown                                        MatPermute_MPIAIJ,
2794f4259b30SLisandro Dalcin                                        NULL,
27957dae84e0SHong Zhang                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2796e03a110bSBarry Smith                                        MatDestroy_MPIAIJ,
2797e03a110bSBarry Smith                                        MatView_MPIAIJ,
2798f4259b30SLisandro Dalcin                                        NULL,
2799f4259b30SLisandro Dalcin                                        NULL,
2800f4259b30SLisandro Dalcin                                        /*64*/ NULL,
2801f996eeb8SHong Zhang                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2802f4259b30SLisandro Dalcin                                        NULL,
2803f4259b30SLisandro Dalcin                                        NULL,
2804f4259b30SLisandro Dalcin                                        NULL,
2805d519adbfSMatthew Knepley                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2806c87e5d42SMatthew Knepley                                        MatGetRowMinAbs_MPIAIJ,
2807f4259b30SLisandro Dalcin                                        NULL,
2808f4259b30SLisandro Dalcin                                        NULL,
2809f4259b30SLisandro Dalcin                                        NULL,
2810f4259b30SLisandro Dalcin                                        NULL,
28113acb8795SBarry Smith                                        /*75*/ MatFDColoringApply_AIJ,
2812b1b1104fSBarry Smith                                        MatSetFromOptions_MPIAIJ,
2813f4259b30SLisandro Dalcin                                        NULL,
2814f4259b30SLisandro Dalcin                                        NULL,
2815f1f41ecbSJed Brown                                        MatFindZeroDiagonals_MPIAIJ,
2816f4259b30SLisandro Dalcin                                        /*80*/ NULL,
2817f4259b30SLisandro Dalcin                                        NULL,
2818f4259b30SLisandro Dalcin                                        NULL,
28195bba2384SShri Abhyankar                                        /*83*/ MatLoad_MPIAIJ,
28206cff0a6bSPierre Jolivet                                        NULL,
2821f4259b30SLisandro Dalcin                                        NULL,
2822f4259b30SLisandro Dalcin                                        NULL,
2823f4259b30SLisandro Dalcin                                        NULL,
2824f4259b30SLisandro Dalcin                                        NULL,
2825f4259b30SLisandro Dalcin                                        /*89*/ NULL,
2826f4259b30SLisandro Dalcin                                        NULL,
282726be0446SHong Zhang                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2828f4259b30SLisandro Dalcin                                        NULL,
2829f4259b30SLisandro Dalcin                                        NULL,
2830cf3ca8ceSHong Zhang                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2831f4259b30SLisandro Dalcin                                        NULL,
2832f4259b30SLisandro Dalcin                                        NULL,
2833f4259b30SLisandro Dalcin                                        NULL,
2834b470e4b4SRichard Tran Mills                                        MatBindToCPU_MPIAIJ,
28354222ddf1SHong Zhang                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2836f4259b30SLisandro Dalcin                                        NULL,
2837f4259b30SLisandro Dalcin                                        NULL,
28382fd7e33dSBarry Smith                                        MatConjugate_MPIAIJ,
2839f4259b30SLisandro Dalcin                                        NULL,
2840d519adbfSMatthew Knepley                                        /*104*/ MatSetValuesRow_MPIAIJ,
284199cafbc1SBarry Smith                                        MatRealPart_MPIAIJ,
284269db28dcSHong Zhang                                        MatImaginaryPart_MPIAIJ,
2843f4259b30SLisandro Dalcin                                        NULL,
2844f4259b30SLisandro Dalcin                                        NULL,
2845f4259b30SLisandro Dalcin                                        /*109*/ NULL,
2846f4259b30SLisandro Dalcin                                        NULL,
28475494a064SHong Zhang                                        MatGetRowMin_MPIAIJ,
2848f4259b30SLisandro Dalcin                                        NULL,
28493b49f96aSBarry Smith                                        MatMissingDiagonal_MPIAIJ,
2850d1adec66SJed Brown                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2851f4259b30SLisandro Dalcin                                        NULL,
2852c5e4d11fSDmitry Karpeev                                        MatGetGhosts_MPIAIJ,
2853f4259b30SLisandro Dalcin                                        NULL,
2854f4259b30SLisandro Dalcin                                        NULL,
2855b215bc84SStefano Zampini                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2856f4259b30SLisandro Dalcin                                        NULL,
2857f4259b30SLisandro Dalcin                                        NULL,
2858f4259b30SLisandro Dalcin                                        NULL,
2859b9614d88SDmitry Karpeev                                        MatGetMultiProcBlock_MPIAIJ,
2860f2c98031SJed Brown                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2861a873a8cdSSam Reynolds                                        MatGetColumnReductions_MPIAIJ,
2862bbead8a2SBarry Smith                                        MatInvertBlockDiagonal_MPIAIJ,
2863a8ee9fb5SBarry Smith                                        MatInvertVariableBlockDiagonal_MPIAIJ,
28647dae84e0SHong Zhang                                        MatCreateSubMatricesMPI_MPIAIJ,
2865f4259b30SLisandro Dalcin                                        /*129*/ NULL,
2866f4259b30SLisandro Dalcin                                        NULL,
2867f4259b30SLisandro Dalcin                                        NULL,
2868187b3c17SHong Zhang                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2869f4259b30SLisandro Dalcin                                        NULL,
2870f4259b30SLisandro Dalcin                                        /*134*/ NULL,
2871f4259b30SLisandro Dalcin                                        NULL,
2872f4259b30SLisandro Dalcin                                        NULL,
2873f4259b30SLisandro Dalcin                                        NULL,
2874f4259b30SLisandro Dalcin                                        NULL,
287546533700Sstefano_zampini                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2876f4259b30SLisandro Dalcin                                        NULL,
2877f4259b30SLisandro Dalcin                                        NULL,
28789c8f2541SHong Zhang                                        MatFDColoringSetUp_MPIXAIJ,
2879a0b6529bSBarry Smith                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
28804222ddf1SHong Zhang                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2881f4259b30SLisandro Dalcin                                        /*145*/ NULL,
2882f4259b30SLisandro Dalcin                                        NULL,
288372833a62Smarkadams4                                        NULL,
288472833a62Smarkadams4                                        MatCreateGraph_Simple_AIJ,
28852d776b49SBarry Smith                                        NULL,
2886dec0b466SHong Zhang                                        /*150*/ NULL,
2887eede4a3fSMark Adams                                        MatEliminateZeros_MPIAIJ,
28884cc2b5b5SPierre Jolivet                                        MatGetRowSumAbs_MPIAIJ,
288942ce410bSJunchao Zhang                                        NULL,
289042ce410bSJunchao Zhang                                        NULL,
2891fe1fc275SAlexander                                        /*155*/ NULL,
2892fe1fc275SAlexander                                        MatCopyHashToXAIJ_MPI_Hash};
289336ce4990SBarry Smith 
2894ba38deedSJacob Faibussowitsch static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2895d71ae5a4SJacob Faibussowitsch {
28962e8a6d31SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
28972e8a6d31SBarry Smith 
28982e8a6d31SBarry Smith   PetscFunctionBegin;
28999566063dSJacob Faibussowitsch   PetscCall(MatStoreValues(aij->A));
29009566063dSJacob Faibussowitsch   PetscCall(MatStoreValues(aij->B));
29013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
29022e8a6d31SBarry Smith }
29032e8a6d31SBarry Smith 
2904ba38deedSJacob Faibussowitsch static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2905d71ae5a4SJacob Faibussowitsch {
29062e8a6d31SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
29072e8a6d31SBarry Smith 
29082e8a6d31SBarry Smith   PetscFunctionBegin;
29099566063dSJacob Faibussowitsch   PetscCall(MatRetrieveValues(aij->A));
29109566063dSJacob Faibussowitsch   PetscCall(MatRetrieveValues(aij->B));
29113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
29122e8a6d31SBarry Smith }
29138a729477SBarry Smith 
2914d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2915d71ae5a4SJacob Faibussowitsch {
2916ad79cf63SBarry Smith   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
29175d2a9ed1SStefano Zampini   PetscMPIInt size;
2918a23d5eceSKris Buschelman 
2919a23d5eceSKris Buschelman   PetscFunctionBegin;
2920ad79cf63SBarry Smith   if (B->hash_active) {
2921aea10558SJacob Faibussowitsch     B->ops[0]      = b->cops;
2922ad79cf63SBarry Smith     B->hash_active = PETSC_FALSE;
2923ad79cf63SBarry Smith   }
29249566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
29259566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
2926899cda47SBarry Smith 
2927cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE)
2928eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIDestroy(&b->colmap));
2929cb7b82ddSBarry Smith #else
29309566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->colmap));
2931cb7b82ddSBarry Smith #endif
29329566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->garray));
29339566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->lvec));
29349566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->Mvctx));
2935cb7b82ddSBarry Smith 
29369566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2937c508b908SBarry Smith 
2938c508b908SBarry Smith   MatSeqXAIJGetOptions_Private(b->B);
29399566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->B));
29409566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
29419566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
29429566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
29439566063dSJacob Faibussowitsch   PetscCall(MatSetType(b->B, MATSEQAIJ));
2944c508b908SBarry Smith   MatSeqXAIJRestoreOptions_Private(b->B);
2945cb7b82ddSBarry Smith 
2946c508b908SBarry Smith   MatSeqXAIJGetOptions_Private(b->A);
2947ad79cf63SBarry Smith   PetscCall(MatDestroy(&b->A));
29489566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
29499566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
29509566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
29519566063dSJacob Faibussowitsch   PetscCall(MatSetType(b->A, MATSEQAIJ));
2952c508b908SBarry Smith   MatSeqXAIJRestoreOptions_Private(b->A);
2953899cda47SBarry Smith 
29549566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
29559566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2956526dfc15SBarry Smith   B->preallocated  = PETSC_TRUE;
2957cb7b82ddSBarry Smith   B->was_assembled = PETSC_FALSE;
295815001458SStefano Zampini   B->assembled     = PETSC_FALSE;
29593ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2960a23d5eceSKris Buschelman }
2961a23d5eceSKris Buschelman 
2962ba38deedSJacob Faibussowitsch static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2963d71ae5a4SJacob Faibussowitsch {
2964ad79cf63SBarry Smith   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2965846b4da1SFande Kong 
2966846b4da1SFande Kong   PetscFunctionBegin;
2967846b4da1SFande Kong   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
29689566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
29699566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
2970846b4da1SFande Kong 
2971846b4da1SFande Kong #if defined(PETSC_USE_CTABLE)
2972eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIDestroy(&b->colmap));
2973846b4da1SFande Kong #else
29749566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->colmap));
2975846b4da1SFande Kong #endif
29769566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->garray));
29779566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->lvec));
29789566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->Mvctx));
2979846b4da1SFande Kong 
29809566063dSJacob Faibussowitsch   PetscCall(MatResetPreallocation(b->A));
29819566063dSJacob Faibussowitsch   PetscCall(MatResetPreallocation(b->B));
2982846b4da1SFande Kong   B->preallocated  = PETSC_TRUE;
2983846b4da1SFande Kong   B->was_assembled = PETSC_FALSE;
2984846b4da1SFande Kong   B->assembled     = PETSC_FALSE;
29853ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2986846b4da1SFande Kong }
2987846b4da1SFande Kong 
2988d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2989d71ae5a4SJacob Faibussowitsch {
2990d6dfbf8fSBarry Smith   Mat         mat;
2991416022c9SBarry Smith   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2992d6dfbf8fSBarry Smith 
29933a40ed3dSBarry Smith   PetscFunctionBegin;
2994f4259b30SLisandro Dalcin   *newmat = NULL;
29959566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
29969566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
29979566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
29989566063dSJacob Faibussowitsch   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2999273d9f13SBarry Smith   a = (Mat_MPIAIJ *)mat->data;
3000e1b6402fSHong Zhang 
3001d5f3da31SBarry Smith   mat->factortype = matin->factortype;
3002501880eeSStefano Zampini   mat->assembled  = matin->assembled;
3003e7641de0SSatish Balay   mat->insertmode = NOT_SET_VALUES;
3004d6dfbf8fSBarry Smith 
300517699dbbSLois Curfman McInnes   a->size         = oldmat->size;
300617699dbbSLois Curfman McInnes   a->rank         = oldmat->rank;
3007e7641de0SSatish Balay   a->donotstash   = oldmat->donotstash;
3008e7641de0SSatish Balay   a->roworiented  = oldmat->roworiented;
3009501880eeSStefano Zampini   a->rowindices   = NULL;
3010501880eeSStefano Zampini   a->rowvalues    = NULL;
3011bcd2baecSBarry Smith   a->getrowactive = PETSC_FALSE;
3012d6dfbf8fSBarry Smith 
30139566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
30149566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3015420957c1SBarry Smith   if (matin->hash_active) {
3016420957c1SBarry Smith     PetscCall(MatSetUp(mat));
3017420957c1SBarry Smith   } else {
3018420957c1SBarry Smith     mat->preallocated = matin->preallocated;
30192ee70a88SLois Curfman McInnes     if (oldmat->colmap) {
3020aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
3021eec179cfSJacob Faibussowitsch       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3022b1fc9764SSatish Balay #else
30239566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
30249566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3025b1fc9764SSatish Balay #endif
3026501880eeSStefano Zampini     } else a->colmap = NULL;
30273f41c07dSBarry Smith     if (oldmat->garray) {
3028b1d57f15SBarry Smith       PetscInt len;
3029d0f46423SBarry Smith       len = oldmat->B->cmap->n;
30309566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(len + 1, &a->garray));
30319566063dSJacob Faibussowitsch       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3032501880eeSStefano Zampini     } else a->garray = NULL;
3033d6dfbf8fSBarry Smith 
30340de76c62SStefano Zampini     /* It may happen MatDuplicate is called with a non-assembled matrix
30350de76c62SStefano Zampini       In fact, MatDuplicate only requires the matrix to be preallocated
30360de76c62SStefano Zampini       This may happen inside a DMCreateMatrix_Shell */
3037aa624791SPierre Jolivet     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3038cff58d65SJunchao Zhang     if (oldmat->Mvctx) {
3039cff58d65SJunchao Zhang       a->Mvctx = oldmat->Mvctx;
3040cff58d65SJunchao Zhang       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3041cff58d65SJunchao Zhang     }
30429566063dSJacob Faibussowitsch     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
30439566063dSJacob Faibussowitsch     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3044420957c1SBarry Smith   }
30459566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
30468a729477SBarry Smith   *newmat = mat;
30473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
30488a729477SBarry Smith }
3049416022c9SBarry Smith 
3050d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3051d71ae5a4SJacob Faibussowitsch {
305252f91c60SVaclav Hapla   PetscBool isbinary, ishdf5;
305352f91c60SVaclav Hapla 
305452f91c60SVaclav Hapla   PetscFunctionBegin;
305552f91c60SVaclav Hapla   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
305652f91c60SVaclav Hapla   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3057c27b3999SVaclav Hapla   /* force binary viewer to load .info file if it has not yet done so */
30589566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
30599566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
30609566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
306152f91c60SVaclav Hapla   if (isbinary) {
30629566063dSJacob Faibussowitsch     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
306352f91c60SVaclav Hapla   } else if (ishdf5) {
306452f91c60SVaclav Hapla #if defined(PETSC_HAVE_HDF5)
30659566063dSJacob Faibussowitsch     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
306652f91c60SVaclav Hapla #else
306752f91c60SVaclav Hapla     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
306852f91c60SVaclav Hapla #endif
306952f91c60SVaclav Hapla   } else {
307098921bdaSJacob Faibussowitsch     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
307152f91c60SVaclav Hapla   }
30723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
307352f91c60SVaclav Hapla }
307452f91c60SVaclav Hapla 
3075d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3076d71ae5a4SJacob Faibussowitsch {
30773ea6fe3dSLisandro Dalcin   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
30783ea6fe3dSLisandro Dalcin   PetscInt    *rowidxs, *colidxs;
30793ea6fe3dSLisandro Dalcin   PetscScalar *matvals;
30808fb81238SShri Abhyankar 
30818fb81238SShri Abhyankar   PetscFunctionBegin;
30829566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
30838fb81238SShri Abhyankar 
30843ea6fe3dSLisandro Dalcin   /* read in matrix header */
30859566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
308608401ef6SPierre Jolivet   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
30879371c9d4SSatish Balay   M  = header[1];
30889371c9d4SSatish Balay   N  = header[2];
30899371c9d4SSatish Balay   nz = header[3];
309008401ef6SPierre Jolivet   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
309108401ef6SPierre Jolivet   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
309208401ef6SPierre Jolivet   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
309308ea439dSMark F. Adams 
30943ea6fe3dSLisandro Dalcin   /* set block sizes from the viewer's .info file */
30959566063dSJacob Faibussowitsch   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
30963ea6fe3dSLisandro Dalcin   /* set global sizes if not set already */
30973ea6fe3dSLisandro Dalcin   if (mat->rmap->N < 0) mat->rmap->N = M;
30983ea6fe3dSLisandro Dalcin   if (mat->cmap->N < 0) mat->cmap->N = N;
30999566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->rmap));
31009566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->cmap));
31018fb81238SShri Abhyankar 
31023ea6fe3dSLisandro Dalcin   /* check if the matrix sizes are correct */
31039566063dSJacob Faibussowitsch   PetscCall(MatGetSize(mat, &rows, &cols));
3104aed4548fSBarry Smith   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
31058fb81238SShri Abhyankar 
31063ea6fe3dSLisandro Dalcin   /* read in row lengths and build row indices */
31079566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, NULL));
31089566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &rowidxs));
31099566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
31109371c9d4SSatish Balay   rowidxs[0] = 0;
31119371c9d4SSatish Balay   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
31121690c2aeSBarry Smith   if (nz != PETSC_INT_MAX) {
3113462c564dSBarry Smith     PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
311408401ef6SPierre Jolivet     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
311538b83642SBarry Smith   }
311638b83642SBarry Smith 
31173ea6fe3dSLisandro Dalcin   /* read in column indices and matrix values */
31189566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
31199566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
31209566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
31213ea6fe3dSLisandro Dalcin   /* store matrix indices and values */
31229566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
31239566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowidxs));
31249566063dSJacob Faibussowitsch   PetscCall(PetscFree2(colidxs, matvals));
31253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
31268fb81238SShri Abhyankar }
31278fb81238SShri Abhyankar 
31283782ecc7SHong Zhang /* Not scalable because of ISAllGather() unless getting all columns. */
3129ba38deedSJacob Faibussowitsch static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3130d71ae5a4SJacob Faibussowitsch {
31314aa3045dSJed Brown   IS          iscol_local;
3132c5e4d11fSDmitry Karpeev   PetscBool   isstride;
3133c5e4d11fSDmitry Karpeev   PetscMPIInt lisstride = 0, gisstride;
31343782ecc7SHong Zhang 
31353782ecc7SHong Zhang   PetscFunctionBegin;
31363782ecc7SHong Zhang   /* check if we are grabbing all columns*/
31379566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
31383782ecc7SHong Zhang 
3139c5e4d11fSDmitry Karpeev   if (isstride) {
3140c5e4d11fSDmitry Karpeev     PetscInt start, len, mstart, mlen;
31419566063dSJacob Faibussowitsch     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
31429566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol, &len));
31439566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3144c5e4d11fSDmitry Karpeev     if (mstart == start && mlen - mstart == len) lisstride = 1;
3145c5e4d11fSDmitry Karpeev   }
31463782ecc7SHong Zhang 
3147462c564dSBarry Smith   PetscCallMPI(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3148c5e4d11fSDmitry Karpeev   if (gisstride) {
3149c5e4d11fSDmitry Karpeev     PetscInt N;
31509566063dSJacob Faibussowitsch     PetscCall(MatGetSize(mat, NULL, &N));
31519566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
31529566063dSJacob Faibussowitsch     PetscCall(ISSetIdentity(iscol_local));
31539566063dSJacob Faibussowitsch     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3154c5e4d11fSDmitry Karpeev   } else {
3155c5bfad50SMark F. Adams     PetscInt cbs;
31569566063dSJacob Faibussowitsch     PetscCall(ISGetBlockSize(iscol, &cbs));
31579566063dSJacob Faibussowitsch     PetscCall(ISAllGather(iscol, &iscol_local));
31589566063dSJacob Faibussowitsch     PetscCall(ISSetBlockSize(iscol_local, cbs));
3159b79d0421SJed Brown   }
31603782ecc7SHong Zhang 
31613782ecc7SHong Zhang   *isseq = iscol_local;
31623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3163c5e4d11fSDmitry Karpeev }
31648d2139bdSHong Zhang 
3165ddfdf956SHong Zhang /*
31669c988bcaSHong Zhang  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
31679c988bcaSHong Zhang  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3168ddfdf956SHong Zhang 
3169ddfdf956SHong Zhang  Input Parameters:
317027430b45SBarry Smith +   mat - matrix
317127430b45SBarry Smith .   isrow - parallel row index set; its local indices are a subset of local columns of `mat`,
31729c988bcaSHong Zhang            i.e., mat->rstart <= isrow[i] < mat->rend
317327430b45SBarry Smith -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3174ddfdf956SHong Zhang            i.e., mat->cstart <= iscol[i] < mat->cend
317527430b45SBarry Smith 
317627430b45SBarry Smith  Output Parameters:
317727430b45SBarry Smith +   isrow_d - sequential row index set for retrieving mat->A
317827430b45SBarry Smith .   iscol_d - sequential  column index set for retrieving mat->A
317927430b45SBarry Smith .   iscol_o - sequential column index set for retrieving mat->B
318027430b45SBarry Smith -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3181ddfdf956SHong Zhang  */
3182835f2295SStefano Zampini static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[])
3183d71ae5a4SJacob Faibussowitsch {
3184040216a4SHong Zhang   Vec             x, cmap;
3185040216a4SHong Zhang   const PetscInt *is_idx;
3186040216a4SHong Zhang   PetscScalar    *xarray, *cmaparray;
31879c988bcaSHong Zhang   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3188040216a4SHong Zhang   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3189040216a4SHong Zhang   Mat             B    = a->B;
3190040216a4SHong Zhang   Vec             lvec = a->lvec, lcmap;
3191a31a438cSHong Zhang   PetscInt        i, cstart, cend, Bn = B->cmap->N;
31928b3fa1f7SHong Zhang   MPI_Comm        comm;
31933a8d973cSHong Zhang   VecScatter      Mvctx = a->Mvctx;
31943782ecc7SHong Zhang 
31953782ecc7SHong Zhang   PetscFunctionBegin;
31969566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
31979566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol, &ncols));
31988b3fa1f7SHong Zhang 
3199ddfdf956SHong Zhang   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
32009566063dSJacob Faibussowitsch   PetscCall(MatCreateVecs(mat, &x, NULL));
32019566063dSJacob Faibussowitsch   PetscCall(VecSet(x, -1.0));
32029566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(x, &cmap));
32039566063dSJacob Faibussowitsch   PetscCall(VecSet(cmap, -1.0));
32040a351717SHong Zhang 
32059c988bcaSHong Zhang   /* Get start indices */
32069566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3207ddfdf956SHong Zhang   isstart -= ncols;
32089566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3209040216a4SHong Zhang 
32109566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iscol, &is_idx));
32119566063dSJacob Faibussowitsch   PetscCall(VecGetArray(x, &xarray));
32129566063dSJacob Faibussowitsch   PetscCall(VecGetArray(cmap, &cmaparray));
32139566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ncols, &idx));
3214ddfdf956SHong Zhang   for (i = 0; i < ncols; i++) {
32158b3fa1f7SHong Zhang     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3216ddfdf956SHong Zhang     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
32179c988bcaSHong Zhang     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
32188b3fa1f7SHong Zhang   }
32199566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(x, &xarray));
32209566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(cmap, &cmaparray));
32219566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &is_idx));
32228b3fa1f7SHong Zhang 
32239c988bcaSHong Zhang   /* Get iscol_d */
32249566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
32259566063dSJacob Faibussowitsch   PetscCall(ISGetBlockSize(iscol, &i));
32269566063dSJacob Faibussowitsch   PetscCall(ISSetBlockSize(*iscol_d, i));
3227feb78a15SHong Zhang 
32289c988bcaSHong Zhang   /* Get isrow_d */
32299566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(isrow, &m));
3230feb78a15SHong Zhang   rstart = mat->rmap->rstart;
32319566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &idx));
32329566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &is_idx));
32339c988bcaSHong Zhang   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
32349566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &is_idx));
3235feb78a15SHong Zhang 
32369566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
32379566063dSJacob Faibussowitsch   PetscCall(ISGetBlockSize(isrow, &i));
32389566063dSJacob Faibussowitsch   PetscCall(ISSetBlockSize(*isrow_d, i));
3239feb78a15SHong Zhang 
32409c988bcaSHong Zhang   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
32419566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
32429566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3243ddfdf956SHong Zhang 
32449566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(lvec, &lcmap));
324507250d77SHong Zhang 
32469566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
32479566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
324864efcef9SHong Zhang 
32499c988bcaSHong Zhang   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3250ddfdf956SHong Zhang   /* off-process column indices */
32519c988bcaSHong Zhang   count = 0;
32529566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(Bn, &idx));
32539566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(Bn, &cmap1));
3254feb78a15SHong Zhang 
32559566063dSJacob Faibussowitsch   PetscCall(VecGetArray(lvec, &xarray));
32569566063dSJacob Faibussowitsch   PetscCall(VecGetArray(lcmap, &cmaparray));
32578b3fa1f7SHong Zhang   for (i = 0; i < Bn; i++) {
3258f73421bfSHong Zhang     if (PetscRealPart(xarray[i]) > -1.0) {
32599c988bcaSHong Zhang       idx[count]   = i;                                     /* local column index in off-diagonal part B */
32601c645242SHong Zhang       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
32611c645242SHong Zhang       count++;
32628b3fa1f7SHong Zhang     }
32638b3fa1f7SHong Zhang   }
32649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(lvec, &xarray));
32659566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(lcmap, &cmaparray));
326607250d77SHong Zhang 
32679566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3268b6d9b4e0SHong Zhang   /* cannot ensure iscol_o has same blocksize as iscol! */
3269b6d9b4e0SHong Zhang 
32709566063dSJacob Faibussowitsch   PetscCall(PetscFree(idx));
32719c988bcaSHong Zhang   *garray = cmap1;
32729c988bcaSHong Zhang 
32739566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&x));
32749566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&cmap));
32759566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&lcmap));
32763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3277040216a4SHong Zhang }
3278040216a4SHong Zhang 
3279b20e2604SHong Zhang /*
  MatCreateSubMatrix_MPIAIJ_SameRowColDist - builds (MAT_INITIAL_MATRIX) or refreshes (MAT_REUSE_MATRIX) the
  submatrix *submat of mat; isrow and iscol have same processor distribution as mat, output *submat is a
  submatrix of local mat.

  MAT_INITIAL_MATRIX: the sequential index sets isrow_d, iscol_d, iscol_o and the array garray are obtained
  from ISGetSeqIS_SameColDist_Private(); sequential submatrices are extracted from a->A and a->B and passed to
  MatCreateMPIAIJWithSeqAIJ(); the three index sets are then composed on the result under the names
  "isrow_d", "iscol_d" and "iscol_o".

  MAT_REUSE_MATRIX: those three index sets are queried back from *submat (an error is raised when one is
  missing) and the two sequential blocks of *submat are re-extracted in place before *submat is reassembled.
*/
3280d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3281d71ae5a4SJacob Faibussowitsch {
3282b20e2604SHong Zhang   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
32831fd43edeSHong Zhang   Mat         M = NULL;
32843b00a383SHong Zhang   MPI_Comm    comm;
3285b20e2604SHong Zhang   IS          iscol_d, isrow_d, iscol_o;
32863b00a383SHong Zhang   Mat         Asub = NULL, Bsub = NULL;
3287b20e2604SHong Zhang   PetscInt    n;
32883b00a383SHong Zhang 
32893b00a383SHong Zhang   PetscFunctionBegin;
32909566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
32913b00a383SHong Zhang 
32923b00a383SHong Zhang   if (call == MAT_REUSE_MATRIX) {
3293b20e2604SHong Zhang     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
32949566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
329528b400f6SJacob Faibussowitsch     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
32963b00a383SHong Zhang 
32979566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
329828b400f6SJacob Faibussowitsch     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
32993b00a383SHong Zhang 
33009566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
330128b400f6SJacob Faibussowitsch     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
33023b00a383SHong Zhang 
3303b20e2604SHong Zhang     /* Update diagonal and off-diagonal portions of submat */
3304b20e2604SHong Zhang     asub = (Mat_MPIAIJ *)(*submat)->data;
33059566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
33069566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol_o, &n));
    /* The off-diagonal block is only re-extracted when the stored iscol_o is non-empty */
330748a46eb9SPierre Jolivet     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
33089566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
33099566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
33103b00a383SHong Zhang 
33113b00a383SHong Zhang   } else { /* call == MAT_INITIAL_MATRIX */
3312835f2295SStefano Zampini     PetscInt *garray;
3313b20e2604SHong Zhang     PetscInt  BsubN;
33143b00a383SHong Zhang 
3315b20e2604SHong Zhang     /* Create isrow_d, iscol_d, iscol_o and garray; garray is released with PetscFree() further down */
33169566063dSJacob Faibussowitsch     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
33173b00a383SHong Zhang 
3318b20e2604SHong Zhang     /* Create local submatrices Asub and Bsub */
33199566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
33209566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
33213b00a383SHong Zhang 
33229c988bcaSHong Zhang     /* Create submatrix M; Asub becomes M's diagonal block and Bsub is destroyed by the callee, so neither is destroyed here */
33239566063dSJacob Faibussowitsch     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
33243b00a383SHong Zhang 
3325b20e2604SHong Zhang     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3326b20e2604SHong Zhang     asub = (Mat_MPIAIJ *)M->data;
33277cfce09cSHong Zhang 
33289566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    /* n: number of columns of M's off-diagonal block after MatCreateMPIAIJWithSeqAIJ(); BsubN: entries in iscol_o */
3329b20e2604SHong Zhang     n = asub->B->cmap->N;
3330b20e2604SHong Zhang     if (BsubN > n) {
3331c4762a1bSJed Brown       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
33327cfce09cSHong Zhang       const PetscInt *idx;
33339c988bcaSHong Zhang       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
33349566063dSJacob Faibussowitsch       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
33357cfce09cSHong Zhang 
33369566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(n, &idx_new));
33377cfce09cSHong Zhang       j = 0;
33389566063dSJacob Faibussowitsch       PetscCall(ISGetIndices(iscol_o, &idx));
      /*
        Walk subgarray (n entries) and garray (BsubN entries) together: for every subgarray[i] advance j until
        garray[j] is no longer smaller, require the two to be equal, and keep the matching entry idx[j] of iscol_o.
        NOTE(review): the inner while loop has no j < BsubN bound, so it relies on every subgarray[i] being present
        in garray; also, when the outer loop breaks early, the remaining idx_new entries are not assigned by this
        loop. Confirm both cannot happen for valid input.
      */
3339b20e2604SHong Zhang       for (i = 0; i < n; i++) {
33407cfce09cSHong Zhang         if (j >= BsubN) break;
33419c988bcaSHong Zhang         while (subgarray[i] > garray[j]) j++;
33427cfce09cSHong Zhang 
33439c988bcaSHong Zhang         if (subgarray[i] == garray[j]) {
33447cfce09cSHong Zhang           idx_new[i] = idx[j++];
334598921bdaSJacob Faibussowitsch         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
33467cfce09cSHong Zhang       }
33479566063dSJacob Faibussowitsch       PetscCall(ISRestoreIndices(iscol_o, &idx));
33487cfce09cSHong Zhang 
      /* Replace iscol_o by the compressed index set; idx_new is handed over with PETSC_OWN_POINTER */
33499566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&iscol_o));
33509566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
33517cfce09cSHong Zhang 
3352b20e2604SHong Zhang     } else if (BsubN < n) {
335398921bdaSJacob Faibussowitsch       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3354b20e2604SHong Zhang     }
33557cfce09cSHong Zhang 
33569566063dSJacob Faibussowitsch     PetscCall(PetscFree(garray));
3357b20e2604SHong Zhang     *submat = M;
33583b00a383SHong Zhang 
3359e489de8fSHong Zhang     /* Save isrow_d, iscol_d and iscol_o used in processor for next request; the MAT_REUSE_MATRIX branch above queries them back by these names */
33609566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
33619566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&isrow_d));
33623b00a383SHong Zhang 
33639566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
33649566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol_d));
33653b00a383SHong Zhang 
33669566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
33679566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol_o));
33683b00a383SHong Zhang   }
33693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
33703b00a383SHong Zhang }
33713b00a383SHong Zhang 
/*
  MatCreateSubMatrix_MPIAIJ - extracts the submatrix of mat selected by isrow and iscol into *newmat,
  dispatching to one of three implementations:

    - MatCreateSubMatrix_MPIAIJ_SameRowColDist() when both isrow and iscol have the same processor
      distribution as mat;
    - MatCreateSubMatrix_MPIAIJ_SameRowDist() when only isrow does and the gathered iscol is sorted;
    - MatCreateSubMatrix_MPIAIJ_nonscalable() otherwise (general case).

  With MAT_INITIAL_MATRIX the choice is made from the min/max of each index set compared against the local
  ownership ranges of mat, combined over the communicator with a logical AND.
  With MAT_REUSE_MATRIX the choice is made from which objects are composed on *newmat ("isrow_d", "SubIScol",
  or "ISAllGather").
*/
3372d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3373d71ae5a4SJacob Faibussowitsch {
33741358a193SHong Zhang   IS        iscol_local = NULL, isrow_d;
33753782ecc7SHong Zhang   PetscInt  csize;
337618e627e3SHong Zhang   PetscInt  n, i, j, start, end;
  /* sameDist[] holds the local flags {rows, columns}; tsameDist[] the values agreed on by all ranks */
33774a3daf6eSHong Zhang   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
33783782ecc7SHong Zhang   MPI_Comm  comm;
33793782ecc7SHong Zhang 
33803782ecc7SHong Zhang   PetscFunctionBegin;
3381bcae8d28SHong Zhang   /* If isrow has same processor distribution as mat,
3382a31a438cSHong Zhang      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
33838f69fa7bSHong Zhang   if (call == MAT_REUSE_MATRIX) {
    /* Recover the path used at creation time from the objects composed on *newmat.
       tsameDist[1] is assigned only in the branches that also set sameRowDist, which is the only case in which it is read below. */
33849566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3385d5761cdaSHong Zhang     if (isrow_d) {
3386d5761cdaSHong Zhang       sameRowDist  = PETSC_TRUE;
3387d5761cdaSHong Zhang       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3388d5761cdaSHong Zhang     } else {
33899566063dSJacob Faibussowitsch       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3390d5761cdaSHong Zhang       if (iscol_local) {
3391d5761cdaSHong Zhang         sameRowDist  = PETSC_TRUE;
3392d5761cdaSHong Zhang         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3393d5761cdaSHong Zhang       }
3394d5761cdaSHong Zhang     }
33958f69fa7bSHong Zhang   } else {
3396e489de8fSHong Zhang     /* Check if isrow has same processor distribution as mat: true locally when isrow is empty here or
       all of its indices lie inside this rank's row ownership range [start, end) */
339718e627e3SHong Zhang     sameDist[0] = PETSC_FALSE;
33989566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(isrow, &n));
33993782ecc7SHong Zhang     if (!n) {
340018e627e3SHong Zhang       sameDist[0] = PETSC_TRUE;
34013782ecc7SHong Zhang     } else {
34029566063dSJacob Faibussowitsch       PetscCall(ISGetMinMax(isrow, &i, &j));
34039566063dSJacob Faibussowitsch       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3404ad540459SPierre Jolivet       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
34058f69fa7bSHong Zhang     }
34063782ecc7SHong Zhang 
3407e489de8fSHong Zhang     /* Check if iscol has same processor distribution as mat: same test against the column ownership range */
340818e627e3SHong Zhang     sameDist[1] = PETSC_FALSE;
34099566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol, &n));
341018e627e3SHong Zhang     if (!n) {
341118e627e3SHong Zhang       sameDist[1] = PETSC_TRUE;
341218e627e3SHong Zhang     } else {
34139566063dSJacob Faibussowitsch       PetscCall(ISGetMinMax(iscol, &i, &j));
34149566063dSJacob Faibussowitsch       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
341518e627e3SHong Zhang       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
341618e627e3SHong Zhang     }
341718e627e3SHong Zhang 
    /* Both flags must hold on every rank of the communicator (logical AND reduction) */
34189566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3419462c564dSBarry Smith     PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
342018e627e3SHong Zhang     sameRowDist = tsameDist[0];
342118e627e3SHong Zhang   }
342218e627e3SHong Zhang 
342318e627e3SHong Zhang   if (sameRowDist) {
3424b20e2604SHong Zhang     if (tsameDist[1]) { /* sameRowDist & sameColDist */
34253b00a383SHong Zhang       /* isrow and iscol have same processor distribution as mat */
34269566063dSJacob Faibussowitsch       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
34273ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
3428b20e2604SHong Zhang     } else { /* sameRowDist */
34293b00a383SHong Zhang       /* isrow has same processor distribution as mat */
34301358a193SHong Zhang       if (call == MAT_INITIAL_MATRIX) {
34311358a193SHong Zhang         PetscBool sorted;
34329566063dSJacob Faibussowitsch         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
34339566063dSJacob Faibussowitsch         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
34349566063dSJacob Faibussowitsch         PetscCall(ISGetSize(iscol, &i));
343508401ef6SPierre Jolivet         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
34361358a193SHong Zhang 
34379566063dSJacob Faibussowitsch         PetscCall(ISSorted(iscol_local, &sorted));
34381358a193SHong Zhang         if (sorted) {
34391358a193SHong Zhang           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
34409566063dSJacob Faibussowitsch           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
34413ba16761SJacob Faibussowitsch           PetscFunctionReturn(PETSC_SUCCESS);
34423782ecc7SHong Zhang         }
        /* Not sorted: fall through to the general case below, which uses the iscol_local gathered here */
34431358a193SHong Zhang       } else { /* call == MAT_REUSE_MATRIX */
344448c0d076SHong Zhang         IS iscol_sub;
34459566063dSJacob Faibussowitsch         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
344648c0d076SHong Zhang         if (iscol_sub) {
34479566063dSJacob Faibussowitsch           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
34483ba16761SJacob Faibussowitsch           PetscFunctionReturn(PETSC_SUCCESS);
344948c0d076SHong Zhang         }
34501358a193SHong Zhang       }
34511358a193SHong Zhang     }
34521358a193SHong Zhang   }
34533782ecc7SHong Zhang 
3454bcae8d28SHong Zhang   /* General case: iscol -> iscol_local which has global size of iscol */
34553782ecc7SHong Zhang   if (call == MAT_REUSE_MATRIX) {
34569566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
345728b400f6SJacob Faibussowitsch     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
34583782ecc7SHong Zhang   } else {
    /* iscol_local may already have been gathered above (same-row-distribution case with unsorted columns) */
345948a46eb9SPierre Jolivet     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
34601358a193SHong Zhang   }
34613782ecc7SHong Zhang 
  /* csize: local size of iscol, forwarded as the csize argument of the nonscalable routine */
34629566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol, &csize));
34639566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
34648f69fa7bSHong Zhang 
  /* Keep the gathered column index set on the new matrix so that a later MAT_REUSE_MATRIX call can query it */
3465b79d0421SJed Brown   if (call == MAT_INITIAL_MATRIX) {
34669566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
34679566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol_local));
3468b79d0421SJed Brown   }
34693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
34704aa3045dSJed Brown }
34714aa3045dSJed Brown 
3472feb78a15SHong Zhang /*@C
347311a5261eSBarry Smith   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3474feb78a15SHong Zhang   and "off-diagonal" part of the matrix in CSR format.
3475feb78a15SHong Zhang 
3476d083f849SBarry Smith   Collective
3477feb78a15SHong Zhang 
3478feb78a15SHong Zhang   Input Parameters:
3479feb78a15SHong Zhang + comm   - MPI communicator
3480feb78a15SHong Zhang . A      - "diagonal" portion of matrix
3481b20e2604SHong Zhang . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
34822ef1f0ffSBarry Smith - garray - global index of `B` columns
3483feb78a15SHong Zhang 
3484feb78a15SHong Zhang   Output Parameter:
34852ef1f0ffSBarry Smith . mat - the matrix, with input `A` as its local diagonal matrix
348627430b45SBarry Smith 
3487feb78a15SHong Zhang   Level: advanced
3488feb78a15SHong Zhang 
3489feb78a15SHong Zhang   Notes:
349011a5261eSBarry Smith   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
349111a5261eSBarry Smith 
34922ef1f0ffSBarry Smith   `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.

  `garray` is indexed by the column indices stored in `B`; those stored indices are overwritten in place with
  the corresponding `garray` values before the off-diagonal block of the output matrix is built on top of them.

  The local sizes of the output matrix are taken from `A`; its global number of columns is the sum of the
  column counts of `A` over `comm`.
3493feb78a15SHong Zhang 
34941cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3495feb78a15SHong Zhang @*/
3496d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3497d71ae5a4SJacob Faibussowitsch {
3498feb78a15SHong Zhang   Mat_MPIAIJ        *maij;
3499e489de8fSHong Zhang   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  /* oi, oj alias B's own i and j arrays; oi[m] is used below as the number of stored entries */
3500a5348796SHong Zhang   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3501ce496241SStefano Zampini   const PetscScalar *oa;
3502e489de8fSHong Zhang   Mat                Bnew;
3503feb78a15SHong Zhang   PetscInt           m, n, N;
35044ab4d6f4SRichard Tran Mills   MatType            mpi_mat_type;
3505feb78a15SHong Zhang 
3506feb78a15SHong Zhang   PetscFunctionBegin;
35079566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
35089566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, &m, &n));
  /* A and B must have the same number of rows and the same (absolute) row block size */
350908401ef6SPierre Jolivet   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
351037a5e0faSPierre Jolivet   PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3511b6d9b4e0SHong Zhang   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
351208401ef6SPierre Jolivet   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3513feb78a15SHong Zhang 
3514e489de8fSHong Zhang   /* Get global columns of mat: sum of A's column counts over comm */
3515462c564dSBarry Smith   PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3516feb78a15SHong Zhang 
35179566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
35184ab4d6f4SRichard Tran Mills   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
35194ab4d6f4SRichard Tran Mills   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
35204ab4d6f4SRichard Tran Mills   PetscCall(MatSetType(*mat, mpi_mat_type));
35214ab4d6f4SRichard Tran Mills 
352237a5e0faSPierre Jolivet   if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3523feb78a15SHong Zhang   maij = (Mat_MPIAIJ *)(*mat)->data;
3524feb78a15SHong Zhang 
  /* The two blocks are installed directly below, so the matrix is flagged as preallocated by hand */
3525feb78a15SHong Zhang   (*mat)->preallocated = PETSC_TRUE;
3526feb78a15SHong Zhang 
35279566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp((*mat)->rmap));
35289566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3529feb78a15SHong Zhang 
3530e489de8fSHong Zhang   /* Set A as diagonal portion of *mat */
3531feb78a15SHong Zhang   maij->A = A;
3532feb78a15SHong Zhang 
  /* Overwrite, in place, every stored column index of B with its garray[] value */
3533a5348796SHong Zhang   nz = oi[m];
3534a5348796SHong Zhang   for (i = 0; i < nz; i++) {
3535a5348796SHong Zhang     col   = oj[i];
3536a5348796SHong Zhang     oj[i] = garray[col];
3537feb78a15SHong Zhang   }
3538feb78a15SHong Zhang 
3539e489de8fSHong Zhang   /* Set Bnew as off-diagonal portion of *mat; Bnew is an m by N matrix created on B's oi, oj and value arrays */
35409566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
35419566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
35429566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3543e489de8fSHong Zhang   bnew        = (Mat_SeqAIJ *)Bnew->data;
3544e489de8fSHong Zhang   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3545e489de8fSHong Zhang   maij->B     = Bnew;
3546d5761cdaSHong Zhang 
  /* Sanity check on the row counts of B and Bnew (rmap->N is what is compared) */
354708401ef6SPierre Jolivet   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3548d5761cdaSHong Zhang 
  /* Hand the arrays over from B to Bnew: B's free flags are cleared before B is destroyed and Bnew's are set
     afterwards, presumably so that the arrays now used by Bnew survive MatDestroy(&B) and are freed with Bnew */
3549d5761cdaSHong Zhang   b->free_a  = PETSC_FALSE;
3550d5761cdaSHong Zhang   b->free_ij = PETSC_FALSE;
35519566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
3552d5761cdaSHong Zhang 
3553e489de8fSHong Zhang   bnew->free_a  = PETSC_TRUE;
3554e489de8fSHong Zhang   bnew->free_ij = PETSC_TRUE;
3555feb78a15SHong Zhang 
3556a5348796SHong Zhang   /* condense columns of maij->B; MAT_NO_OFF_PROC_ENTRIES is switched on only for the duration of this assembly */
35579566063dSJacob Faibussowitsch   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
35589566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
35599566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
35609566063dSJacob Faibussowitsch   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
35619566063dSJacob Faibussowitsch   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
35623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3563feb78a15SHong Zhang }
3564feb78a15SHong Zhang 
3565ef514586SHong Zhang extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
35664aa3045dSJed Brown 
/*
  MatCreateSubMatrix_MPIAIJ_SameRowDist - extracts the submatrix of mat selected by isrow and iscol into
  *newmat through an intermediate sequential matrix Msub.

  iscol_local is the sequential index set holding all Ncols indices of iscol. It is used unconditionally with
  MAT_INITIAL_MATRIX (the implementation requires it to be sorted) and is not used for extraction with
  MAT_REUSE_MATRIX. When it is non-NULL on a MAT_INITIAL_MATRIX call, it is composed on *newmat as
  "ISAllGather" and the reference passed in is destroyed here.

  MAT_INITIAL_MATRIX:
    (1)-(2) build iscol_sub (column indices of mat to extract locally) and iscmap (for each of them, its
            column position in the submatrix),
    (3)     create Msub with MatCreateSubMatrices_MPIAIJ_SingleIS_Local(),
    (4)     create and preallocate the parallel matrix M,
    (5)     copy the rows of Msub into M, mapping columns through iscmap,
    then compose Msub, iscol_sub and iscmap on *newmat as "SubMatrix", "SubIScol" and "Subcmap".

  MAT_REUSE_MATRIX: those three objects are queried back from *newmat, Msub is refreshed, *newmat is zeroed
  and step (5) is repeated.
*/
3567d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3568d71ae5a4SJacob Faibussowitsch {
356998b658c4SHong Zhang   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
357085f27616SHong Zhang   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
357198b658c4SHong Zhang   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
35721fd43edeSHong Zhang   Mat             M, Msub, B = a->B;
357398b658c4SHong Zhang   MatScalar      *aa;
357400e6dbe6SBarry Smith   Mat_SeqAIJ     *aij;
3575a31a438cSHong Zhang   PetscInt       *garray = a->garray, *colsub, Ncols;
  /* Bn: number of columns of mat's off-diagonal block; [cstart, cend): column range taken from mat->cmap */
357698b658c4SHong Zhang   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
357798b658c4SHong Zhang   IS              iscol_sub, iscmap;
357898b658c4SHong Zhang   const PetscInt *is_idx, *cmap;
357918e627e3SHong Zhang   PetscBool       allcolumns = PETSC_FALSE;
3580a31a438cSHong Zhang   MPI_Comm        comm;
35817e2c5f70SBarry Smith 
3582a0ff6018SBarry Smith   PetscFunctionBegin;
35839566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3584d5761cdaSHong Zhang   if (call == MAT_REUSE_MATRIX) {
    /* Retrieve the objects composed on *newmat when it was first created, then refresh Msub from mat */
35859566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
358628b400f6SJacob Faibussowitsch     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
35879566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol_sub, &count));
3588d5761cdaSHong Zhang 
35899566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
359028b400f6SJacob Faibussowitsch     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3591d5761cdaSHong Zhang 
35929566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
359328b400f6SJacob Faibussowitsch     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3594d5761cdaSHong Zhang 
35959566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3596d5761cdaSHong Zhang 
3597d5761cdaSHong Zhang   } else { /* call == MAT_INITIAL_MATRIX */
35983b00a383SHong Zhang     PetscBool flg;
35993b00a383SHong Zhang 
    /* n: local size of iscol; Ncols: global size of iscol */
36009566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol, &n));
36019566063dSJacob Faibussowitsch     PetscCall(ISGetSize(iscol, &Ncols));
3602bcae8d28SHong Zhang 
36033b00a383SHong Zhang     /* (1) iscol -> nonscalable iscol_local */
3604366a327dSHong Zhang     /* Check for special case: each processor gets entire matrix columns */
36059566063dSJacob Faibussowitsch     PetscCall(ISIdentity(iscol_local, &flg));
3606366a327dSHong Zhang     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* allcolumns must hold on every rank to be used */
3607462c564dSBarry Smith     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3608366a327dSHong Zhang     if (allcolumns) {
      /* Use iscol_local itself (with an extra reference) and an identity column map 0..n-1 */
3609366a327dSHong Zhang       iscol_sub = iscol_local;
36109566063dSJacob Faibussowitsch       PetscCall(PetscObjectReference((PetscObject)iscol_local));
36119566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3612366a327dSHong Zhang 
36133b00a383SHong Zhang     } else {
36141358a193SHong Zhang       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      /* idx[] collects the kept column indices of mat, cmap1[] their positions i within iscol_local;
         k walks garray[] and is never reset, which is why sorted input is required */
3615244c7f15SHong Zhang       PetscInt *idx, *cmap1, k;
36169566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(Ncols, &idx));
36179566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(Ncols, &cmap1));
36189566063dSJacob Faibussowitsch       PetscCall(ISGetIndices(iscol_local, &is_idx));
36198d2139bdSHong Zhang       count = 0;
3620a31a438cSHong Zhang       k     = 0;
3621a31a438cSHong Zhang       for (i = 0; i < Ncols; i++) {
3622a31a438cSHong Zhang         j = is_idx[i];
3623a31a438cSHong Zhang         if (j >= cstart && j < cend) {
3624a31a438cSHong Zhang           /* diagonal part of mat */
36258d2139bdSHong Zhang           idx[count]     = j;
3626366a327dSHong Zhang           cmap1[count++] = i; /* column index in submat */
36274a3daf6eSHong Zhang         } else if (Bn) {
3628a31a438cSHong Zhang           /* off-diagonal part of mat: keep j only when it equals an entry of garray[] */
3629a31a438cSHong Zhang           if (j == garray[k]) {
36308d2139bdSHong Zhang             idx[count]     = j;
3631a31a438cSHong Zhang             cmap1[count++] = i; /* column index in submat */
3632a31a438cSHong Zhang           } else if (j > garray[k]) {
            /* advance k, stopping at the last entry of garray[] at the latest */
3633a31a438cSHong Zhang             while (j > garray[k] && k < Bn - 1) k++;
3634a31a438cSHong Zhang             if (j == garray[k]) {
3635a31a438cSHong Zhang               idx[count]     = j;
3636a31a438cSHong Zhang               cmap1[count++] = i; /* column index in submat */
36378d2139bdSHong Zhang             }
36388d2139bdSHong Zhang           }
36398d2139bdSHong Zhang         }
36408d2139bdSHong Zhang       }
36419566063dSJacob Faibussowitsch       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
36428d2139bdSHong Zhang 
      /* idx and cmap1 are handed over to the index sets (PETSC_OWN_POINTER); iscol_sub inherits iscol's block size */
36439566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
36449566063dSJacob Faibussowitsch       PetscCall(ISGetBlockSize(iscol, &cbs));
36459566063dSJacob Faibussowitsch       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3646b6d9b4e0SHong Zhang 
36479566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3648a31a438cSHong Zhang     }
36498b3fa1f7SHong Zhang 
36503b00a383SHong Zhang     /* (3) Create sequential Msub */
36519566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3652d5761cdaSHong Zhang   }
36538d2139bdSHong Zhang 
  /* From here on both paths share: count = local size of iscol_sub, ii = row pointers of Msub,
     cmap[] = indices of iscmap (Msub column -> column of the new matrix) */
36549566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol_sub, &count));
365557508eceSPierre Jolivet   aij = (Mat_SeqAIJ *)Msub->data;
365698b658c4SHong Zhang   ii  = aij->i;
36579566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iscmap, &cmap));
3658a0ff6018SBarry Smith 
3659a0ff6018SBarry Smith   /*
3660a0ff6018SBarry Smith       m - number of local rows
3661a31a438cSHong Zhang       Ncols - number of columns (same on all processors)
3662a0ff6018SBarry Smith       rstart - first row in new global matrix generated
3663a0ff6018SBarry Smith   */
36649566063dSJacob Faibussowitsch   PetscCall(MatGetSize(Msub, &m, NULL));
366598b658c4SHong Zhang 
36663b00a383SHong Zhang   if (call == MAT_INITIAL_MATRIX) {
36673b00a383SHong Zhang     /* (4) Create parallel newmat */
366898b658c4SHong Zhang     PetscMPIInt rank, size;
3669bcae8d28SHong Zhang     PetscInt    csize;
367098b658c4SHong Zhang 
36719566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(comm, &size));
36729566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &rank));
367300e6dbe6SBarry Smith 
3674a0ff6018SBarry Smith     /*
367500e6dbe6SBarry Smith         Determine the number of non-zeros in the diagonal and off-diagonal
367600e6dbe6SBarry Smith         portions of the matrix in order to do correct preallocation
3677a0ff6018SBarry Smith     */
367800e6dbe6SBarry Smith 
367900e6dbe6SBarry Smith     /* first get start and end of "diagonal" columns */
    /* NOTE(review): csize is the value returned by ISGetLocalSize(); whether it can ever equal PETSC_DECIDE,
       making the first branch reachable, is not visible here - confirm */
36809566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(iscol, &csize));
36816a6a5d1dSBarry Smith     if (csize == PETSC_DECIDE) {
36829566063dSJacob Faibussowitsch       PetscCall(ISGetSize(isrow, &mglobal));
3683a31a438cSHong Zhang       if (mglobal == Ncols) { /* square matrix */
3684e2c4fddaSBarry Smith         nlocal = m;
36856a6a5d1dSBarry Smith       } else {
        /* even split of Ncols over size ranks, the first (Ncols % size) ranks get one extra column */
3686a31a438cSHong Zhang         nlocal = Ncols / size + ((Ncols % size) > rank);
3687ab50ec6bSBarry Smith       }
3688ab50ec6bSBarry Smith     } else {
36896a6a5d1dSBarry Smith       nlocal = csize;
36906a6a5d1dSBarry Smith     }
    /* Inclusive prefix sum of nlocal gives rend; here [rstart, rend) is this rank's "diagonal" COLUMN range */
36919566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
369200e6dbe6SBarry Smith     rstart = rend - nlocal;
3693aed4548fSBarry Smith     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
369400e6dbe6SBarry Smith 
369500e6dbe6SBarry Smith     /* next, compute all the lengths */
    /* dlens and olens share one allocation (olens starts m entries into dlens); a single PetscFree(dlens) releases both */
369698b658c4SHong Zhang     jj = aij->j;
36979566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
369800e6dbe6SBarry Smith     olens = dlens + m;
369900e6dbe6SBarry Smith     for (i = 0; i < m; i++) {
370000e6dbe6SBarry Smith       jend = ii[i + 1] - ii[i];
370100e6dbe6SBarry Smith       olen = 0;
370200e6dbe6SBarry Smith       dlen = 0;
370300e6dbe6SBarry Smith       for (j = 0; j < jend; j++) {
        /* classify each entry of row i by the mapped column cmap[*jj]: outside [rstart, rend) counts as off-diagonal */
370415b2185cSHong Zhang         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
370500e6dbe6SBarry Smith         else dlen++;
370600e6dbe6SBarry Smith         jj++;
370700e6dbe6SBarry Smith       }
370800e6dbe6SBarry Smith       olens[i] = olen;
370900e6dbe6SBarry Smith       dlens[i] = dlen;
371000e6dbe6SBarry Smith     }
3711b6d9b4e0SHong Zhang 
37129566063dSJacob Faibussowitsch     PetscCall(ISGetBlockSize(isrow, &bs));
37139566063dSJacob Faibussowitsch     PetscCall(ISGetBlockSize(iscol, &cbs));
371498b658c4SHong Zhang 
    /* M: m x nlocal locally, Ncols global columns, same type name as mat, block sizes taken from isrow/iscol */
37159566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &M));
37169566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
37179566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(M, bs, cbs));
37189566063dSJacob Faibussowitsch     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
37199566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
37209566063dSJacob Faibussowitsch     PetscCall(PetscFree(dlens));
3721d5761cdaSHong Zhang 
3722d5761cdaSHong Zhang   } else { /* call == MAT_REUSE_MATRIX */
3723a0ff6018SBarry Smith     M = *newmat;
37249566063dSJacob Faibussowitsch     PetscCall(MatGetLocalSize(M, &i, NULL));
372508401ef6SPierre Jolivet     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
37269566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(M));
3727c48de900SBarry Smith     /*
3728c48de900SBarry Smith          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3729c48de900SBarry Smith        rather than the slower MatSetValues().
3730c48de900SBarry Smith     */
3731c48de900SBarry Smith     M->was_assembled = PETSC_TRUE;
3732c48de900SBarry Smith     M->assembled     = PETSC_FALSE;
3733a0ff6018SBarry Smith   }
3734548ecf4dSHong Zhang 
37353b00a383SHong Zhang   /* (5) Set values of Msub to *newmat */
  /* colsub: scratch buffer of count entries for the mapped column indices of one row;
     rstart is reassigned here and from now on is M's first locally owned ROW */
37369566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(count, &colsub));
37379566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
373898b658c4SHong Zhang 
373998b658c4SHong Zhang   jj = aij->j;
37409566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3741a0ff6018SBarry Smith   for (i = 0; i < m; i++) {
3742a0ff6018SBarry Smith     row = rstart + i;
374300e6dbe6SBarry Smith     nz  = ii[i + 1] - ii[i];
374415b2185cSHong Zhang     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
37459566063dSJacob Faibussowitsch     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    /* step both cursors to the start of the next row of Msub */
37469371c9d4SSatish Balay     jj += nz;
37479371c9d4SSatish Balay     aa += nz;
3748a0ff6018SBarry Smith   }
  /* NOTE(review): aa has been advanced past the last row by the loop above before it is handed to the restore call - confirm the restore routine does not depend on the original pointer value */
37499566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
37509566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscmap, &cmap));
3751a0ff6018SBarry Smith 
37529566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
37539566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3754fee21e36SBarry Smith 
37559566063dSJacob Faibussowitsch   PetscCall(PetscFree(colsub));
375698b658c4SHong Zhang 
375798b658c4SHong Zhang   /* save Msub, iscol_sub and iscmap used in processor for next request */
3758fee21e36SBarry Smith   if (call == MAT_INITIAL_MATRIX) {
37593b00a383SHong Zhang     *newmat = M;
3760f4f49eeaSPierre Jolivet     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
37619566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&Msub));
376298b658c4SHong Zhang 
3763f4f49eeaSPierre Jolivet     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
37649566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol_sub));
376598b658c4SHong Zhang 
3766f4f49eeaSPierre Jolivet     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
37679566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscmap));
3768bcae8d28SHong Zhang 
    /* The caller's iscol_local is also attached to the result and the reference passed in is released here */
3769bcae8d28SHong Zhang     if (iscol_local) {
3770f4f49eeaSPierre Jolivet       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
37719566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&iscol_local));
3772bcae8d28SHong Zhang     }
377398b658c4SHong Zhang   }
37743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3775a0ff6018SBarry Smith }
3776273d9f13SBarry Smith 
3777df40acb1SHong Zhang /*
3778df40acb1SHong Zhang     Not great since it makes two copies of the submatrix, first an SeqAIJ
3779df40acb1SHong Zhang   in local and then by concatenating the local matrices the end result.
3780df40acb1SHong Zhang   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3781df40acb1SHong Zhang 
378211a5261eSBarry Smith   This requires a sequential iscol with all indices.
3783df40acb1SHong Zhang */
3784d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3785d71ae5a4SJacob Faibussowitsch {
3786df40acb1SHong Zhang   PetscMPIInt rank, size;
3787df40acb1SHong Zhang   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3788df40acb1SHong Zhang   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3789df40acb1SHong Zhang   Mat         M, Mreuse;
379098b658c4SHong Zhang   MatScalar  *aa, *vwork;
3791df40acb1SHong Zhang   MPI_Comm    comm;
3792df40acb1SHong Zhang   Mat_SeqAIJ *aij;
37930b27a90eSHong Zhang   PetscBool   colflag, allcolumns = PETSC_FALSE;
3794df40acb1SHong Zhang 
3795df40acb1SHong Zhang   PetscFunctionBegin;
37969566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
37979566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
37989566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
3799df40acb1SHong Zhang 
38000b27a90eSHong Zhang   /* Check for special case: each processor gets entire matrix columns */
38019566063dSJacob Faibussowitsch   PetscCall(ISIdentity(iscol, &colflag));
38029566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol, &n));
38030b27a90eSHong Zhang   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3804462c564dSBarry Smith   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
38050b27a90eSHong Zhang 
3806df40acb1SHong Zhang   if (call == MAT_REUSE_MATRIX) {
38079566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
380828b400f6SJacob Faibussowitsch     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
38099566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3810df40acb1SHong Zhang   } else {
38119566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3812df40acb1SHong Zhang   }
3813df40acb1SHong Zhang 
3814df40acb1SHong Zhang   /*
3815df40acb1SHong Zhang       m - number of local rows
3816df40acb1SHong Zhang       n - number of columns (same on all processors)
3817df40acb1SHong Zhang       rstart - first row in new global matrix generated
3818df40acb1SHong Zhang   */
38199566063dSJacob Faibussowitsch   PetscCall(MatGetSize(Mreuse, &m, &n));
38209566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3821df40acb1SHong Zhang   if (call == MAT_INITIAL_MATRIX) {
382257508eceSPierre Jolivet     aij = (Mat_SeqAIJ *)Mreuse->data;
3823df40acb1SHong Zhang     ii  = aij->i;
3824df40acb1SHong Zhang     jj  = aij->j;
3825df40acb1SHong Zhang 
3826df40acb1SHong Zhang     /*
3827df40acb1SHong Zhang         Determine the number of non-zeros in the diagonal and off-diagonal
3828df40acb1SHong Zhang         portions of the matrix in order to do correct preallocation
3829df40acb1SHong Zhang     */
3830df40acb1SHong Zhang 
3831df40acb1SHong Zhang     /* first get start and end of "diagonal" columns */
3832df40acb1SHong Zhang     if (csize == PETSC_DECIDE) {
38339566063dSJacob Faibussowitsch       PetscCall(ISGetSize(isrow, &mglobal));
3834df40acb1SHong Zhang       if (mglobal == n) { /* square matrix */
3835df40acb1SHong Zhang         nlocal = m;
3836df40acb1SHong Zhang       } else {
3837df40acb1SHong Zhang         nlocal = n / size + ((n % size) > rank);
3838df40acb1SHong Zhang       }
3839df40acb1SHong Zhang     } else {
3840df40acb1SHong Zhang       nlocal = csize;
3841df40acb1SHong Zhang     }
38429566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3843df40acb1SHong Zhang     rstart = rend - nlocal;
3844aed4548fSBarry Smith     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3845df40acb1SHong Zhang 
3846df40acb1SHong Zhang     /* next, compute all the lengths */
38479566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3848df40acb1SHong Zhang     olens = dlens + m;
3849df40acb1SHong Zhang     for (i = 0; i < m; i++) {
3850df40acb1SHong Zhang       jend = ii[i + 1] - ii[i];
3851df40acb1SHong Zhang       olen = 0;
3852df40acb1SHong Zhang       dlen = 0;
3853df40acb1SHong Zhang       for (j = 0; j < jend; j++) {
3854df40acb1SHong Zhang         if (*jj < rstart || *jj >= rend) olen++;
3855df40acb1SHong Zhang         else dlen++;
3856df40acb1SHong Zhang         jj++;
3857df40acb1SHong Zhang       }
3858df40acb1SHong Zhang       olens[i] = olen;
3859df40acb1SHong Zhang       dlens[i] = dlen;
3860df40acb1SHong Zhang     }
38619566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &M));
38629566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
38639566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(M, bs, cbs));
38649566063dSJacob Faibussowitsch     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
38659566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
38669566063dSJacob Faibussowitsch     PetscCall(PetscFree(dlens));
3867df40acb1SHong Zhang   } else {
3868df40acb1SHong Zhang     PetscInt ml, nl;
3869df40acb1SHong Zhang 
3870df40acb1SHong Zhang     M = *newmat;
38719566063dSJacob Faibussowitsch     PetscCall(MatGetLocalSize(M, &ml, &nl));
387208401ef6SPierre Jolivet     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
38739566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(M));
3874df40acb1SHong Zhang     /*
3875df40acb1SHong Zhang          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3876df40acb1SHong Zhang        rather than the slower MatSetValues().
3877df40acb1SHong Zhang     */
3878df40acb1SHong Zhang     M->was_assembled = PETSC_TRUE;
3879df40acb1SHong Zhang     M->assembled     = PETSC_FALSE;
3880df40acb1SHong Zhang   }
38819566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
388257508eceSPierre Jolivet   aij = (Mat_SeqAIJ *)Mreuse->data;
3883df40acb1SHong Zhang   ii  = aij->i;
3884df40acb1SHong Zhang   jj  = aij->j;
38852e5835c6SStefano Zampini 
38862e5835c6SStefano Zampini   /* trigger copy to CPU if needed */
38879566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3888df40acb1SHong Zhang   for (i = 0; i < m; i++) {
3889df40acb1SHong Zhang     row   = rstart + i;
3890df40acb1SHong Zhang     nz    = ii[i + 1] - ii[i];
38919371c9d4SSatish Balay     cwork = jj;
38928e3a54c0SPierre Jolivet     jj    = PetscSafePointerPlusOffset(jj, nz);
38939371c9d4SSatish Balay     vwork = aa;
38948e3a54c0SPierre Jolivet     aa    = PetscSafePointerPlusOffset(aa, nz);
38959566063dSJacob Faibussowitsch     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3896df40acb1SHong Zhang   }
38979566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3898df40acb1SHong Zhang 
38999566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
39009566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3901df40acb1SHong Zhang   *newmat = M;
3902df40acb1SHong Zhang 
3903df40acb1SHong Zhang   /* save submatrix used in processor for next request */
3904df40acb1SHong Zhang   if (call == MAT_INITIAL_MATRIX) {
39059566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
39069566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&Mreuse));
3907df40acb1SHong Zhang   }
39083ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3909df40acb1SHong Zhang }
3910df40acb1SHong Zhang 
3911ba38deedSJacob Faibussowitsch static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3912d71ae5a4SJacob Faibussowitsch {
39136a3d2595SBarry Smith   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
39149f0612e4SBarry Smith   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
3915ccd8e176SBarry Smith   const PetscInt *JJ;
3916eeb24464SBarry Smith   PetscBool       nooffprocentries;
39176a3d2595SBarry Smith   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3918ccd8e176SBarry Smith 
3919ccd8e176SBarry Smith   PetscFunctionBegin;
39209566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
39219566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
3922d0f46423SBarry Smith   m       = B->rmap->n;
3923d0f46423SBarry Smith   cstart  = B->cmap->rstart;
3924d0f46423SBarry Smith   cend    = B->cmap->rend;
3925d0f46423SBarry Smith   rstart  = B->rmap->rstart;
39269f0612e4SBarry Smith   irstart = Ii[0];
3927899cda47SBarry Smith 
39289566063dSJacob Faibussowitsch   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3929ccd8e176SBarry Smith 
393076bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
39318f8f2f0dSBarry Smith     for (i = 0; i < m; i++) {
3932ecc77c7aSBarry Smith       nnz = Ii[i + 1] - Ii[i];
39339f0612e4SBarry Smith       JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
393408401ef6SPierre Jolivet       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
393508401ef6SPierre Jolivet       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
393608401ef6SPierre Jolivet       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3937ecc77c7aSBarry Smith     }
393876bd3646SJed Brown   }
3939ecc77c7aSBarry Smith 
39408f8f2f0dSBarry Smith   for (i = 0; i < m; i++) {
3941b7940d39SSatish Balay     nnz     = Ii[i + 1] - Ii[i];
39429f0612e4SBarry Smith     JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3943ccd8e176SBarry Smith     nnz_max = PetscMax(nnz_max, nnz);
3944ccd8e176SBarry Smith     d       = 0;
39450daa03b5SJed Brown     for (j = 0; j < nnz; j++) {
39460daa03b5SJed Brown       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3947ccd8e176SBarry Smith     }
3948ccd8e176SBarry Smith     d_nnz[i] = d;
3949ccd8e176SBarry Smith     o_nnz[i] = nnz - d;
3950ccd8e176SBarry Smith   }
39519566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
39529566063dSJacob Faibussowitsch   PetscCall(PetscFree2(d_nnz, o_nnz));
3953ccd8e176SBarry Smith 
39548f8f2f0dSBarry Smith   for (i = 0; i < m; i++) {
3955ccd8e176SBarry Smith     ii = i + rstart;
39569f0612e4SBarry Smith     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
3957ccd8e176SBarry Smith   }
3958eeb24464SBarry Smith   nooffprocentries    = B->nooffprocentries;
3959eeb24464SBarry Smith   B->nooffprocentries = PETSC_TRUE;
39609566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
39619566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3962eeb24464SBarry Smith   B->nooffprocentries = nooffprocentries;
3963ccd8e176SBarry Smith 
39646a3d2595SBarry Smith   /* count number of entries below block diagonal */
39656a3d2595SBarry Smith   PetscCall(PetscFree(Aij->ld));
39666a3d2595SBarry Smith   PetscCall(PetscCalloc1(m, &ld));
39676a3d2595SBarry Smith   Aij->ld = ld;
39686a3d2595SBarry Smith   for (i = 0; i < m; i++) {
39696a3d2595SBarry Smith     nnz = Ii[i + 1] - Ii[i];
39706a3d2595SBarry Smith     j   = 0;
3971ad540459SPierre Jolivet     while (j < nnz && J[j] < cstart) j++;
39726a3d2595SBarry Smith     ld[i] = j;
3973720a2405SPierre Jolivet     if (J) J += nnz;
39746a3d2595SBarry Smith   }
39756a3d2595SBarry Smith 
39769566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
39773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3978ccd8e176SBarry Smith }
3979ccd8e176SBarry Smith 
39801eea217eSSatish Balay /*@
398111a5261eSBarry Smith   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3982ccd8e176SBarry Smith   (the default parallel PETSc format).
3983ccd8e176SBarry Smith 
3984d083f849SBarry Smith   Collective
3985ccd8e176SBarry Smith 
3986ccd8e176SBarry Smith   Input Parameters:
3987a1661176SMatthew Knepley + B - the matrix
3988d8a51d2aSBarry Smith . i - the indices into `j` for the start of each local row (indices start with zero)
3989d8a51d2aSBarry Smith . j - the column indices for each local row (indices start with zero)
3990ccd8e176SBarry Smith - v - optional values in the matrix
3991ccd8e176SBarry Smith 
3992ccd8e176SBarry Smith   Level: developer
3993ccd8e176SBarry Smith 
399412251496SSatish Balay   Notes:
39952ef1f0ffSBarry Smith   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
39962ef1f0ffSBarry Smith   thus you CANNOT change the matrix entries by changing the values of `v` after you have
399711a5261eSBarry Smith   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
399812251496SSatish Balay 
39992ef1f0ffSBarry Smith   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
400012251496SSatish Balay 
4001a4bd8bc0SBarry Smith   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
4002a4bd8bc0SBarry Smith 
4003a4bd8bc0SBarry Smith   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
4004a4bd8bc0SBarry Smith 
4005a4bd8bc0SBarry Smith   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
4006a4bd8bc0SBarry Smith   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4007a4bd8bc0SBarry Smith 
400812251496SSatish Balay   The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
4010c5e4d11fSDmitry Karpeev   as shown
401127430b45SBarry Smith .vb
401227430b45SBarry Smith         1 0 0
401327430b45SBarry Smith         2 0 3     P0
401427430b45SBarry Smith        -------
401527430b45SBarry Smith         4 5 6     P1
401627430b45SBarry Smith 
401727430b45SBarry Smith      Process0 [P0] rows_owned=[0,1]
401827430b45SBarry Smith         i =  {0,1,3}  [size = nrow+1  = 2+1]
401927430b45SBarry Smith         j =  {0,0,2}  [size = 3]
402027430b45SBarry Smith         v =  {1,2,3}  [size = 3]
402127430b45SBarry Smith 
402227430b45SBarry Smith      Process1 [P1] rows_owned=[2]
402327430b45SBarry Smith         i =  {0,3}    [size = nrow+1  = 1+1]
402427430b45SBarry Smith         j =  {0,1,2}  [size = 3]
402527430b45SBarry Smith         v =  {4,5,6}  [size = 3]
402627430b45SBarry Smith .ve
402712251496SSatish Balay 
4028fe59aa6dSJacob Faibussowitsch .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4029a4bd8bc0SBarry Smith           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4030ccd8e176SBarry Smith @*/
4031d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4032d71ae5a4SJacob Faibussowitsch {
4033ccd8e176SBarry Smith   PetscFunctionBegin;
4034cac4c232SBarry Smith   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
40353ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4036ccd8e176SBarry Smith }
4037ccd8e176SBarry Smith 
40385d83a8b1SBarry Smith /*@
403911a5261eSBarry Smith   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4040273d9f13SBarry Smith   (the default parallel PETSc format).  For good matrix assembly performance
4041273d9f13SBarry Smith   the user should preallocate the matrix storage by setting the parameters
404220f4b53cSBarry Smith   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4043273d9f13SBarry Smith 
4044d083f849SBarry Smith   Collective
4045273d9f13SBarry Smith 
4046273d9f13SBarry Smith   Input Parameters:
40471c4f3114SJed Brown + B     - the matrix
4048273d9f13SBarry Smith . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4049273d9f13SBarry Smith            (same value is used for all local rows)
4050273d9f13SBarry Smith . d_nnz - array containing the number of nonzeros in the various rows of the
4051273d9f13SBarry Smith            DIAGONAL portion of the local submatrix (possibly different for each row)
40522ef1f0ffSBarry Smith            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4053273d9f13SBarry Smith            The size of this array is equal to the number of local rows, i.e 'm'.
40543287b5eaSJed Brown            For matrices that will be factored, you must leave room for (and set)
40553287b5eaSJed Brown            the diagonal entry even if it is zero.
4056273d9f13SBarry Smith . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4057273d9f13SBarry Smith            submatrix (same value is used for all local rows).
4058273d9f13SBarry Smith - o_nnz - array containing the number of nonzeros in the various rows of the
4059273d9f13SBarry Smith            OFF-DIAGONAL portion of the local submatrix (possibly different for
40602ef1f0ffSBarry Smith            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4061273d9f13SBarry Smith            structure. The size of this array is equal to the number
4062273d9f13SBarry Smith            of local rows, i.e 'm'.
4063273d9f13SBarry Smith 
40642920cce0SJacob Faibussowitsch   Example Usage:
406527430b45SBarry Smith   Consider the following 8x8 matrix with 34 non-zero values, that is
406627430b45SBarry Smith   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
406727430b45SBarry Smith   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
406827430b45SBarry Smith   as follows
406927430b45SBarry Smith 
407027430b45SBarry Smith .vb
407127430b45SBarry Smith             1  2  0  |  0  3  0  |  0  4
407227430b45SBarry Smith     Proc0   0  5  6  |  7  0  0  |  8  0
407327430b45SBarry Smith             9  0 10  | 11  0  0  | 12  0
407427430b45SBarry Smith     -------------------------------------
407527430b45SBarry Smith            13  0 14  | 15 16 17  |  0  0
407627430b45SBarry Smith     Proc1   0 18  0  | 19 20 21  |  0  0
407727430b45SBarry Smith             0  0  0  | 22 23  0  | 24  0
407827430b45SBarry Smith     -------------------------------------
407927430b45SBarry Smith     Proc2  25 26 27  |  0  0 28  | 29  0
408027430b45SBarry Smith            30  0  0  | 31 32 33  |  0 34
408127430b45SBarry Smith .ve
408227430b45SBarry Smith 
408327430b45SBarry Smith   This can be represented as a collection of submatrices as
408427430b45SBarry Smith .vb
408527430b45SBarry Smith       A B C
408627430b45SBarry Smith       D E F
408727430b45SBarry Smith       G H I
408827430b45SBarry Smith .ve
408927430b45SBarry Smith 
409027430b45SBarry Smith   Where the submatrices A,B,C are owned by proc0, D,E,F are
409127430b45SBarry Smith   owned by proc1, G,H,I are owned by proc2.
409227430b45SBarry Smith 
409327430b45SBarry Smith   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
409427430b45SBarry Smith   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
409527430b45SBarry Smith   The 'M','N' parameters are 8,8, and have the same values on all procs.
409627430b45SBarry Smith 
409727430b45SBarry Smith   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
409827430b45SBarry Smith   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
409927430b45SBarry Smith   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
410027430b45SBarry Smith   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
410127430b45SBarry Smith   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.
410327430b45SBarry Smith 
410420f4b53cSBarry Smith   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
410520f4b53cSBarry Smith   allocated for every row of the local diagonal submatrix, and `o_nz`
410627430b45SBarry Smith   storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
410827430b45SBarry Smith   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
410920f4b53cSBarry Smith   In this case, the values of `d_nz`, `o_nz` are
411027430b45SBarry Smith .vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
411427430b45SBarry Smith .ve
411520f4b53cSBarry Smith   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
411627430b45SBarry Smith   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
411827430b45SBarry Smith   34 values.
411927430b45SBarry Smith 
412020f4b53cSBarry Smith   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
412127430b45SBarry Smith   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
412220f4b53cSBarry Smith   In the above case the values for `d_nnz`, `o_nnz` are
412327430b45SBarry Smith .vb
412427430b45SBarry Smith      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
412527430b45SBarry Smith      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
412627430b45SBarry Smith      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
412727430b45SBarry Smith .ve
412827430b45SBarry Smith   Here the space allocated is sum of all the above values i.e 34, and
412927430b45SBarry Smith   hence pre-allocation is perfect.
413027430b45SBarry Smith 
413127430b45SBarry Smith   Level: intermediate
413227430b45SBarry Smith 
413327430b45SBarry Smith   Notes:
413449a6f317SBarry Smith   If the *_nnz parameter is given then the *_nz parameter is ignored
413549a6f317SBarry Smith 
413627430b45SBarry Smith   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
41370598bfebSBarry Smith   storage.  The stored row and column indices begin with zero.
4138651615e1SBarry Smith   See [Sparse Matrices](sec_matsparse) for details.
4139273d9f13SBarry Smith 
4140273d9f13SBarry Smith   The parallel matrix is partitioned such that the first m0 rows belong to
4141273d9f13SBarry Smith   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4142273d9f13SBarry Smith   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4143273d9f13SBarry Smith 
4144273d9f13SBarry Smith   The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
4149a05b864aSJed Brown   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4150a05b864aSJed Brown   common case of a square matrix, the row and column ranges are the same and
4151a05b864aSJed Brown   the DIAGONAL part is also square. The remaining portion of the local
4152a05b864aSJed Brown   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4153273d9f13SBarry Smith 
41542ef1f0ffSBarry Smith   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4155273d9f13SBarry Smith 
415627430b45SBarry Smith   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4157aa95bbe8SBarry Smith   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
415827430b45SBarry Smith   You can also run with the option `-info` and look for messages with the string
4159aa95bbe8SBarry Smith   malloc in them to see if additional memory allocation was needed.
4160aa95bbe8SBarry Smith 
41611cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4162a4bd8bc0SBarry Smith           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4163273d9f13SBarry Smith @*/
4164d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4165d71ae5a4SJacob Faibussowitsch {
4166273d9f13SBarry Smith   PetscFunctionBegin;
41676ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
41686ba663aaSJed Brown   PetscValidType(B, 1);
4169cac4c232SBarry Smith   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
41703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4171273d9f13SBarry Smith }
4172273d9f13SBarry Smith 
417358d36128SBarry Smith /*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows
  in standard CSR format.
41762fb0ec9aSBarry Smith 
4177d083f849SBarry Smith   Collective
41782fb0ec9aSBarry Smith 
41792fb0ec9aSBarry Smith   Input Parameters:
41802fb0ec9aSBarry Smith + comm - MPI communicator
418111a5261eSBarry Smith . m    - number of local rows (Cannot be `PETSC_DECIDE`)
41822fb0ec9aSBarry Smith . n    - This value should be the same as the local size used in creating the
4183d8a51d2aSBarry Smith          x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have
4184d8a51d2aSBarry Smith          calculated if `N` is given) For square matrices n is almost always `m`.
4185d8a51d2aSBarry Smith . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4186d8a51d2aSBarry Smith . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
41875693b835SJunchao Zhang . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
41885693b835SJunchao Zhang . j    - global column indices
4189f1f2ae84SBarry Smith - a    - optional matrix values
41902fb0ec9aSBarry Smith 
41912fb0ec9aSBarry Smith   Output Parameter:
41922fb0ec9aSBarry Smith . mat - the matrix
419303bfb495SBarry Smith 
41942fb0ec9aSBarry Smith   Level: intermediate
41952fb0ec9aSBarry Smith 
41962fb0ec9aSBarry Smith   Notes:
41972ef1f0ffSBarry Smith   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4198d4c8cc39SPierre Jolivet   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4199d4c8cc39SPierre Jolivet   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
42002fb0ec9aSBarry Smith 
42012ef1f0ffSBarry Smith   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
420212251496SSatish Balay 
42035693b835SJunchao Zhang   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4204a4bd8bc0SBarry Smith 
42055693b835SJunchao Zhang   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4206a4bd8bc0SBarry Smith   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4207a4bd8bc0SBarry Smith 
420812251496SSatish Balay   The format which is used for the sparse matrix input, is equivalent to a
4209d4c8cc39SPierre Jolivet   row-major ordering, i.e., for the following matrix, the input data expected is
4210c5e4d11fSDmitry Karpeev   as shown
42112ef1f0ffSBarry Smith .vb
42122ef1f0ffSBarry Smith         1 0 0
42132ef1f0ffSBarry Smith         2 0 3     P0
42142ef1f0ffSBarry Smith        -------
42152ef1f0ffSBarry Smith         4 5 6     P1
42168f8f2f0dSBarry Smith 
42172ef1f0ffSBarry Smith      Process0 [P0] rows_owned=[0,1]
42182ef1f0ffSBarry Smith         i =  {0,1,3}  [size = nrow+1  = 2+1]
42192ef1f0ffSBarry Smith         j =  {0,0,2}  [size = 3]
42202ef1f0ffSBarry Smith         v =  {1,2,3}  [size = 3]
42212fb0ec9aSBarry Smith 
42222ef1f0ffSBarry Smith      Process1 [P1] rows_owned=[2]
42232ef1f0ffSBarry Smith         i =  {0,3}    [size = nrow+1  = 1+1]
42242ef1f0ffSBarry Smith         j =  {0,1,2}  [size = 3]
42252ef1f0ffSBarry Smith         v =  {4,5,6}  [size = 3]
42262ef1f0ffSBarry Smith .ve
42272ef1f0ffSBarry Smith 
4228d4c8cc39SPierre Jolivet .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
42295693b835SJunchao Zhang           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
42302fb0ec9aSBarry Smith @*/
4231d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4232d71ae5a4SJacob Faibussowitsch {
42332fb0ec9aSBarry Smith   PetscFunctionBegin;
423408401ef6SPierre Jolivet   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
423508401ef6SPierre Jolivet   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
42369566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
42379566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, M, N));
42389566063dSJacob Faibussowitsch   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
42399566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat, MATMPIAIJ));
42409566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
42413ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
42422fb0ec9aSBarry Smith }
42432fb0ec9aSBarry Smith 
42448f8f2f0dSBarry Smith /*@
  MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows
  in standard CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
  to `MatCreateMPIAIJWithArrays()`
42486a3d2595SBarry Smith 
42496a3d2595SBarry Smith   Deprecated: Use `MatUpdateMPIAIJWithArray()`
42508f8f2f0dSBarry Smith 
42518f8f2f0dSBarry Smith   Collective
42528f8f2f0dSBarry Smith 
42538f8f2f0dSBarry Smith   Input Parameters:
42548f8f2f0dSBarry Smith + mat - the matrix
425511a5261eSBarry Smith . m   - number of local rows (Cannot be `PETSC_DECIDE`)
42568f8f2f0dSBarry Smith . n   - This value should be the same as the local size used in creating the
425711a5261eSBarry Smith        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
42588f8f2f0dSBarry Smith        calculated if N is given) For square matrices n is almost always m.
425911a5261eSBarry Smith . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
426011a5261eSBarry Smith . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
42618f8f2f0dSBarry Smith . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
42628f8f2f0dSBarry Smith . J   - column indices
42638f8f2f0dSBarry Smith - v   - matrix values
42648f8f2f0dSBarry Smith 
42652ef1f0ffSBarry Smith   Level: deprecated
42668f8f2f0dSBarry Smith 
42671cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4268a4bd8bc0SBarry Smith           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
42698f8f2f0dSBarry Smith @*/
4270d71ae5a4SJacob Faibussowitsch PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4271d71ae5a4SJacob Faibussowitsch {
42726a3d2595SBarry Smith   PetscInt        nnz, i;
42738f8f2f0dSBarry Smith   PetscBool       nooffprocentries;
42748f8f2f0dSBarry Smith   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4275fff043a9SJunchao Zhang   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4276fff043a9SJunchao Zhang   PetscScalar    *ad, *ao;
42778f8f2f0dSBarry Smith   PetscInt        ldi, Iii, md;
42786a3d2595SBarry Smith   const PetscInt *Adi = Ad->i;
42796a3d2595SBarry Smith   PetscInt       *ld  = Aij->ld;
42808f8f2f0dSBarry Smith 
42818f8f2f0dSBarry Smith   PetscFunctionBegin;
4282aed4548fSBarry Smith   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
428308401ef6SPierre Jolivet   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
428408401ef6SPierre Jolivet   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
428508401ef6SPierre Jolivet   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
42868f8f2f0dSBarry Smith 
42879566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
42889566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
42898f8f2f0dSBarry Smith 
42908f8f2f0dSBarry Smith   for (i = 0; i < m; i++) {
42914c17f1ccSBarry Smith     if (PetscDefined(USE_DEBUG)) {
42924c17f1ccSBarry Smith       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
42934c17f1ccSBarry Smith         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
42944c17f1ccSBarry Smith         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
42954c17f1ccSBarry Smith       }
42964c17f1ccSBarry Smith     }
42978f8f2f0dSBarry Smith     nnz = Ii[i + 1] - Ii[i];
42988f8f2f0dSBarry Smith     Iii = Ii[i];
42998f8f2f0dSBarry Smith     ldi = ld[i];
43008f8f2f0dSBarry Smith     md  = Adi[i + 1] - Adi[i];
43019566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
43029566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
43039566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
43048f8f2f0dSBarry Smith     ad += md;
43058f8f2f0dSBarry Smith     ao += nnz - md;
43068f8f2f0dSBarry Smith   }
43078f8f2f0dSBarry Smith   nooffprocentries      = mat->nooffprocentries;
43088f8f2f0dSBarry Smith   mat->nooffprocentries = PETSC_TRUE;
43099566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
43109566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
43119566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
43129566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
43139566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
43149566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
43159566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
43168f8f2f0dSBarry Smith   mat->nooffprocentries = nooffprocentries;
43173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
43188f8f2f0dSBarry Smith }
43198f8f2f0dSBarry Smith 
43206a3d2595SBarry Smith /*@
432111a5261eSBarry Smith   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
43226a3d2595SBarry Smith 
43236a3d2595SBarry Smith   Collective
43246a3d2595SBarry Smith 
43256a3d2595SBarry Smith   Input Parameters:
43266a3d2595SBarry Smith + mat - the matrix
43276a3d2595SBarry Smith - v   - matrix values, stored by row
43286a3d2595SBarry Smith 
43296a3d2595SBarry Smith   Level: intermediate
43306a3d2595SBarry Smith 
4331a4bd8bc0SBarry Smith   Notes:
43326a3d2595SBarry Smith   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
43336a3d2595SBarry Smith 
4334a4bd8bc0SBarry Smith   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4335a4bd8bc0SBarry Smith 
43361cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4337a4bd8bc0SBarry Smith           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
43386a3d2595SBarry Smith @*/
4339d71ae5a4SJacob Faibussowitsch PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4340d71ae5a4SJacob Faibussowitsch {
43416a3d2595SBarry Smith   PetscInt        nnz, i, m;
43426a3d2595SBarry Smith   PetscBool       nooffprocentries;
43436a3d2595SBarry Smith   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
43446a3d2595SBarry Smith   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
43456a3d2595SBarry Smith   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
43466a3d2595SBarry Smith   PetscScalar    *ad, *ao;
43476a3d2595SBarry Smith   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
43486a3d2595SBarry Smith   PetscInt        ldi, Iii, md;
43496a3d2595SBarry Smith   PetscInt       *ld = Aij->ld;
43506a3d2595SBarry Smith 
43516a3d2595SBarry Smith   PetscFunctionBegin;
43526a3d2595SBarry Smith   m = mat->rmap->n;
43536a3d2595SBarry Smith 
43546a3d2595SBarry Smith   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
43556a3d2595SBarry Smith   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
43566a3d2595SBarry Smith   Iii = 0;
43576a3d2595SBarry Smith   for (i = 0; i < m; i++) {
43586a3d2595SBarry Smith     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
43596a3d2595SBarry Smith     ldi = ld[i];
43606a3d2595SBarry Smith     md  = Adi[i + 1] - Adi[i];
43616a3d2595SBarry Smith     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
43626a3d2595SBarry Smith     ad += md;
4363810441c8SPierre Jolivet     if (ao) {
4364810441c8SPierre Jolivet       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4365810441c8SPierre Jolivet       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
43666a3d2595SBarry Smith       ao += nnz - md;
4367810441c8SPierre Jolivet     }
43686a3d2595SBarry Smith     Iii += nnz;
43696a3d2595SBarry Smith   }
43706a3d2595SBarry Smith   nooffprocentries      = mat->nooffprocentries;
43716a3d2595SBarry Smith   mat->nooffprocentries = PETSC_TRUE;
43726a3d2595SBarry Smith   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
43736a3d2595SBarry Smith   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
43746a3d2595SBarry Smith   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
43756a3d2595SBarry Smith   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
43766a3d2595SBarry Smith   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
43776a3d2595SBarry Smith   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
43786a3d2595SBarry Smith   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
43796a3d2595SBarry Smith   mat->nooffprocentries = nooffprocentries;
43803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
43816a3d2595SBarry Smith }
43826a3d2595SBarry Smith 
43835d83a8b1SBarry Smith /*@
438411a5261eSBarry Smith   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4385273d9f13SBarry Smith   (the default parallel PETSc format).  For good matrix assembly performance
4386273d9f13SBarry Smith   the user should preallocate the matrix storage by setting the parameters
43872ef1f0ffSBarry Smith   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4388273d9f13SBarry Smith 
4389d083f849SBarry Smith   Collective
4390273d9f13SBarry Smith 
4391273d9f13SBarry Smith   Input Parameters:
4392273d9f13SBarry Smith + comm  - MPI communicator
439311a5261eSBarry Smith . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4394273d9f13SBarry Smith           This value should be the same as the local size used in creating the
4395273d9f13SBarry Smith           y vector for the matrix-vector product y = Ax.
4396273d9f13SBarry Smith . n     - This value should be the same as the local size used in creating the
439745f401ebSJose E. Roman           x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4398273d9f13SBarry Smith           calculated if N is given) For square matrices n is almost always m.
439911a5261eSBarry Smith . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
440011a5261eSBarry Smith . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4401273d9f13SBarry Smith . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4402273d9f13SBarry Smith           (same value is used for all local rows)
4403273d9f13SBarry Smith . d_nnz - array containing the number of nonzeros in the various rows of the
4404273d9f13SBarry Smith           DIAGONAL portion of the local submatrix (possibly different for each row)
44052ef1f0ffSBarry Smith           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4406273d9f13SBarry Smith           The size of this array is equal to the number of local rows, i.e 'm'.
4407273d9f13SBarry Smith . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4408273d9f13SBarry Smith           submatrix (same value is used for all local rows).
4409273d9f13SBarry Smith - o_nnz - array containing the number of nonzeros in the various rows of the
4410273d9f13SBarry Smith           OFF-DIAGONAL portion of the local submatrix (possibly different for
44112ef1f0ffSBarry Smith           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4412273d9f13SBarry Smith           structure. The size of this array is equal to the number
4413273d9f13SBarry Smith           of local rows, i.e 'm'.
4414273d9f13SBarry Smith 
4415273d9f13SBarry Smith   Output Parameter:
4416273d9f13SBarry Smith . A - the matrix
4417273d9f13SBarry Smith 
441827430b45SBarry Smith   Options Database Keys:
441927430b45SBarry Smith + -mat_no_inode                     - Do not use inodes
442027430b45SBarry Smith . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
442127430b45SBarry Smith - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4422727bdf9bSBarry Smith                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4423727bdf9bSBarry Smith                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
442427430b45SBarry Smith 
44252ef1f0ffSBarry Smith   Level: intermediate
44262ef1f0ffSBarry Smith 
442727430b45SBarry Smith   Notes:
442877433607SBarry Smith   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4429f6f02116SRichard Tran Mills   MatXXXXSetPreallocation() paradigm instead of this routine directly.
443011a5261eSBarry Smith   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4431175b88e8SBarry Smith 
443249a6f317SBarry Smith   If the *_nnz parameter is given then the *_nz parameter is ignored
443349a6f317SBarry Smith 
44342ef1f0ffSBarry Smith   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
44352ef1f0ffSBarry Smith   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4436273d9f13SBarry Smith   storage requirements for this matrix.
4437273d9f13SBarry Smith 
443811a5261eSBarry Smith   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4439273d9f13SBarry Smith   processor then it must be used on all processors that share the object for
4440273d9f13SBarry Smith   that argument.
4441273d9f13SBarry Smith 
4442727bdf9bSBarry Smith   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4443727bdf9bSBarry Smith   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4444727bdf9bSBarry Smith 
4445273d9f13SBarry Smith   The user MUST specify either the local or global matrix dimensions
4446273d9f13SBarry Smith   (possibly both).
4447273d9f13SBarry Smith 
444833a7c187SSatish Balay   The parallel matrix is partitioned across processors such that the
4449727bdf9bSBarry Smith   first `m0` rows belong to process 0, the next `m1` rows belong to
4450727bdf9bSBarry Smith   process 1, the next `m2` rows belong to process 2, etc., where
4451727bdf9bSBarry Smith   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
445233a7c187SSatish Balay   values corresponding to [m x N] submatrix.
4453273d9f13SBarry Smith 
445433a7c187SSatish Balay   The columns are logically partitioned with the n0 columns belonging
445533a7c187SSatish Balay   to 0th partition, the next n1 columns belonging to the next
4456df3898eeSBarry Smith   partition etc.. where n0,n1,n2... are the input parameter 'n'.
445733a7c187SSatish Balay 
445833a7c187SSatish Balay   The DIAGONAL portion of the local submatrix on any given processor
445933a7c187SSatish Balay   is the submatrix corresponding to the rows and columns m,n
446033a7c187SSatish Balay   corresponding to the given processor. i.e diagonal matrix on
446133a7c187SSatish Balay   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
446233a7c187SSatish Balay   etc. The remaining portion of the local submatrix [m x (N-n)]
446333a7c187SSatish Balay   constitute the OFF-DIAGONAL portion. The example below better
446433a7c187SSatish Balay   illustrates this concept.
446533a7c187SSatish Balay 
446633a7c187SSatish Balay   For a square global matrix we define each processor's diagonal portion
446733a7c187SSatish Balay   to be its local rows and the corresponding columns (a square submatrix);
446833a7c187SSatish Balay   each processor's off-diagonal portion encompasses the remainder of the
446933a7c187SSatish Balay   local matrix (a rectangular submatrix).
4470273d9f13SBarry Smith 
44712ef1f0ffSBarry Smith   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4472273d9f13SBarry Smith 
447397d05335SKris Buschelman   When calling this routine with a single process communicator, a matrix of
44742ef1f0ffSBarry Smith   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4475da57b5cdSKarl Rupp   type of communicator, use the construction mechanism
4476da57b5cdSKarl Rupp .vb
44772ef1f0ffSBarry Smith   MatCreate(..., &A);
44782ef1f0ffSBarry Smith   MatSetType(A, MATMPIAIJ);
44792ef1f0ffSBarry Smith   MatSetSizes(A, m, n, M, N);
44802ef1f0ffSBarry Smith   MatMPIAIJSetPreallocation(A, ...);
4481da57b5cdSKarl Rupp .ve
448297d05335SKris Buschelman 
4483273d9f13SBarry Smith   By default, this format uses inodes (identical nodes) when possible.
4484273d9f13SBarry Smith   We search for consecutive rows with the same nonzero structure, thereby
4485273d9f13SBarry Smith   reusing matrix information to achieve increased efficiency.
4486273d9f13SBarry Smith 
44872920cce0SJacob Faibussowitsch   Example Usage:
4488273d9f13SBarry Smith   Consider the following 8x8 matrix with 34 non-zero values, that is
4489273d9f13SBarry Smith   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4490273d9f13SBarry Smith   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4491efc377ccSKarl Rupp   as follows
4492273d9f13SBarry Smith 
4493273d9f13SBarry Smith .vb
4494273d9f13SBarry Smith             1  2  0  |  0  3  0  |  0  4
4495273d9f13SBarry Smith     Proc0   0  5  6  |  7  0  0  |  8  0
4496273d9f13SBarry Smith             9  0 10  | 11  0  0  | 12  0
4497273d9f13SBarry Smith     -------------------------------------
4498273d9f13SBarry Smith            13  0 14  | 15 16 17  |  0  0
4499273d9f13SBarry Smith     Proc1   0 18  0  | 19 20 21  |  0  0
4500273d9f13SBarry Smith             0  0  0  | 22 23  0  | 24  0
4501273d9f13SBarry Smith     -------------------------------------
4502273d9f13SBarry Smith     Proc2  25 26 27  |  0  0 28  | 29  0
4503273d9f13SBarry Smith            30  0  0  | 31 32 33  |  0 34
4504273d9f13SBarry Smith .ve
4505273d9f13SBarry Smith 
4506da57b5cdSKarl Rupp   This can be represented as a collection of submatrices as
4507273d9f13SBarry Smith 
4508273d9f13SBarry Smith .vb
4509273d9f13SBarry Smith       A B C
4510273d9f13SBarry Smith       D E F
4511273d9f13SBarry Smith       G H I
4512273d9f13SBarry Smith .ve
4513273d9f13SBarry Smith 
4514273d9f13SBarry Smith   Where the submatrices A,B,C are owned by proc0, D,E,F are
4515273d9f13SBarry Smith   owned by proc1, G,H,I are owned by proc2.
4516273d9f13SBarry Smith 
4517273d9f13SBarry Smith   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4518273d9f13SBarry Smith   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4519273d9f13SBarry Smith   The 'M','N' parameters are 8,8, and have the same values on all procs.
4520273d9f13SBarry Smith 
4521273d9f13SBarry Smith   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4522273d9f13SBarry Smith   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4523273d9f13SBarry Smith   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4524273d9f13SBarry Smith   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
452527430b45SBarry Smith   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4526273d9f13SBarry Smith   matrix, and [DF] as another `MATSEQAIJ` matrix.
4527273d9f13SBarry Smith 
45282ef1f0ffSBarry Smith   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
45292ef1f0ffSBarry Smith   allocated for every row of the local diagonal submatrix, and `o_nz`
4530273d9f13SBarry Smith   storage locations are allocated for every row of the OFF-DIAGONAL submat.
45312ef1f0ffSBarry Smith   One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4532273d9f13SBarry Smith   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
45332ef1f0ffSBarry Smith   In this case, the values of `d_nz`,`o_nz` are
4534273d9f13SBarry Smith .vb
453527430b45SBarry Smith      proc0  d_nz = 2, o_nz = 2
453627430b45SBarry Smith      proc1  d_nz = 3, o_nz = 2
453727430b45SBarry Smith      proc2  d_nz = 1, o_nz = 4
4538273d9f13SBarry Smith .ve
45392ef1f0ffSBarry Smith   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4540273d9f13SBarry Smith   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4541273d9f13SBarry Smith   for proc2. i.e., we are using 12+15+10=37 storage locations to store
4542273d9f13SBarry Smith   34 values.
4543273d9f13SBarry Smith 
45442ef1f0ffSBarry Smith   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4545a5b23f4aSJose E. Roman   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4546da57b5cdSKarl Rupp   In the above case the values for d_nnz,o_nnz are
4547273d9f13SBarry Smith .vb
454827430b45SBarry Smith      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
454927430b45SBarry Smith      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
455027430b45SBarry Smith      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4551273d9f13SBarry Smith .ve
4552273d9f13SBarry Smith   Here the space allocated is sum of all the above values i.e 34, and
4553273d9f13SBarry Smith   hence pre-allocation is perfect.
4554273d9f13SBarry Smith 
45551cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4556727bdf9bSBarry Smith           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4557727bdf9bSBarry Smith           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4558273d9f13SBarry Smith @*/
4559d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4560d71ae5a4SJacob Faibussowitsch {
4561b1d57f15SBarry Smith   PetscMPIInt size;
4562273d9f13SBarry Smith 
4563273d9f13SBarry Smith   PetscFunctionBegin;
45649566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
45659566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
45669566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
4567273d9f13SBarry Smith   if (size > 1) {
45689566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATMPIAIJ));
45699566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4570273d9f13SBarry Smith   } else {
45719566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATSEQAIJ));
45729566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4573273d9f13SBarry Smith   }
45743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4575273d9f13SBarry Smith }
4576195d93cdSBarry Smith 
45770b98dbb4SBarry Smith /*MC
45780b98dbb4SBarry Smith     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
45790b98dbb4SBarry Smith 
45800b98dbb4SBarry Smith     Synopsis:
45810b98dbb4SBarry Smith     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
45820b98dbb4SBarry Smith 
45830b98dbb4SBarry Smith     Not Collective
45840b98dbb4SBarry Smith 
45850b98dbb4SBarry Smith     Input Parameter:
45860b98dbb4SBarry Smith .   A - the `MATMPIAIJ` matrix
45870b98dbb4SBarry Smith 
45880b98dbb4SBarry Smith     Output Parameters:
45890b98dbb4SBarry Smith +   Ad - the diagonal portion of the matrix
45904cf0e950SBarry Smith .   Ao - the off-diagonal portion of the matrix
45912ef1f0ffSBarry Smith .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
45920b98dbb4SBarry Smith -   ierr - error code
45930b98dbb4SBarry Smith 
45940b98dbb4SBarry Smith      Level: advanced
45950b98dbb4SBarry Smith 
45960b98dbb4SBarry Smith     Note:
45970b98dbb4SBarry Smith     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
45980b98dbb4SBarry Smith 
45991cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
46000b98dbb4SBarry Smith M*/
46010b98dbb4SBarry Smith 
46020b98dbb4SBarry Smith /*MC
46030b98dbb4SBarry Smith     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
46040b98dbb4SBarry Smith 
46050b98dbb4SBarry Smith     Synopsis:
46060b98dbb4SBarry Smith     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
46070b98dbb4SBarry Smith 
46080b98dbb4SBarry Smith     Not Collective
46090b98dbb4SBarry Smith 
46100b98dbb4SBarry Smith     Input Parameters:
46110b98dbb4SBarry Smith +   A - the `MATMPIAIJ` matrix
46120b98dbb4SBarry Smith .   Ad - the diagonal portion of the matrix
46134cf0e950SBarry Smith .   Ao - the off-diagonal portion of the matrix
46142ef1f0ffSBarry Smith .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
46150b98dbb4SBarry Smith -   ierr - error code
46160b98dbb4SBarry Smith 
46170b98dbb4SBarry Smith      Level: advanced
46180b98dbb4SBarry Smith 
46191cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
46200b98dbb4SBarry Smith M*/
46210b98dbb4SBarry Smith 
4622127ca0efSMatthew Knepley /*@C
46230ab4885dSBarry Smith   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4624127ca0efSMatthew Knepley 
46252ef1f0ffSBarry Smith   Not Collective
4626127ca0efSMatthew Knepley 
4627127ca0efSMatthew Knepley   Input Parameter:
462811a5261eSBarry Smith . A - The `MATMPIAIJ` matrix
4629127ca0efSMatthew Knepley 
4630127ca0efSMatthew Knepley   Output Parameters:
463111a5261eSBarry Smith + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
463211a5261eSBarry Smith . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
46332ef1f0ffSBarry Smith - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4634127ca0efSMatthew Knepley 
46350ab4885dSBarry Smith   Level: intermediate
46360ab4885dSBarry Smith 
463711a5261eSBarry Smith   Note:
46382ef1f0ffSBarry Smith   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
46392ef1f0ffSBarry Smith   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns of `Ao` are in [0, Nco), where Nco is
46402ef1f0ffSBarry Smith   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4641127ca0efSMatthew Knepley   local column numbers to global column numbers in the original matrix.
4642127ca0efSMatthew Knepley 
4643fe59aa6dSJacob Faibussowitsch   Fortran Notes:
46440ab4885dSBarry Smith   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4645127ca0efSMatthew Knepley 
4646fe59aa6dSJacob Faibussowitsch .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4647127ca0efSMatthew Knepley @*/
4648d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4649d71ae5a4SJacob Faibussowitsch {
4650195d93cdSBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
465104cf37c7SBarry Smith   PetscBool   flg;
4652b1d57f15SBarry Smith 
4653195d93cdSBarry Smith   PetscFunctionBegin;
46549566063dSJacob Faibussowitsch   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
465528b400f6SJacob Faibussowitsch   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
465621e72a00SBarry Smith   if (Ad) *Ad = a->A;
465721e72a00SBarry Smith   if (Ao) *Ao = a->B;
465821e72a00SBarry Smith   if (colmap) *colmap = a->garray;
46593ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4660195d93cdSBarry Smith }
4661a2243be0SBarry Smith 
4662d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4663d71ae5a4SJacob Faibussowitsch {
4664110bb6e1SHong Zhang   PetscInt     m, N, i, rstart, nnz, Ii;
46659b8102ccSHong Zhang   PetscInt    *indx;
4666110bb6e1SHong Zhang   PetscScalar *values;
4667421ddf4dSJunchao Zhang   MatType      rootType;
46689b8102ccSHong Zhang 
46699b8102ccSHong Zhang   PetscFunctionBegin;
46709566063dSJacob Faibussowitsch   PetscCall(MatGetSize(inmat, &m, &N));
4671110bb6e1SHong Zhang   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4672110bb6e1SHong Zhang     PetscInt *dnz, *onz, sum, bs, cbs;
4673110bb6e1SHong Zhang 
467448a46eb9SPierre Jolivet     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4675a22543b6SHong Zhang     /* Check sum(n) = N */
4676462c564dSBarry Smith     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
467708401ef6SPierre Jolivet     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4678a22543b6SHong Zhang 
46799566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
46809b8102ccSHong Zhang     rstart -= m;
46819b8102ccSHong Zhang 
4682d0609cedSBarry Smith     MatPreallocateBegin(comm, m, n, dnz, onz);
46839b8102ccSHong Zhang     for (i = 0; i < m; i++) {
46849566063dSJacob Faibussowitsch       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
46859566063dSJacob Faibussowitsch       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
46869566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
46879b8102ccSHong Zhang     }
46889b8102ccSHong Zhang 
46899566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, outmat));
46909566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
46919566063dSJacob Faibussowitsch     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
46929566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
46939566063dSJacob Faibussowitsch     PetscCall(MatGetRootType_Private(inmat, &rootType));
46949566063dSJacob Faibussowitsch     PetscCall(MatSetType(*outmat, rootType));
46959566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
46969566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4697d0609cedSBarry Smith     MatPreallocateEnd(dnz, onz);
46989566063dSJacob Faibussowitsch     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
46999b8102ccSHong Zhang   }
47009b8102ccSHong Zhang 
4701110bb6e1SHong Zhang   /* numeric phase */
47029566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
47039b8102ccSHong Zhang   for (i = 0; i < m; i++) {
47049566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
47059b8102ccSHong Zhang     Ii = i + rstart;
47069566063dSJacob Faibussowitsch     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
47079566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
47089b8102ccSHong Zhang   }
47099566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
47109566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
47113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4712c5d6d63eSBarry Smith }
4713c5d6d63eSBarry Smith 
471449abdd8aSBarry Smith static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data)
4715d71ae5a4SJacob Faibussowitsch {
471649abdd8aSBarry Smith   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data;
471751a7d1a8SHong Zhang 
471851a7d1a8SHong Zhang   PetscFunctionBegin;
47193ba16761SJacob Faibussowitsch   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
47209566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->id_r));
47219566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->len_s));
47229566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->len_r));
47239566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->bi));
47249566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->bj));
47259566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->buf_ri[0]));
47269566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->buf_ri));
47279566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->buf_rj[0]));
47289566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->buf_rj));
47299566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->coi));
47309566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->coj));
47319566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge->owners_co));
47329566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&merge->rowmap));
47339566063dSJacob Faibussowitsch   PetscCall(PetscFree(merge));
47343ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
473551a7d1a8SHong Zhang }
473651a7d1a8SHong Zhang 
4737c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
4738c6db04a5SJed Brown #include <petscbt.h>
47394ebed01fSBarry Smith 
/*
  MatCreateMPIAIJSumSeqAIJNumeric - numeric phase of summing one sequential matrix per process into a parallel matrix.

  seqmat - this process's sequential matrix; its data is accessed as Mat_SeqAIJ
  mpimat - parallel matrix that carries a "MatMergeSeqsToMPI" container; an error is raised if the container is absent

  Outline:
  1. Fetch the Mat_Merge_SeqsToMPI context (bi/bj row structure, received index buffers, message lengths).
  2. Send the values of the rows owned by other processes and receive the values that other processes
     hold for the locally owned rows.
  3. For each locally owned row, accumulate the local and the received contributions into a work row that
     is laid out like the row's column list in bj, then insert that row into mpimat.
  4. Assemble mpimat.
*/
4740d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4741d71ae5a4SJacob Faibussowitsch {
4742ce94432eSBarry Smith   MPI_Comm             comm;
474355d1abb9SHong Zhang   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4744b1d57f15SBarry Smith   PetscMPIInt          size, rank, taga, *len_s;
47456497c311SBarry Smith   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
47466497c311SBarry Smith   PetscMPIInt          proc, k;
4747b1d57f15SBarry Smith   PetscInt           **buf_ri, **buf_rj;
47486497c311SBarry Smith   PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4749b1d57f15SBarry Smith   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
475055d1abb9SHong Zhang   MPI_Request         *s_waits, *r_waits;
475155d1abb9SHong Zhang   MPI_Status          *status;
4752fff043a9SJunchao Zhang   const MatScalar     *aa, *a_a;
4753dd6ea824SBarry Smith   MatScalar          **abuf_r, *ba_i;
475455d1abb9SHong Zhang   Mat_Merge_SeqsToMPI *merge;
4755776b82aeSLisandro Dalcin   PetscContainer       container;
475655d1abb9SHong Zhang 
475755d1abb9SHong Zhang   PetscFunctionBegin;
47589566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
47599566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
47603c2c1871SHong Zhang 
47619566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
47629566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
476355d1abb9SHong Zhang 
  /* the merge context is looked up by name on mpimat; without it the structure/communication data is unavailable */
47649566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
476528b400f6SJacob Faibussowitsch   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
47669566063dSJacob Faibussowitsch   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
47679566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  /* aa is re-pointed at individual rows further down; a_a keeps the base pointer for the matching restore call */
4768fff043a9SJunchao Zhang   aa = a_a;
4769bf0cc555SLisandro Dalcin 
477055d1abb9SHong Zhang   bi     = merge->bi;
477155d1abb9SHong Zhang   bj     = merge->bj;
477255d1abb9SHong Zhang   buf_ri = merge->buf_ri;
477355d1abb9SHong Zhang   buf_rj = merge->buf_rj;
477455d1abb9SHong Zhang 
  /* size entries; the same status array is passed to both MPI_Waitall() calls below */
47759566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size, &status));
47767a2fc3feSBarry Smith   owners = merge->rowmap->range;
477755d1abb9SHong Zhang   len_s  = merge->len_s;
477855d1abb9SHong Zhang 
477955d1abb9SHong Zhang   /* send and recv matrix values */
47809566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
47819566063dSJacob Faibussowitsch   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
478255d1abb9SHong Zhang 
47839566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  /* one message per process with a nonzero len_s[proc]: len_s[proc] values starting at the first
     value of row owners[proc] in the CSR value array of seqmat */
478455d1abb9SHong Zhang   for (proc = 0, k = 0; proc < size; proc++) {
478555d1abb9SHong Zhang     if (!len_s[proc]) continue;
478655d1abb9SHong Zhang     i = owners[proc];
47876497c311SBarry Smith     PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
478855d1abb9SHong Zhang     k++;
478955d1abb9SHong Zhang   }
479055d1abb9SHong Zhang 
47919566063dSJacob Faibussowitsch   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
47929566063dSJacob Faibussowitsch   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
47939566063dSJacob Faibussowitsch   PetscCall(PetscFree(status));
479455d1abb9SHong Zhang 
47959566063dSJacob Faibussowitsch   PetscCall(PetscFree(s_waits));
47969566063dSJacob Faibussowitsch   PetscCall(PetscFree(r_waits));
479755d1abb9SHong Zhang 
479855d1abb9SHong Zhang   /* insert mat values of mpimat */
  /* work row with N (global column count of mpimat) entries; presumably an upper bound on any row length bnzi */
47999566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(N, &ba_i));
48009566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
480155d1abb9SHong Zhang 
  /* set up read cursors into each received i-structure: entry 0 is the row count, followed by the
     row indices, followed by the row offsets */
480255d1abb9SHong Zhang   for (k = 0; k < merge->nrecv; k++) {
480355d1abb9SHong Zhang     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4804f4f49eeaSPierre Jolivet     nrows       = *buf_ri_k[k];
480555d1abb9SHong Zhang     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4806a5b23f4aSJose E. Roman     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
480755d1abb9SHong Zhang   }
480855d1abb9SHong Zhang 
480955d1abb9SHong Zhang   /* set values of ba */
48107a2fc3feSBarry Smith   m = merge->rowmap->n;
481155d1abb9SHong Zhang   for (i = 0; i < m; i++) {
    /* arow is the global row index; it is also the row of seqmat that contributes locally */
481255d1abb9SHong Zhang     arow = owners[rank] + i;
481355d1abb9SHong Zhang     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
481455d1abb9SHong Zhang     bnzi = bi[i + 1] - bi[i];
48159566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(ba_i, bnzi));
481655d1abb9SHong Zhang 
481755d1abb9SHong Zhang     /* add local non-zero vals of this proc's seqmat into ba */
481855d1abb9SHong Zhang     anzi   = ai[arow + 1] - ai[arow];
481955d1abb9SHong Zhang     aj     = a->j + ai[arow];
4820fff043a9SJunchao Zhang     aa     = a_a + ai[arow];
482155d1abb9SHong Zhang     nextaj = 0;
    /* walk the bj column list; nextaj advances only on a column match and j is not bounded by bnzi, so this
       relies on aj being a sorted subset of bj_i - presumably guaranteed by the symbolic phase, TODO confirm */
482255d1abb9SHong Zhang     for (j = 0; nextaj < anzi; j++) {
482355d1abb9SHong Zhang       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
482455d1abb9SHong Zhang         ba_i[j] += aa[nextaj++];
482555d1abb9SHong Zhang       }
482655d1abb9SHong Zhang     }
482755d1abb9SHong Zhang 
482855d1abb9SHong Zhang     /* add received vals into ba */
482955d1abb9SHong Zhang     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
483055d1abb9SHong Zhang       /* i-th row */
      /* *nextrow[k] is the next not-yet-consumed row index of message k; it is compared with the local index i */
483155d1abb9SHong Zhang       if (i == *nextrow[k]) {
483255d1abb9SHong Zhang         anzi   = *(nextai[k] + 1) - *nextai[k];
4833f4f49eeaSPierre Jolivet         aj     = buf_rj[k] + *nextai[k];
4834f4f49eeaSPierre Jolivet         aa     = abuf_r[k] + *nextai[k];
483555d1abb9SHong Zhang         nextaj = 0;
        /* same merge as for the local row above, now with the received column indices and values */
483655d1abb9SHong Zhang         for (j = 0; nextaj < anzi; j++) {
483755d1abb9SHong Zhang           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
483855d1abb9SHong Zhang             ba_i[j] += aa[nextaj++];
483955d1abb9SHong Zhang           }
484055d1abb9SHong Zhang         }
        /* advance both cursors of message k to its next row */
48419371c9d4SSatish Balay         nextrow[k]++;
48429371c9d4SSatish Balay         nextai[k]++;
484355d1abb9SHong Zhang       }
484455d1abb9SHong Zhang     }
    /* ba_i already holds the complete sum for this row, hence INSERT_VALUES */
48459566063dSJacob Faibussowitsch     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
484655d1abb9SHong Zhang   }
48479566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
48489566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
48499566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
485055d1abb9SHong Zhang 
  /* entry [0] of the receive-buffer table is freed before the table itself */
48519566063dSJacob Faibussowitsch   PetscCall(PetscFree(abuf_r[0]));
48529566063dSJacob Faibussowitsch   PetscCall(PetscFree(abuf_r));
48539566063dSJacob Faibussowitsch   PetscCall(PetscFree(ba_i));
48549566063dSJacob Faibussowitsch   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
48559566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
48563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
485755d1abb9SHong Zhang }
485838f152feSBarry Smith 
/*
  MatCreateMPIAIJSumSeqAIJSymbolic - symbolic phase of summing one sequential matrix per process into a parallel
  matrix: determines the merged nonzero structure of the locally owned rows and records the communication
  pattern needed by the numeric phase.

  comm   - communicator of the new matrix (duplicated on entry, the duplicate is released before returning)
  seqmat - this process's sequential matrix; its data is accessed as Mat_SeqAIJ
  m      - local row size handed to the row layout; overwritten with the layout's local size after setup
  n      - local column size, passed on to MatPreallocateBegin() and MatSetSizes()
  mpimat - output: preallocated but unassembled MATMPIAIJ with the Mat_Merge_SeqsToMPI context attached in a
           container named "MatMergeSeqsToMPI"

  Outline:
  1. Build the row layout and, per destination process, the message lengths for the j-structure (column indices)
     and the i-structure (row count, row indices, row offsets).
  2. Exchange the j-structure, then the i-structure.
  3. For each locally owned row, merge the local and the received column lists through a linked list, store the
     merged list in bi/bj and count diagonal/off-diagonal entries for preallocation.
  4. Create and preallocate the parallel matrix and attach the merge context to it.
*/
4859d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4860d71ae5a4SJacob Faibussowitsch {
486155a3bba9SHong Zhang   Mat                  B_mpi;
4862c2234fe3SHong Zhang   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4863b1d57f15SBarry Smith   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4864b1d57f15SBarry Smith   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4865d0f46423SBarry Smith   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
48666497c311SBarry Smith   PetscInt             len, *dnz, *onz, bs, cbs;
4867c599c493SJunchao Zhang   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4868b1d57f15SBarry Smith   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
486955d1abb9SHong Zhang   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
487058cb9c82SHong Zhang   MPI_Status          *status;
48710298fd71SBarry Smith   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4872be0fcf8dSHong Zhang   PetscBT              lnkbt;
487351a7d1a8SHong Zhang   Mat_Merge_SeqsToMPI *merge;
4874776b82aeSLisandro Dalcin   PetscContainer       container;
487502c68681SHong Zhang 
4876e5f2cdd8SHong Zhang   PetscFunctionBegin;
48779566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
48783c2c1871SHong Zhang 
487938f152feSBarry Smith   /* make sure it is a PETSc comm */
48809566063dSJacob Faibussowitsch   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
48819566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
48829566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
488355d1abb9SHong Zhang 
48849566063dSJacob Faibussowitsch   PetscCall(PetscNew(&merge));
48859566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size, &status));
4886e5f2cdd8SHong Zhang 
48876abd8857SHong Zhang   /* determine row ownership */
  /* the global row count of the layout is M, the (local) row count of seqmat */
48889566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
48899566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
48909566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
48919566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
48929566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(merge->rowmap));
48939566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size, &len_si));
48949566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size, &merge->len_s));
489555d1abb9SHong Zhang 
48967a2fc3feSBarry Smith   m      = merge->rowmap->n;
48977a2fc3feSBarry Smith   owners = merge->rowmap->range;
48986abd8857SHong Zhang 
48996abd8857SHong Zhang   /* determine the number of messages to send, their lengths */
49003e06a4e6SHong Zhang   len_s = merge->len_s;
490151a7d1a8SHong Zhang 
49022257cef7SHong Zhang   len          = 0; /* length of buf_si[] */
4903c2234fe3SHong Zhang   merge->nsend = 0;
49046497c311SBarry Smith   for (PetscMPIInt proc = 0; proc < size; proc++) {
49052257cef7SHong Zhang     len_si[proc] = 0;
49063e06a4e6SHong Zhang     if (proc == rank) {
49076abd8857SHong Zhang       len_s[proc] = 0;
49083e06a4e6SHong Zhang     } else {
      /* provisional i-structure length; replaced below whenever len_s[proc] is nonzero */
49096497c311SBarry Smith       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
49106497c311SBarry Smith       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of nonzeros (one column index and one value each) to be sent to [proc] */
49113e06a4e6SHong Zhang     }
49123e06a4e6SHong Zhang     if (len_s[proc]) {
4913c2234fe3SHong Zhang       merge->nsend++;
      /* count only the rows of proc's range that actually hold nonzeros in seqmat */
49142257cef7SHong Zhang       nrows = 0;
49152257cef7SHong Zhang       for (i = owners[proc]; i < owners[proc + 1]; i++) {
49162257cef7SHong Zhang         if (ai[i + 1] > ai[i]) nrows++;
49172257cef7SHong Zhang       }
      /* 2*(nrows+1) = 1 (row count) + nrows (row indices) + nrows+1 (row offsets) */
49186497c311SBarry Smith       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
49192257cef7SHong Zhang       len += len_si[proc];
4920409913e3SHong Zhang     }
492158cb9c82SHong Zhang   }
4922409913e3SHong Zhang 
49232257cef7SHong Zhang   /* determine the number and length of messages to receive for ij-structure */
49249566063dSJacob Faibussowitsch   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
49259566063dSJacob Faibussowitsch   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4926671beff6SHong Zhang 
49273e06a4e6SHong Zhang   /* post the Irecv of j-structure */
49289566063dSJacob Faibussowitsch   PetscCall(PetscCommGetNewTag(comm, &tagj));
49299566063dSJacob Faibussowitsch   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
493002c68681SHong Zhang 
49313e06a4e6SHong Zhang   /* post the Isend of j-structure */
49329566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
49333e06a4e6SHong Zhang 
  /* the loop-local PetscMPIInt k shadows the function-scope PetscInt k */
49346497c311SBarry Smith   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4935409913e3SHong Zhang     if (!len_s[proc]) continue;
493602c68681SHong Zhang     i = owners[proc];
49376497c311SBarry Smith     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
493851a7d1a8SHong Zhang     k++;
493951a7d1a8SHong Zhang   }
494051a7d1a8SHong Zhang 
49413e06a4e6SHong Zhang   /* receives and sends of j-structure are complete */
49429566063dSJacob Faibussowitsch   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
49439566063dSJacob Faibussowitsch   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
494402c68681SHong Zhang 
494502c68681SHong Zhang   /* send and recv i-structure */
49469566063dSJacob Faibussowitsch   PetscCall(PetscCommGetNewTag(comm, &tagi));
49479566063dSJacob Faibussowitsch   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
494802c68681SHong Zhang 
  /* a single send buffer holds all outgoing i-structures back to back; buf_si walks through it */
49499566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(len + 1, &buf_s));
49503e06a4e6SHong Zhang   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
49516497c311SBarry Smith   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
495202c68681SHong Zhang     if (!len_s[proc]) continue;
49533e06a4e6SHong Zhang     /* form outgoing message for i-structure:
49543e06a4e6SHong Zhang          buf_si[0]:                 nrows to be sent
49553e06a4e6SHong Zhang                [1:nrows]:           row index (local to the receiving process, i.e. i - owners[proc])
49563e06a4e6SHong Zhang                [nrows+1:2*nrows+1]: i-structure index
49573e06a4e6SHong Zhang     */
49582257cef7SHong Zhang     nrows       = len_si[proc] / 2 - 1;
49593e06a4e6SHong Zhang     buf_si_i    = buf_si + nrows + 1;
49603e06a4e6SHong Zhang     buf_si[0]   = nrows;
49613e06a4e6SHong Zhang     buf_si_i[0] = 0;
    /* nrows is reused as a running index over the nonempty rows */
49623e06a4e6SHong Zhang     nrows       = 0;
49633e06a4e6SHong Zhang     for (i = owners[proc]; i < owners[proc + 1]; i++) {
49643e06a4e6SHong Zhang       anzi = ai[i + 1] - ai[i];
49653e06a4e6SHong Zhang       if (anzi) {
49663e06a4e6SHong Zhang         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
49673e06a4e6SHong Zhang         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
49683e06a4e6SHong Zhang         nrows++;
49693e06a4e6SHong Zhang       }
49703e06a4e6SHong Zhang     }
49716497c311SBarry Smith     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
497202c68681SHong Zhang     k++;
49732257cef7SHong Zhang     buf_si += len_si[proc];
497402c68681SHong Zhang   }
49752257cef7SHong Zhang 
49769566063dSJacob Faibussowitsch   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
49779566063dSJacob Faibussowitsch   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
497802c68681SHong Zhang 
49799566063dSJacob Faibussowitsch   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
498048a46eb9SPierre Jolivet   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
49813e06a4e6SHong Zhang 
  /* the send buffer and the request arrays are released only after both Waitall calls above */
49829566063dSJacob Faibussowitsch   PetscCall(PetscFree(len_si));
49839566063dSJacob Faibussowitsch   PetscCall(PetscFree(len_ri));
49849566063dSJacob Faibussowitsch   PetscCall(PetscFree(rj_waits));
49859566063dSJacob Faibussowitsch   PetscCall(PetscFree2(si_waits, sj_waits));
49869566063dSJacob Faibussowitsch   PetscCall(PetscFree(ri_waits));
49879566063dSJacob Faibussowitsch   PetscCall(PetscFree(buf_s));
49889566063dSJacob Faibussowitsch   PetscCall(PetscFree(status));
498958cb9c82SHong Zhang 
4990bcc1bcd5SHong Zhang   /* compute a local seq matrix in each processor */
499158cb9c82SHong Zhang   /* allocate bi array and free space for accumulating nonzero column info */
49929566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &bi));
499358cb9c82SHong Zhang   bi[0] = 0;
499458cb9c82SHong Zhang 
4995be0fcf8dSHong Zhang   /* create and initialize a linked list */
  /* lnk and lnkbt are passed by name while still unset, so PetscLLCreate() is presumably a macro that allocates them */
4996be0fcf8dSHong Zhang   nlnk = N + 1;
49979566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
499858cb9c82SHong Zhang 
4999bcc1bcd5SHong Zhang   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5000bcc1bcd5SHong Zhang   len = ai[owners[rank + 1]] - ai[owners[rank]];
50019566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
50022205254eSKarl Rupp 
500358cb9c82SHong Zhang   current_space = free_space;
500458cb9c82SHong Zhang 
5005bcc1bcd5SHong Zhang   /* determine symbolic info for each local row */
50069566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
50071d79065fSBarry Smith 
50083e06a4e6SHong Zhang   for (k = 0; k < merge->nrecv; k++) {
50092257cef7SHong Zhang     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
50103e06a4e6SHong Zhang     nrows       = *buf_ri_k[k];
50113e06a4e6SHong Zhang     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
5012a5b23f4aSJose E. Roman     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
50133e06a4e6SHong Zhang   }
50142257cef7SHong Zhang 
  /* NOTE(review): n is passed on unchanged and may still be PETSC_DECIDE here (it is only tested at the
     MatSetSizes() call below), whereas the preceding routine in this file resolves n with PetscSplitOwnership()
     before calling MatPreallocateBegin() - confirm the dnz/onz split is as intended in that case */
5015d0609cedSBarry Smith   MatPreallocateBegin(comm, m, n, dnz, onz);
5016bcc1bcd5SHong Zhang   len = 0;
501758cb9c82SHong Zhang   for (i = 0; i < m; i++) {
501858cb9c82SHong Zhang     bnzi = 0;
501958cb9c82SHong Zhang     /* add local non-zero cols of this proc's seqmat into lnk */
502058cb9c82SHong Zhang     arow = owners[rank] + i;
502158cb9c82SHong Zhang     anzi = ai[arow + 1] - ai[arow];
502258cb9c82SHong Zhang     aj   = a->j + ai[arow];
    /* nlnk is an output of PetscLLAddSorted(); presumably the number of columns newly added to the list - TODO confirm */
50239566063dSJacob Faibussowitsch     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
502458cb9c82SHong Zhang     bnzi += nlnk;
502558cb9c82SHong Zhang     /* add received col data into lnk */
502651a7d1a8SHong Zhang     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
502755d1abb9SHong Zhang       if (i == *nextrow[k]) {            /* i-th row */
50283e06a4e6SHong Zhang         anzi = *(nextai[k] + 1) - *nextai[k];
50293e06a4e6SHong Zhang         aj   = buf_rj[k] + *nextai[k];
50309566063dSJacob Faibussowitsch         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
50313e06a4e6SHong Zhang         bnzi += nlnk;
50329371c9d4SSatish Balay         nextrow[k]++;
50339371c9d4SSatish Balay         nextai[k]++;
50343e06a4e6SHong Zhang       }
503558cb9c82SHong Zhang     }
    /* NOTE(review): the maximum row length kept in len is not read again in this function */
5036bcc1bcd5SHong Zhang     if (len < bnzi) len = bnzi; /* =max(bnzi) */
503758cb9c82SHong Zhang 
503858cb9c82SHong Zhang     /* if free space is not available, make more free space */
503948a46eb9SPierre Jolivet     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
504058cb9c82SHong Zhang     /* copy data into free space, then initialize lnk */
50419566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
    /* count this row's merged columns towards the diagonal/off-diagonal preallocation */
50429566063dSJacob Faibussowitsch     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5043bcc1bcd5SHong Zhang 
504458cb9c82SHong Zhang     current_space->array += bnzi;
504558cb9c82SHong Zhang     current_space->local_used += bnzi;
504658cb9c82SHong Zhang     current_space->local_remaining -= bnzi;
504758cb9c82SHong Zhang 
504858cb9c82SHong Zhang     bi[i + 1] = bi[i] + bnzi;
504958cb9c82SHong Zhang   }
5050bcc1bcd5SHong Zhang 
50519566063dSJacob Faibussowitsch   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5052bcc1bcd5SHong Zhang 
  /* gather the per-row column lists accumulated in the free-space list into the single array bj */
50539566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
50549566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
50559566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
5056409913e3SHong Zhang 
5057bcc1bcd5SHong Zhang   /* create symbolic parallel matrix B_mpi */
50589566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
50599566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, &B_mpi));
  /* when n is left to PETSc, the global column count N of seqmat is supplied instead */
506054b84b50SHong Zhang   if (n == PETSC_DECIDE) {
50619566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
506254b84b50SHong Zhang   } else {
50639566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
506454b84b50SHong Zhang   }
50659566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
50669566063dSJacob Faibussowitsch   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
50679566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5068d0609cedSBarry Smith   MatPreallocateEnd(dnz, onz);
50699566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
507058cb9c82SHong Zhang 
507190431a8fSHong Zhang   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
50726abd8857SHong Zhang   B_mpi->assembled = PETSC_FALSE;
  /* hand the structure and the received index buffers over to the merge context for the numeric phase */
5073affca5deSHong Zhang   merge->bi        = bi;
5074affca5deSHong Zhang   merge->bj        = bj;
507502c68681SHong Zhang   merge->buf_ri    = buf_ri;
507602c68681SHong Zhang   merge->buf_rj    = buf_rj;
50770298fd71SBarry Smith   merge->coi       = NULL;
50780298fd71SBarry Smith   merge->coj       = NULL;
50790298fd71SBarry Smith   merge->owners_co = NULL;
5080affca5deSHong Zhang 
  /* releases the communicator obtained from PetscCommDuplicate() at the top */
50819566063dSJacob Faibussowitsch   PetscCall(PetscCommDestroy(&comm));
5082bf0cc555SLisandro Dalcin 
5083affca5deSHong Zhang   /* attach the supporting struct to B_mpi for reuse */
50849566063dSJacob Faibussowitsch   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
50859566063dSJacob Faibussowitsch   PetscCall(PetscContainerSetPointer(container, merge));
508649abdd8aSBarry Smith   PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
50879566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
  /* drop the local reference to the container after composing it with B_mpi */
50889566063dSJacob Faibussowitsch   PetscCall(PetscContainerDestroy(&container));
5089affca5deSHong Zhang   *mpimat = B_mpi;
509038f152feSBarry Smith 
50919566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
50923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
5093e5f2cdd8SHong Zhang }
509425616d81SHong Zhang 
5095cc4c1da9SBarry Smith /*@
509611a5261eSBarry Smith   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5097d4036a1aSHong Zhang   matrices from each processor
5098d4036a1aSHong Zhang 
5099d083f849SBarry Smith   Collective
5100d4036a1aSHong Zhang 
5101d4036a1aSHong Zhang   Input Parameters:
5102d4036a1aSHong Zhang + comm   - the communicators the parallel matrix will live on
5103d4036a1aSHong Zhang . seqmat - the input sequential matrices
510411a5261eSBarry Smith . m      - number of local rows (or `PETSC_DECIDE`)
510511a5261eSBarry Smith . n      - number of local columns (or `PETSC_DECIDE`)
510611a5261eSBarry Smith - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5107d4036a1aSHong Zhang 
5108d4036a1aSHong Zhang   Output Parameter:
5109d4036a1aSHong Zhang . mpimat - the parallel matrix generated
5110d4036a1aSHong Zhang 
5111d4036a1aSHong Zhang   Level: advanced
5112d4036a1aSHong Zhang 
511311a5261eSBarry Smith   Note:
5114d4036a1aSHong Zhang   The dimensions of the sequential matrix in each processor MUST be the same.
5115d4036a1aSHong Zhang   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5116b6971eaeSBarry Smith   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
51172ef1f0ffSBarry Smith 
51181cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5119d4036a1aSHong Zhang @*/
5120d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5121d71ae5a4SJacob Faibussowitsch {
51227e63b356SHong Zhang   PetscMPIInt size;
512355d1abb9SHong Zhang 
512455d1abb9SHong Zhang   PetscFunctionBegin;
51259566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
51267e63b356SHong Zhang   if (size == 1) {
51279566063dSJacob Faibussowitsch     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
51287e63b356SHong Zhang     if (scall == MAT_INITIAL_MATRIX) {
51299566063dSJacob Faibussowitsch       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
51307e63b356SHong Zhang     } else {
51319566063dSJacob Faibussowitsch       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
51327e63b356SHong Zhang     }
51339566063dSJacob Faibussowitsch     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
51343ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
51357e63b356SHong Zhang   }
51369566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
513748a46eb9SPierre Jolivet   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
51389566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
51399566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
51403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
514155d1abb9SHong Zhang }
51424ebed01fSBarry Smith 
5143bc08b0f1SBarry Smith /*@
51442920cce0SJacob Faibussowitsch   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
51458a9c020eSBarry Smith 
51468a9c020eSBarry Smith   Not Collective
51478a9c020eSBarry Smith 
51482fe279fdSBarry Smith   Input Parameter:
514920f4b53cSBarry Smith . A - the matrix
51508a9c020eSBarry Smith 
51518a9c020eSBarry Smith   Output Parameter:
51528a9c020eSBarry Smith . A_loc - the local sequential matrix generated
51538a9c020eSBarry Smith 
51548a9c020eSBarry Smith   Level: developer
51558a9c020eSBarry Smith 
51568a9c020eSBarry Smith   Notes:
51572920cce0SJacob Faibussowitsch   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
51582920cce0SJacob Faibussowitsch   with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and
51592920cce0SJacob Faibussowitsch   `n` is the global column count obtained with `MatGetSize()`
51602920cce0SJacob Faibussowitsch 
516111a5261eSBarry Smith   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
51628a9c020eSBarry Smith 
51632920cce0SJacob Faibussowitsch   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
51642920cce0SJacob Faibussowitsch 
516511a5261eSBarry Smith   Destroy the matrix with `MatDestroy()`
51668a9c020eSBarry Smith 
51671cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
51688a9c020eSBarry Smith @*/
5169d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5170d71ae5a4SJacob Faibussowitsch {
51718a9c020eSBarry Smith   PetscBool mpi;
51728a9c020eSBarry Smith 
51738a9c020eSBarry Smith   PetscFunctionBegin;
51748a9c020eSBarry Smith   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
51758a9c020eSBarry Smith   if (mpi) {
51768a9c020eSBarry Smith     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
51778a9c020eSBarry Smith   } else {
51788a9c020eSBarry Smith     *A_loc = A;
51798a9c020eSBarry Smith     PetscCall(PetscObjectReference((PetscObject)*A_loc));
51808a9c020eSBarry Smith   }
51813ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
51828a9c020eSBarry Smith }
51838a9c020eSBarry Smith 
51848a9c020eSBarry Smith /*@
51852920cce0SJacob Faibussowitsch   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
518625616d81SHong Zhang 
518732fba14fSHong Zhang   Not Collective
518825616d81SHong Zhang 
518925616d81SHong Zhang   Input Parameters:
519025616d81SHong Zhang + A     - the matrix
519111a5261eSBarry Smith - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
519225616d81SHong Zhang 
519325616d81SHong Zhang   Output Parameter:
519425616d81SHong Zhang . A_loc - the local sequential matrix generated
519525616d81SHong Zhang 
519625616d81SHong Zhang   Level: developer
519725616d81SHong Zhang 
519877c65a98SStefano Zampini   Notes:
51992920cce0SJacob Faibussowitsch   The matrix is created by taking all `A`'s local rows and putting them into a sequential
52002920cce0SJacob Faibussowitsch   matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
52012920cce0SJacob Faibussowitsch   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
52022920cce0SJacob Faibussowitsch 
520311a5261eSBarry Smith   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
52048a9c020eSBarry Smith 
52052920cce0SJacob Faibussowitsch   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
52062920cce0SJacob Faibussowitsch   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
52072920cce0SJacob Faibussowitsch   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
52082920cce0SJacob Faibussowitsch   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
520977c65a98SStefano Zampini 
52101cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
521125616d81SHong Zhang @*/
5212d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5213d71ae5a4SJacob Faibussowitsch {
521401b7ae99SHong Zhang   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5215b78526a6SJose E. Roman   Mat_SeqAIJ        *mat, *a, *b;
5216b78526a6SJose E. Roman   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5217ce496241SStefano Zampini   const PetscScalar *aa, *ba, *aav, *bav;
5218ce496241SStefano Zampini   PetscScalar       *ca, *cam;
521977c65a98SStefano Zampini   PetscMPIInt        size;
5220d0f46423SBarry Smith   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
52215a7d977cSHong Zhang   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
52228661ff28SBarry Smith   PetscBool          match;
522325616d81SHong Zhang 
522425616d81SHong Zhang   PetscFunctionBegin;
52259566063dSJacob Faibussowitsch   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
522628b400f6SJacob Faibussowitsch   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
52279566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
522877c65a98SStefano Zampini   if (size == 1) {
522977c65a98SStefano Zampini     if (scall == MAT_INITIAL_MATRIX) {
52309566063dSJacob Faibussowitsch       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
523177c65a98SStefano Zampini       *A_loc = mpimat->A;
523277c65a98SStefano Zampini     } else if (scall == MAT_REUSE_MATRIX) {
52339566063dSJacob Faibussowitsch       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
523477c65a98SStefano Zampini     }
52353ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
523677c65a98SStefano Zampini   }
523770a9ba44SHong Zhang 
52389566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5239f4f49eeaSPierre Jolivet   a  = (Mat_SeqAIJ *)mpimat->A->data;
5240f4f49eeaSPierre Jolivet   b  = (Mat_SeqAIJ *)mpimat->B->data;
52419371c9d4SSatish Balay   ai = a->i;
52429371c9d4SSatish Balay   aj = a->j;
52439371c9d4SSatish Balay   bi = b->i;
52449371c9d4SSatish Balay   bj = b->j;
52459566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
52469566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5247ce496241SStefano Zampini   aa = aav;
5248ce496241SStefano Zampini   ba = bav;
524901b7ae99SHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
52509566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(1 + am, &ci));
5251dea91ad1SHong Zhang     ci[0] = 0;
5252ad540459SPierre Jolivet     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
52539566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(1 + ci[am], &cj));
52549566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5255dea91ad1SHong Zhang     k = 0;
525601b7ae99SHong Zhang     for (i = 0; i < am; i++) {
52575a7d977cSHong Zhang       ncols_o = bi[i + 1] - bi[i];
52585a7d977cSHong Zhang       ncols_d = ai[i + 1] - ai[i];
525901b7ae99SHong Zhang       /* off-diagonal portion of A */
52605a7d977cSHong Zhang       for (jo = 0; jo < ncols_o; jo++) {
52615a7d977cSHong Zhang         col = cmap[*bj];
52625a7d977cSHong Zhang         if (col >= cstart) break;
52639371c9d4SSatish Balay         cj[k] = col;
52649371c9d4SSatish Balay         bj++;
52655a7d977cSHong Zhang         ca[k++] = *ba++;
52665a7d977cSHong Zhang       }
52675a7d977cSHong Zhang       /* diagonal portion of A */
52685a7d977cSHong Zhang       for (j = 0; j < ncols_d; j++) {
52695a7d977cSHong Zhang         cj[k]   = cstart + *aj++;
52705a7d977cSHong Zhang         ca[k++] = *aa++;
52715a7d977cSHong Zhang       }
52725a7d977cSHong Zhang       /* off-diagonal portion of A */
52735a7d977cSHong Zhang       for (j = jo; j < ncols_o; j++) {
52745a7d977cSHong Zhang         cj[k]   = cmap[*bj++];
52755a7d977cSHong Zhang         ca[k++] = *ba++;
52765a7d977cSHong Zhang       }
527725616d81SHong Zhang     }
5278dea91ad1SHong Zhang     /* put together the new matrix */
52799566063dSJacob Faibussowitsch     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5280dea91ad1SHong Zhang     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5281dea91ad1SHong Zhang     /* Since these are PETSc arrays, change flags to free them as necessary. */
5282dea91ad1SHong Zhang     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5283e6b907acSBarry Smith     mat->free_a  = PETSC_TRUE;
5284e6b907acSBarry Smith     mat->free_ij = PETSC_TRUE;
5285dea91ad1SHong Zhang     mat->nonew   = 0;
52865a7d977cSHong Zhang   } else if (scall == MAT_REUSE_MATRIX) {
52875a7d977cSHong Zhang     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5288fff043a9SJunchao Zhang     ci  = mat->i;
5289fff043a9SJunchao Zhang     cj  = mat->j;
52909566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
52915a7d977cSHong Zhang     for (i = 0; i < am; i++) {
52925a7d977cSHong Zhang       /* off-diagonal portion of A */
52935a7d977cSHong Zhang       ncols_o = bi[i + 1] - bi[i];
52945a7d977cSHong Zhang       for (jo = 0; jo < ncols_o; jo++) {
52955a7d977cSHong Zhang         col = cmap[*bj];
52965a7d977cSHong Zhang         if (col >= cstart) break;
52979371c9d4SSatish Balay         *cam++ = *ba++;
52989371c9d4SSatish Balay         bj++;
52995a7d977cSHong Zhang       }
53005a7d977cSHong Zhang       /* diagonal portion of A */
5301ecc9b87dSHong Zhang       ncols_d = ai[i + 1] - ai[i];
5302a77337e4SBarry Smith       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
53035a7d977cSHong Zhang       /* off-diagonal portion of A */
5304f33d1a9aSHong Zhang       for (j = jo; j < ncols_o; j++) {
53059371c9d4SSatish Balay         *cam++ = *ba++;
53069371c9d4SSatish Balay         bj++;
5307f33d1a9aSHong Zhang       }
53085a7d977cSHong Zhang     }
53099566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
531098921bdaSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
53119566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
53129566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
53139566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
53143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
531525616d81SHong Zhang }
531625616d81SHong Zhang 
5317ed502f03SStefano Zampini /*@
531811a5261eSBarry Smith   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
53194cf0e950SBarry Smith   mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part
5320ed502f03SStefano Zampini 
5321ed502f03SStefano Zampini   Not Collective
5322ed502f03SStefano Zampini 
5323ed502f03SStefano Zampini   Input Parameters:
5324ed502f03SStefano Zampini + A     - the matrix
532511a5261eSBarry Smith - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5326ed502f03SStefano Zampini 
5327d8d19677SJose E. Roman   Output Parameters:
53282ef1f0ffSBarry Smith + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5329ed502f03SStefano Zampini - A_loc - the local sequential matrix generated
5330ed502f03SStefano Zampini 
5331ed502f03SStefano Zampini   Level: developer
5332ed502f03SStefano Zampini 
533311a5261eSBarry Smith   Note:
53342ef1f0ffSBarry Smith   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
53354cf0e950SBarry Smith   part, then those associated with the off-diagonal part (in its local ordering)
5336ed502f03SStefano Zampini 
53371cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5338ed502f03SStefano Zampini @*/
5339d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5340d71ae5a4SJacob Faibussowitsch {
5341ed502f03SStefano Zampini   Mat             Ao, Ad;
5342ed502f03SStefano Zampini   const PetscInt *cmap;
5343ed502f03SStefano Zampini   PetscMPIInt     size;
5344ed502f03SStefano Zampini   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5345ed502f03SStefano Zampini 
5346ed502f03SStefano Zampini   PetscFunctionBegin;
53479566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
53489566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5349ed502f03SStefano Zampini   if (size == 1) {
5350ed502f03SStefano Zampini     if (scall == MAT_INITIAL_MATRIX) {
53519566063dSJacob Faibussowitsch       PetscCall(PetscObjectReference((PetscObject)Ad));
5352ed502f03SStefano Zampini       *A_loc = Ad;
5353ed502f03SStefano Zampini     } else if (scall == MAT_REUSE_MATRIX) {
53549566063dSJacob Faibussowitsch       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5355ed502f03SStefano Zampini     }
53569566063dSJacob Faibussowitsch     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
53573ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
5358ed502f03SStefano Zampini   }
53599566063dSJacob Faibussowitsch   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
53609566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5361ed502f03SStefano Zampini   if (f) {
53629566063dSJacob Faibussowitsch     PetscCall((*f)(A, scall, glob, A_loc));
5363ed502f03SStefano Zampini   } else {
5364ed502f03SStefano Zampini     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5365ed502f03SStefano Zampini     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5366ed502f03SStefano Zampini     Mat_SeqAIJ        *c;
5367ed502f03SStefano Zampini     PetscInt          *ai = a->i, *aj = a->j;
5368ed502f03SStefano Zampini     PetscInt          *bi = b->i, *bj = b->j;
5369ed502f03SStefano Zampini     PetscInt          *ci, *cj;
5370ed502f03SStefano Zampini     const PetscScalar *aa, *ba;
5371ed502f03SStefano Zampini     PetscScalar       *ca;
5372ed502f03SStefano Zampini     PetscInt           i, j, am, dn, on;
5373ed502f03SStefano Zampini 
53749566063dSJacob Faibussowitsch     PetscCall(MatGetLocalSize(Ad, &am, &dn));
53759566063dSJacob Faibussowitsch     PetscCall(MatGetLocalSize(Ao, NULL, &on));
53769566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
53779566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5378ed502f03SStefano Zampini     if (scall == MAT_INITIAL_MATRIX) {
5379ed502f03SStefano Zampini       PetscInt k;
53809566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(1 + am, &ci));
53819566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
53829566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5383ed502f03SStefano Zampini       ci[0] = 0;
5384ed502f03SStefano Zampini       for (i = 0, k = 0; i < am; i++) {
5385ed502f03SStefano Zampini         const PetscInt ncols_o = bi[i + 1] - bi[i];
5386ed502f03SStefano Zampini         const PetscInt ncols_d = ai[i + 1] - ai[i];
5387ed502f03SStefano Zampini         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5388ed502f03SStefano Zampini         /* diagonal portion of A */
5389ed502f03SStefano Zampini         for (j = 0; j < ncols_d; j++, k++) {
5390ed502f03SStefano Zampini           cj[k] = *aj++;
5391ed502f03SStefano Zampini           ca[k] = *aa++;
5392ed502f03SStefano Zampini         }
5393ed502f03SStefano Zampini         /* off-diagonal portion of A */
5394ed502f03SStefano Zampini         for (j = 0; j < ncols_o; j++, k++) {
5395ed502f03SStefano Zampini           cj[k] = dn + *bj++;
5396ed502f03SStefano Zampini           ca[k] = *ba++;
5397ed502f03SStefano Zampini         }
5398ed502f03SStefano Zampini       }
5399ed502f03SStefano Zampini       /* put together the new matrix */
54009566063dSJacob Faibussowitsch       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5401ed502f03SStefano Zampini       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5402ed502f03SStefano Zampini       /* Since these are PETSc arrays, change flags to free them as necessary. */
5403ed502f03SStefano Zampini       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5404ed502f03SStefano Zampini       c->free_a  = PETSC_TRUE;
5405ed502f03SStefano Zampini       c->free_ij = PETSC_TRUE;
5406ed502f03SStefano Zampini       c->nonew   = 0;
54079566063dSJacob Faibussowitsch       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5408ed502f03SStefano Zampini     } else if (scall == MAT_REUSE_MATRIX) {
54099566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5410ed502f03SStefano Zampini       for (i = 0; i < am; i++) {
5411ed502f03SStefano Zampini         const PetscInt ncols_d = ai[i + 1] - ai[i];
5412ed502f03SStefano Zampini         const PetscInt ncols_o = bi[i + 1] - bi[i];
5413ed502f03SStefano Zampini         /* diagonal portion of A */
5414ed502f03SStefano Zampini         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5415ed502f03SStefano Zampini         /* off-diagonal portion of A */
5416ed502f03SStefano Zampini         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5417ed502f03SStefano Zampini       }
54189566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
541998921bdaSJacob Faibussowitsch     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
54209566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
54219566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5422ed502f03SStefano Zampini     if (glob) {
5423ed502f03SStefano Zampini       PetscInt cst, *gidx;
5424ed502f03SStefano Zampini 
54259566063dSJacob Faibussowitsch       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
54269566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(dn + on, &gidx));
5427ed502f03SStefano Zampini       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5428ed502f03SStefano Zampini       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
54299566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5430ed502f03SStefano Zampini     }
5431ed502f03SStefano Zampini   }
54329566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
54333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
5434ed502f03SStefano Zampini }
5435ed502f03SStefano Zampini 
543632fba14fSHong Zhang /*@C
543711a5261eSBarry Smith   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
543832fba14fSHong Zhang 
543932fba14fSHong Zhang   Not Collective
544032fba14fSHong Zhang 
544132fba14fSHong Zhang   Input Parameters:
544232fba14fSHong Zhang + A     - the matrix
544311a5261eSBarry Smith . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
544420f4b53cSBarry Smith . row   - index set of rows to extract (or `NULL`)
544520f4b53cSBarry Smith - col   - index set of columns to extract (or `NULL`)
544632fba14fSHong Zhang 
544732fba14fSHong Zhang   Output Parameter:
544832fba14fSHong Zhang . A_loc - the local sequential matrix generated
544932fba14fSHong Zhang 
545032fba14fSHong Zhang   Level: developer
545132fba14fSHong Zhang 
54521cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
545332fba14fSHong Zhang @*/
5454d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5455d71ae5a4SJacob Faibussowitsch {
545632fba14fSHong Zhang   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
545732fba14fSHong Zhang   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
545832fba14fSHong Zhang   IS          isrowa, iscola;
545932fba14fSHong Zhang   Mat        *aloc;
54604a2b5492SBarry Smith   PetscBool   match;
546132fba14fSHong Zhang 
546232fba14fSHong Zhang   PetscFunctionBegin;
54639566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
546428b400f6SJacob Faibussowitsch   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
54659566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
546632fba14fSHong Zhang   if (!row) {
54679371c9d4SSatish Balay     start = A->rmap->rstart;
54689371c9d4SSatish Balay     end   = A->rmap->rend;
54699566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
547032fba14fSHong Zhang   } else {
547132fba14fSHong Zhang     isrowa = *row;
547232fba14fSHong Zhang   }
547332fba14fSHong Zhang   if (!col) {
5474d0f46423SBarry Smith     start = A->cmap->rstart;
547532fba14fSHong Zhang     cmap  = a->garray;
5476d0f46423SBarry Smith     nzA   = a->A->cmap->n;
5477d0f46423SBarry Smith     nzB   = a->B->cmap->n;
54789566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nzA + nzB, &idx));
547932fba14fSHong Zhang     ncols = 0;
548032fba14fSHong Zhang     for (i = 0; i < nzB; i++) {
548132fba14fSHong Zhang       if (cmap[i] < start) idx[ncols++] = cmap[i];
548232fba14fSHong Zhang       else break;
548332fba14fSHong Zhang     }
548432fba14fSHong Zhang     imark = i;
548532fba14fSHong Zhang     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
548632fba14fSHong Zhang     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
54879566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
548832fba14fSHong Zhang   } else {
548932fba14fSHong Zhang     iscola = *col;
549032fba14fSHong Zhang   }
549132fba14fSHong Zhang   if (scall != MAT_INITIAL_MATRIX) {
54929566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(1, &aloc));
549332fba14fSHong Zhang     aloc[0] = *A_loc;
549432fba14fSHong Zhang   }
54959566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5496109e0772SStefano Zampini   if (!col) { /* attach global id of condensed columns */
54979566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5498109e0772SStefano Zampini   }
549932fba14fSHong Zhang   *A_loc = aloc[0];
55009566063dSJacob Faibussowitsch   PetscCall(PetscFree(aloc));
550148a46eb9SPierre Jolivet   if (!row) PetscCall(ISDestroy(&isrowa));
550248a46eb9SPierre Jolivet   if (!col) PetscCall(ISDestroy(&iscola));
55039566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
55043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
550532fba14fSHong Zhang }
550632fba14fSHong Zhang 
55075c65b9ecSFande Kong /*
55085c65b9ecSFande Kong  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
55095c65b9ecSFande Kong  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
55105c65b9ecSFande Kong  * on a global size.
55115c65b9ecSFande Kong  * */
5512ba38deedSJacob Faibussowitsch static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5513d71ae5a4SJacob Faibussowitsch {
55145c65b9ecSFande Kong   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5515f4f49eeaSPierre Jolivet   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5516131c27b5Sprj-   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5517131c27b5Sprj-   PetscMPIInt            owner;
55185c65b9ecSFande Kong   PetscSFNode           *iremote, *oiremote;
55195c65b9ecSFande Kong   const PetscInt        *lrowindices;
55205c65b9ecSFande Kong   PetscSF                sf, osf;
55215c65b9ecSFande Kong   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
55225c65b9ecSFande Kong   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
55235c65b9ecSFande Kong   MPI_Comm               comm;
55245c65b9ecSFande Kong   ISLocalToGlobalMapping mapping;
5525fff043a9SJunchao Zhang   const PetscScalar     *pd_a, *po_a;
55265c65b9ecSFande Kong 
55275c65b9ecSFande Kong   PetscFunctionBegin;
55289566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
55295c65b9ecSFande Kong   /* plocalsize is the number of roots
55305c65b9ecSFande Kong    * nrows is the number of leaves
55315c65b9ecSFande Kong    * */
55329566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
55339566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(rows, &nrows));
55349566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(nrows, &iremote));
55359566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(rows, &lrowindices));
55365c65b9ecSFande Kong   for (i = 0; i < nrows; i++) {
55375c65b9ecSFande Kong     /* Find a remote index and an owner for a row
55385c65b9ecSFande Kong      * The row could be local or remote
55395c65b9ecSFande Kong      * */
554034bcad68SFande Kong     owner = 0;
554134bcad68SFande Kong     lidx  = 0;
55429566063dSJacob Faibussowitsch     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
55435c65b9ecSFande Kong     iremote[i].index = lidx;
55445c65b9ecSFande Kong     iremote[i].rank  = owner;
55455c65b9ecSFande Kong   }
55465c65b9ecSFande Kong   /* Create SF to communicate how many nonzero columns for each row */
55479566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(comm, &sf));
5548f332b1cbSPierre Jolivet   /* SF will figure out the number of nonzero columns for each row, and their
55495c65b9ecSFande Kong    * offsets
55505c65b9ecSFande Kong    * */
55519566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
55529566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(sf));
55539566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
5554bc8e477aSFande Kong 
55559566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
55569566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
55579566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(nrows, &pnnz));
55585c65b9ecSFande Kong   roffsets[0] = 0;
55595c65b9ecSFande Kong   roffsets[1] = 0;
55605c65b9ecSFande Kong   for (i = 0; i < plocalsize; i++) {
55614cf0e950SBarry Smith     /* diagonal */
55625c65b9ecSFande Kong     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
55634cf0e950SBarry Smith     /* off-diagonal */
55645c65b9ecSFande Kong     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
55655c65b9ecSFande Kong     /* compute offsets so that we relative location for each row */
55665c65b9ecSFande Kong     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
55675c65b9ecSFande Kong     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
55685c65b9ecSFande Kong   }
55699566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
55709566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
55715c65b9ecSFande Kong   /* 'r' means root, and 'l' means leaf */
55729566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
55739566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
55749566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
55759566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
55769566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf));
55779566063dSJacob Faibussowitsch   PetscCall(PetscFree(roffsets));
55789566063dSJacob Faibussowitsch   PetscCall(PetscFree(nrcols));
55795c65b9ecSFande Kong   dntotalcols = 0;
55805c65b9ecSFande Kong   ontotalcols = 0;
5581bc8e477aSFande Kong   ncol        = 0;
55825c65b9ecSFande Kong   for (i = 0; i < nrows; i++) {
55835c65b9ecSFande Kong     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5584bc8e477aSFande Kong     ncol    = PetscMax(pnnz[i], ncol);
55854cf0e950SBarry Smith     /* diagonal */
55865c65b9ecSFande Kong     dntotalcols += nlcols[i * 2 + 0];
55874cf0e950SBarry Smith     /* off-diagonal */
55885c65b9ecSFande Kong     ontotalcols += nlcols[i * 2 + 1];
55895c65b9ecSFande Kong   }
55905c65b9ecSFande Kong   /* We do not need to figure the right number of columns
55915c65b9ecSFande Kong    * since all the calculations will be done by going through the raw data
55925c65b9ecSFande Kong    * */
55939566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
55949566063dSJacob Faibussowitsch   PetscCall(MatSetUp(*P_oth));
55959566063dSJacob Faibussowitsch   PetscCall(PetscFree(pnnz));
55965c65b9ecSFande Kong   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
55974cf0e950SBarry Smith   /* diagonal */
55989566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(dntotalcols, &iremote));
55994cf0e950SBarry Smith   /* off-diagonal */
56009566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
56014cf0e950SBarry Smith   /* diagonal */
56029566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
56034cf0e950SBarry Smith   /* off-diagonal */
56049566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
56055c65b9ecSFande Kong   dntotalcols = 0;
56065c65b9ecSFande Kong   ontotalcols = 0;
56075c65b9ecSFande Kong   ntotalcols  = 0;
56085c65b9ecSFande Kong   for (i = 0; i < nrows; i++) {
560934bcad68SFande Kong     owner = 0;
56109566063dSJacob Faibussowitsch     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
56115c65b9ecSFande Kong     /* Set iremote for diag matrix */
56125c65b9ecSFande Kong     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
56135c65b9ecSFande Kong       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
56145c65b9ecSFande Kong       iremote[dntotalcols].rank  = owner;
56155c65b9ecSFande Kong       /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
56165c65b9ecSFande Kong       ilocal[dntotalcols++] = ntotalcols++;
56175c65b9ecSFande Kong     }
56184cf0e950SBarry Smith     /* off-diagonal */
56195c65b9ecSFande Kong     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
56205c65b9ecSFande Kong       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
56215c65b9ecSFande Kong       oiremote[ontotalcols].rank  = owner;
56225c65b9ecSFande Kong       oilocal[ontotalcols++]      = ntotalcols++;
56235c65b9ecSFande Kong     }
56245c65b9ecSFande Kong   }
56259566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(rows, &lrowindices));
56269566063dSJacob Faibussowitsch   PetscCall(PetscFree(loffsets));
56279566063dSJacob Faibussowitsch   PetscCall(PetscFree(nlcols));
56289566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(comm, &sf));
56295c65b9ecSFande Kong   /* P serves as roots and P_oth is leaves
56305c65b9ecSFande Kong    * Diag matrix
56315c65b9ecSFande Kong    * */
56329566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
56339566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(sf));
56349566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
56355c65b9ecSFande Kong 
56369566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(comm, &osf));
56374cf0e950SBarry Smith   /* off-diagonal */
56389566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
56399566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(osf));
56409566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(osf));
56419566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
56429566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
56434cf0e950SBarry Smith   /* operate on the matrix internal data to save memory */
56449566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
56459566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
56469566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
56475c65b9ecSFande Kong   /* Convert to global indices for diag matrix */
56485c65b9ecSFande Kong   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
56499566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
56505c65b9ecSFande Kong   /* We want P_oth store global indices */
56519566063dSJacob Faibussowitsch   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
56525c65b9ecSFande Kong   /* Use memory scalable approach */
56539566063dSJacob Faibussowitsch   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
56549566063dSJacob Faibussowitsch   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
56559566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
56569566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
56575c65b9ecSFande Kong   /* Convert back to local indices */
56585c65b9ecSFande Kong   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
56599566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
56605c65b9ecSFande Kong   nout = 0;
56619566063dSJacob Faibussowitsch   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
566208401ef6SPierre Jolivet   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
56639566063dSJacob Faibussowitsch   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
56645c65b9ecSFande Kong   /* Exchange values */
56659566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
56669566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
56679566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
56689566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
56695c65b9ecSFande Kong   /* Stop PETSc from shrinking memory */
56705c65b9ecSFande Kong   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
56719566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
56729566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
56735c65b9ecSFande Kong   /* Attach PetscSF objects to P_oth so that we can reuse it later */
56749566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
56759566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
56769566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf));
56779566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&osf));
56783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
56795c65b9ecSFande Kong }
56805c65b9ecSFande Kong 
56815c65b9ecSFande Kong /*
56825c65b9ecSFande Kong  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
56835c65b9ecSFande Kong  * This supports MPIAIJ and MAIJ
56845c65b9ecSFande Kong  * */
5685d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5686d71ae5a4SJacob Faibussowitsch {
56875c65b9ecSFande Kong   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5688bc8e477aSFande Kong   Mat_SeqAIJ *p_oth;
5689bc8e477aSFande Kong   IS          rows, map;
5690bc8e477aSFande Kong   PetscHMapI  hamp;
5691bc8e477aSFande Kong   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
56925c65b9ecSFande Kong   MPI_Comm    comm;
56935c65b9ecSFande Kong   PetscSF     sf, osf;
5694bc8e477aSFande Kong   PetscBool   has;
56955c65b9ecSFande Kong 
56965c65b9ecSFande Kong   PetscFunctionBegin;
56979566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
56989566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
56995c65b9ecSFande Kong   /* If it is the first time, create an index set of off-diag nonzero columns of A,
57005c65b9ecSFande Kong    *  and then create a submatrix (that often is an overlapping matrix)
57015c65b9ecSFande Kong    * */
57025c65b9ecSFande Kong   if (reuse == MAT_INITIAL_MATRIX) {
57035c65b9ecSFande Kong     /* Use a hash table to figure out unique keys */
5704eec179cfSJacob Faibussowitsch     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
57059566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5706bc8e477aSFande Kong     count = 0;
5707bc8e477aSFande Kong     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5708bc8e477aSFande Kong     for (i = 0; i < a->B->cmap->n; i++) {
5709bc8e477aSFande Kong       key = a->garray[i] / dof;
57109566063dSJacob Faibussowitsch       PetscCall(PetscHMapIHas(hamp, key, &has));
5711bc8e477aSFande Kong       if (!has) {
5712bc8e477aSFande Kong         mapping[i] = count;
57139566063dSJacob Faibussowitsch         PetscCall(PetscHMapISet(hamp, key, count++));
5714bc8e477aSFande Kong       } else {
5715bc8e477aSFande Kong         /* Current 'i' has the same value the previous step */
5716bc8e477aSFande Kong         mapping[i] = count - 1;
57175c65b9ecSFande Kong       }
5718bc8e477aSFande Kong     }
57199566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
57209566063dSJacob Faibussowitsch     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5721eec179cfSJacob Faibussowitsch     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
57229566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(htsize, &rowindices));
57235c65b9ecSFande Kong     off = 0;
57249566063dSJacob Faibussowitsch     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
57259566063dSJacob Faibussowitsch     PetscCall(PetscHMapIDestroy(&hamp));
57269566063dSJacob Faibussowitsch     PetscCall(PetscSortInt(htsize, rowindices));
57279566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
57285c65b9ecSFande Kong     /* In case, the matrix was already created but users want to recreate the matrix */
57299566063dSJacob Faibussowitsch     PetscCall(MatDestroy(P_oth));
57309566063dSJacob Faibussowitsch     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
57319566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
57329566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&map));
57339566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&rows));
57345c65b9ecSFande Kong   } else if (reuse == MAT_REUSE_MATRIX) {
57355c65b9ecSFande Kong     /* If matrix was already created, we simply update values using SF objects
573635cb6cd3SPierre Jolivet      * that as attached to the matrix earlier.
5737fff043a9SJunchao Zhang      */
5738fff043a9SJunchao Zhang     const PetscScalar *pd_a, *po_a;
5739fff043a9SJunchao Zhang 
57409566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
57419566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
574208401ef6SPierre Jolivet     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
57435c65b9ecSFande Kong     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
57445c65b9ecSFande Kong     /* Update values in place */
57459566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
57469566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
57479566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
57489566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
57499566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
57509566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
57519566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
57529566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
57536718818eSStefano Zampini   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
57549566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
57553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
57565c65b9ecSFande Kong }
57575c65b9ecSFande Kong 
575825616d81SHong Zhang /*@C
575920f4b53cSBarry Smith   MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`
576025616d81SHong Zhang 
5761c3339decSBarry Smith   Collective
576225616d81SHong Zhang 
576325616d81SHong Zhang   Input Parameters:
576411a5261eSBarry Smith + A     - the first matrix in `MATMPIAIJ` format
576511a5261eSBarry Smith . B     - the second matrix in `MATMPIAIJ` format
576611a5261eSBarry Smith - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
576725616d81SHong Zhang 
5768f1a722f8SMatthew G. Knepley   Output Parameters:
576927430b45SBarry Smith + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
577027430b45SBarry Smith . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5771f1a722f8SMatthew G. Knepley - B_seq - the sequential matrix generated
577225616d81SHong Zhang 
577325616d81SHong Zhang   Level: developer
577425616d81SHong Zhang 
577520f4b53cSBarry Smith .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
577625616d81SHong Zhang @*/
5777d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5778d71ae5a4SJacob Faibussowitsch {
5779899cda47SBarry Smith   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5780b1d57f15SBarry Smith   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
578125616d81SHong Zhang   IS          isrowb, iscolb;
57820298fd71SBarry Smith   Mat        *bseq = NULL;
578325616d81SHong Zhang 
578425616d81SHong Zhang   PetscFunctionBegin;
578520f4b53cSBarry Smith   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
578620f4b53cSBarry Smith              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
57879566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
578825616d81SHong Zhang 
578925616d81SHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
5790d0f46423SBarry Smith     start = A->cmap->rstart;
579125616d81SHong Zhang     cmap  = a->garray;
5792d0f46423SBarry Smith     nzA   = a->A->cmap->n;
5793d0f46423SBarry Smith     nzB   = a->B->cmap->n;
57949566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nzA + nzB, &idx));
579525616d81SHong Zhang     ncols = 0;
57960390132cSHong Zhang     for (i = 0; i < nzB; i++) { /* row < local row index */
579725616d81SHong Zhang       if (cmap[i] < start) idx[ncols++] = cmap[i];
579825616d81SHong Zhang       else break;
579925616d81SHong Zhang     }
580025616d81SHong Zhang     imark = i;
58010390132cSHong Zhang     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
58020390132cSHong Zhang     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
58039566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
58049566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
580525616d81SHong Zhang   } else {
580608401ef6SPierre Jolivet     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
58079371c9d4SSatish Balay     isrowb = *rowb;
58089371c9d4SSatish Balay     iscolb = *colb;
58099566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(1, &bseq));
581025616d81SHong Zhang     bseq[0] = *B_seq;
581125616d81SHong Zhang   }
58129566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
581325616d81SHong Zhang   *B_seq = bseq[0];
58149566063dSJacob Faibussowitsch   PetscCall(PetscFree(bseq));
581525616d81SHong Zhang   if (!rowb) {
58169566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&isrowb));
581725616d81SHong Zhang   } else {
581825616d81SHong Zhang     *rowb = isrowb;
581925616d81SHong Zhang   }
582025616d81SHong Zhang   if (!colb) {
58219566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscolb));
582225616d81SHong Zhang   } else {
582325616d81SHong Zhang     *colb = iscolb;
582425616d81SHong Zhang   }
58259566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
58263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
582725616d81SHong Zhang }
5828429d309bSHong Zhang 
5829f8487c73SHong Zhang /*
583027430b45SBarry Smith     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
583101b7ae99SHong Zhang     of the OFF-DIAGONAL portion of local A
5832429d309bSHong Zhang 
5833c3339decSBarry Smith     Collective
5834429d309bSHong Zhang 
5835429d309bSHong Zhang    Input Parameters:
583627430b45SBarry Smith +    A,B - the matrices in `MATMPIAIJ` format
583727430b45SBarry Smith -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5838429d309bSHong Zhang 
5839429d309bSHong Zhang    Output Parameter:
58400298fd71SBarry Smith +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
58410298fd71SBarry Smith .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
58420298fd71SBarry Smith .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5843598bc09dSHong Zhang -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5844429d309bSHong Zhang 
584511a5261eSBarry Smith     Developer Note:
584611a5261eSBarry Smith     This directly accesses information inside the VecScatter associated with the matrix-vector product
58476eb45d04SBarry Smith      for this matrix. This is not desirable..
58486eb45d04SBarry Smith 
5849429d309bSHong Zhang     Level: developer
5850429d309bSHong Zhang 
5851f8487c73SHong Zhang */
58526497c311SBarry Smith 
5853d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5854d71ae5a4SJacob Faibussowitsch {
5855899cda47SBarry Smith   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
58564b8d542aSHong Zhang   VecScatter         ctx;
5857ce94432eSBarry Smith   MPI_Comm           comm;
58583515ee7fSJunchao Zhang   const PetscMPIInt *rprocs, *sprocs;
58596497c311SBarry Smith   PetscMPIInt        nrecvs, nsends;
58603515ee7fSJunchao Zhang   const PetscInt    *srow, *rstarts, *sstarts;
5861277f51e8SBarry Smith   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
58626497c311SBarry Smith   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5863277f51e8SBarry Smith   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5864ddea5d60SJunchao Zhang   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5865ddea5d60SJunchao Zhang   PetscMPIInt        size, tag, rank, nreqs;
5866429d309bSHong Zhang 
5867429d309bSHong Zhang   PetscFunctionBegin;
58689566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
58699566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
5870a7c7454dSHong Zhang 
587120f4b53cSBarry Smith   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
587220f4b53cSBarry Smith              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
58739566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
58749566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5875a6b2eed2SHong Zhang 
5876ec07b8f8SHong Zhang   if (size == 1) {
5877ec07b8f8SHong Zhang     startsj_s = NULL;
5878ec07b8f8SHong Zhang     bufa_ptr  = NULL;
587952f7967eSHong Zhang     *B_oth    = NULL;
58803ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
5881ec07b8f8SHong Zhang   }
5882ec07b8f8SHong Zhang 
5883fa83eaafSHong Zhang   ctx = a->Mvctx;
58844b8d542aSHong Zhang   tag = ((PetscObject)ctx)->tag;
58854b8d542aSHong Zhang 
58869566063dSJacob Faibussowitsch   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
58873515ee7fSJunchao Zhang   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
58889566063dSJacob Faibussowitsch   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
58899566063dSJacob Faibussowitsch   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
58909566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nreqs, &reqs));
5891ddea5d60SJunchao Zhang   rwaits = reqs;
58928e3a54c0SPierre Jolivet   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5893429d309bSHong Zhang 
5894b7f45c76SHong Zhang   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5895429d309bSHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
5896a6b2eed2SHong Zhang     /* i-array */
5897a6b2eed2SHong Zhang     /*  post receives */
58989566063dSJacob Faibussowitsch     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5899a6b2eed2SHong Zhang     for (i = 0; i < nrecvs; i++) {
590074268593SBarry Smith       rowlen = rvalues + rstarts[i] * rbs;
5901e42f35eeSHong Zhang       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
59026497c311SBarry Smith       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5903429d309bSHong Zhang     }
5904a6b2eed2SHong Zhang 
5905a6b2eed2SHong Zhang     /* pack the outgoing message */
59069566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
59072205254eSKarl Rupp 
59082205254eSKarl Rupp     sstartsj[0] = 0;
59092205254eSKarl Rupp     rstartsj[0] = 0;
5910a6b2eed2SHong Zhang     len         = 0; /* total length of j or a array to be sent */
59113515ee7fSJunchao Zhang     if (nsends) {
59123515ee7fSJunchao Zhang       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
59139566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
59143515ee7fSJunchao Zhang     }
5915a6b2eed2SHong Zhang     for (i = 0; i < nsends; i++) {
59163515ee7fSJunchao Zhang       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5917e42f35eeSHong Zhang       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
591887025532SHong Zhang       for (j = 0; j < nrows; j++) {
5919d0f46423SBarry Smith         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5920e42f35eeSHong Zhang         for (l = 0; l < sbs; l++) {
59219566063dSJacob Faibussowitsch           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
59222205254eSKarl Rupp 
5923e42f35eeSHong Zhang           rowlen[j * sbs + l] = ncols;
59242205254eSKarl Rupp 
5925e42f35eeSHong Zhang           len += ncols;
59269566063dSJacob Faibussowitsch           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5927e42f35eeSHong Zhang         }
5928a6b2eed2SHong Zhang         k++;
5929429d309bSHong Zhang       }
59306497c311SBarry Smith       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
59312205254eSKarl Rupp 
5932dea91ad1SHong Zhang       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5933429d309bSHong Zhang     }
593487025532SHong Zhang     /* recvs and sends of i-array are completed */
59359566063dSJacob Faibussowitsch     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
59369566063dSJacob Faibussowitsch     PetscCall(PetscFree(svalues));
5937e42f35eeSHong Zhang 
5938a6b2eed2SHong Zhang     /* allocate buffers for sending j and a arrays */
59399566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(len + 1, &bufj));
59409566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(len + 1, &bufa));
5941a6b2eed2SHong Zhang 
594287025532SHong Zhang     /* create i-array of B_oth */
59439566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
59442205254eSKarl Rupp 
594587025532SHong Zhang     b_othi[0] = 0;
5946a6b2eed2SHong Zhang     len       = 0; /* total length of j or a array to be received */
5947a6b2eed2SHong Zhang     k         = 0;
5948a6b2eed2SHong Zhang     for (i = 0; i < nrecvs; i++) {
59493515ee7fSJunchao Zhang       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
59503515ee7fSJunchao Zhang       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
595187025532SHong Zhang       for (j = 0; j < nrows; j++) {
595287025532SHong Zhang         b_othi[k + 1] = b_othi[k] + rowlen[j];
59539566063dSJacob Faibussowitsch         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5954f91af8c7SBarry Smith         k++;
5955a6b2eed2SHong Zhang       }
5956dea91ad1SHong Zhang       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5957a6b2eed2SHong Zhang     }
59589566063dSJacob Faibussowitsch     PetscCall(PetscFree(rvalues));
5959a6b2eed2SHong Zhang 
59606aad120cSJose E. Roman     /* allocate space for j and a arrays of B_oth */
59619566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
59629566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5963a6b2eed2SHong Zhang 
596487025532SHong Zhang     /* j-array */
5965a6b2eed2SHong Zhang     /*  post receives of j-array */
5966a6b2eed2SHong Zhang     for (i = 0; i < nrecvs; i++) {
596787025532SHong Zhang       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
59686497c311SBarry Smith       PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5969a6b2eed2SHong Zhang     }
5970e42f35eeSHong Zhang 
5971e42f35eeSHong Zhang     /* pack the outgoing message j-array */
59723515ee7fSJunchao Zhang     if (nsends) k = sstarts[0];
5973a6b2eed2SHong Zhang     for (i = 0; i < nsends; i++) {
5974e42f35eeSHong Zhang       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5975a6b2eed2SHong Zhang       bufJ  = bufj + sstartsj[i];
597687025532SHong Zhang       for (j = 0; j < nrows; j++) {
5977d0f46423SBarry Smith         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5978e42f35eeSHong Zhang         for (ll = 0; ll < sbs; ll++) {
59799566063dSJacob Faibussowitsch           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5980ad540459SPierre Jolivet           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
59819566063dSJacob Faibussowitsch           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5982e42f35eeSHong Zhang         }
598387025532SHong Zhang       }
59846497c311SBarry Smith       PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
598587025532SHong Zhang     }
598687025532SHong Zhang 
598787025532SHong Zhang     /* recvs and sends of j-array are completed */
59889566063dSJacob Faibussowitsch     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
598987025532SHong Zhang   } else if (scall == MAT_REUSE_MATRIX) {
5990b7f45c76SHong Zhang     sstartsj = *startsj_s;
59911d79065fSBarry Smith     rstartsj = *startsj_r;
599287025532SHong Zhang     bufa     = *bufa_ptr;
59939566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5994ddea5d60SJunchao Zhang   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
599587025532SHong Zhang 
599687025532SHong Zhang   /* a-array */
599787025532SHong Zhang   /*  post receives of a-array */
599887025532SHong Zhang   for (i = 0; i < nrecvs; i++) {
599987025532SHong Zhang     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
60006497c311SBarry Smith     PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
600187025532SHong Zhang   }
6002e42f35eeSHong Zhang 
6003e42f35eeSHong Zhang   /* pack the outgoing message a-array */
60043515ee7fSJunchao Zhang   if (nsends) k = sstarts[0];
600587025532SHong Zhang   for (i = 0; i < nsends; i++) {
6006e42f35eeSHong Zhang     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
600787025532SHong Zhang     bufA  = bufa + sstartsj[i];
600887025532SHong Zhang     for (j = 0; j < nrows; j++) {
6009d0f46423SBarry Smith       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
6010e42f35eeSHong Zhang       for (ll = 0; ll < sbs; ll++) {
60119566063dSJacob Faibussowitsch         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6012ad540459SPierre Jolivet         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
60139566063dSJacob Faibussowitsch         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6014e42f35eeSHong Zhang       }
6015a6b2eed2SHong Zhang     }
60166497c311SBarry Smith     PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
6017a6b2eed2SHong Zhang   }
601887025532SHong Zhang   /* recvs and sends of a-array are completed */
60199566063dSJacob Faibussowitsch   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
60209566063dSJacob Faibussowitsch   PetscCall(PetscFree(reqs));
6021a6b2eed2SHong Zhang 
602287025532SHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
6023dd460d27SBarry Smith     Mat_SeqAIJ *b_oth;
6024dd460d27SBarry Smith 
6025a6b2eed2SHong Zhang     /* put together the new matrix */
60269566063dSJacob Faibussowitsch     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6027a6b2eed2SHong Zhang 
6028a6b2eed2SHong Zhang     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6029a6b2eed2SHong Zhang     /* Since these are PETSc arrays, change flags to free them as necessary. */
603087025532SHong Zhang     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6031e6b907acSBarry Smith     b_oth->free_a  = PETSC_TRUE;
6032e6b907acSBarry Smith     b_oth->free_ij = PETSC_TRUE;
603387025532SHong Zhang     b_oth->nonew   = 0;
6034a6b2eed2SHong Zhang 
60359566063dSJacob Faibussowitsch     PetscCall(PetscFree(bufj));
6036b7f45c76SHong Zhang     if (!startsj_s || !bufa_ptr) {
60379566063dSJacob Faibussowitsch       PetscCall(PetscFree2(sstartsj, rstartsj));
60389566063dSJacob Faibussowitsch       PetscCall(PetscFree(bufa_ptr));
6039dea91ad1SHong Zhang     } else {
6040b7f45c76SHong Zhang       *startsj_s = sstartsj;
60411d79065fSBarry Smith       *startsj_r = rstartsj;
604287025532SHong Zhang       *bufa_ptr  = bufa;
604387025532SHong Zhang     }
6044fff043a9SJunchao Zhang   } else if (scall == MAT_REUSE_MATRIX) {
60459566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6046dea91ad1SHong Zhang   }
60473515ee7fSJunchao Zhang 
60489566063dSJacob Faibussowitsch   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
60499566063dSJacob Faibussowitsch   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
60509566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
60513ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6052429d309bSHong Zhang }
6053ccd8e176SBarry Smith 
6054cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6055cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6056ca9cdca7SRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
60579779e05dSSatish Balay #if defined(PETSC_HAVE_MKL_SPARSE)
6058a84739b8SRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6059191b95cbSRichard Tran Mills #endif
6060ae8d29abSPierre Jolivet PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6061cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
60625d7652ecSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
6063cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
60645d7652ecSHong Zhang #endif
6065d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
6066d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6067d24d4204SJose E. Roman #endif
606863c07aadSStefano Zampini #if defined(PETSC_HAVE_HYPRE)
606963c07aadSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
607063c07aadSStefano Zampini #endif
60713338378cSStefano Zampini #if defined(PETSC_HAVE_CUDA)
60723338378cSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
60733338378cSStefano Zampini #endif
6074d5e393b6SSuyash Tandon #if defined(PETSC_HAVE_HIP)
6075d5e393b6SSuyash Tandon PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6076d5e393b6SSuyash Tandon #endif
60773d0639e7SStefano Zampini #if defined(PETSC_HAVE_KOKKOS_KERNELS)
60783d0639e7SStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
60793d0639e7SStefano Zampini #endif
6080d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
60814222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
60824222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
608317667f90SBarry Smith 
6084fc4dec0aSBarry Smith /*
6085fc4dec0aSBarry Smith     Computes (B'*A')' since computing B*A directly is untenable
6086fc4dec0aSBarry Smith 
6087fc4dec0aSBarry Smith                n                       p                          p
60882da392ccSBarry Smith         [             ]       [             ]         [                 ]
60892da392ccSBarry Smith       m [      A      ]  *  n [       B     ]   =   m [         C       ]
60902da392ccSBarry Smith         [             ]       [             ]         [                 ]
6091fc4dec0aSBarry Smith 
6092fc4dec0aSBarry Smith */
6093d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6094d71ae5a4SJacob Faibussowitsch {
6095fc4dec0aSBarry Smith   Mat At, Bt, Ct;
6096fc4dec0aSBarry Smith 
6097fc4dec0aSBarry Smith   PetscFunctionBegin;
60989566063dSJacob Faibussowitsch   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
60999566063dSJacob Faibussowitsch   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6100fb842aefSJose E. Roman   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
61019566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&At));
61029566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Bt));
61037fb60732SBarry Smith   PetscCall(MatTransposeSetPrecursor(Ct, C));
61049566063dSJacob Faibussowitsch   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
61059566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Ct));
61063ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6107fc4dec0aSBarry Smith }
6108fc4dec0aSBarry Smith 
6109d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6110d71ae5a4SJacob Faibussowitsch {
61116718818eSStefano Zampini   PetscBool cisdense;
6112fc4dec0aSBarry Smith 
6113fc4dec0aSBarry Smith   PetscFunctionBegin;
611408401ef6SPierre Jolivet   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
61159566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
61169566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6117d5e393b6SSuyash Tandon   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
611848a46eb9SPierre Jolivet   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
61199566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
6120f75ecaa4SHong Zhang 
61214222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
61223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6123fc4dec0aSBarry Smith }
6124fc4dec0aSBarry Smith 
6125d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6126d71ae5a4SJacob Faibussowitsch {
61274222ddf1SHong Zhang   Mat_Product *product = C->product;
61284222ddf1SHong Zhang   Mat          A = product->A, B = product->B;
6129fc4dec0aSBarry Smith 
6130fc4dec0aSBarry Smith   PetscFunctionBegin;
613120f4b53cSBarry Smith   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
613220f4b53cSBarry Smith              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
61334222ddf1SHong Zhang   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
61344222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
61353ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6136fc4dec0aSBarry Smith }
6137fc4dec0aSBarry Smith 
6138d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6139d71ae5a4SJacob Faibussowitsch {
61404222ddf1SHong Zhang   Mat_Product *product = C->product;
61414222ddf1SHong Zhang 
61424222ddf1SHong Zhang   PetscFunctionBegin;
614348a46eb9SPierre Jolivet   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
61443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
61454222ddf1SHong Zhang }
6146394ed5ebSJunchao Zhang 
614727430b45SBarry Smith /*
614827430b45SBarry Smith    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6149394ed5ebSJunchao Zhang 
6150394ed5ebSJunchao Zhang   Input Parameters:
6151394ed5ebSJunchao Zhang 
6152651b1cf9SStefano Zampini     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6153651b1cf9SStefano Zampini     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6154394ed5ebSJunchao Zhang 
6155158ec288SJunchao Zhang     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6156394ed5ebSJunchao Zhang 
6157394ed5ebSJunchao Zhang     For Set1, j1[] contains column indices of the nonzeros.
6158394ed5ebSJunchao Zhang     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6159394ed5ebSJunchao Zhang     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
6160394ed5ebSJunchao Zhang     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6161394ed5ebSJunchao Zhang 
6162394ed5ebSJunchao Zhang     Similar for Set2.
6163394ed5ebSJunchao Zhang 
6164394ed5ebSJunchao Zhang     This routine merges the two sets of nonzeros row by row and removes repeats.
6165394ed5ebSJunchao Zhang 
6166158ec288SJunchao Zhang   Output Parameters: (memory is allocated by the caller)
6167394ed5ebSJunchao Zhang 
6168394ed5ebSJunchao Zhang     i[],j[]: the CSR of the merged matrix, which has m rows.
6169394ed5ebSJunchao Zhang     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6170394ed5ebSJunchao Zhang     imap2[]: similar to imap1[], but for Set2.
6171394ed5ebSJunchao Zhang     Note we order nonzeros row-by-row and from left to right.
6172394ed5ebSJunchao Zhang */
6173d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6174d71ae5a4SJacob Faibussowitsch {
6175394ed5ebSJunchao Zhang   PetscInt   r, m; /* Row index of mat */
6176394ed5ebSJunchao Zhang   PetscCount t, t1, t2, b1, e1, b2, e2;
6177394ed5ebSJunchao Zhang 
6178394ed5ebSJunchao Zhang   PetscFunctionBegin;
61799566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, NULL));
6180394ed5ebSJunchao Zhang   t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */
6181394ed5ebSJunchao Zhang   i[0]        = 0;
6182394ed5ebSJunchao Zhang   for (r = 0; r < m; r++) { /* Do row by row merging */
6183394ed5ebSJunchao Zhang     b1 = rowBegin1[r];
6184394ed5ebSJunchao Zhang     e1 = rowEnd1[r];
6185394ed5ebSJunchao Zhang     b2 = rowBegin2[r];
6186394ed5ebSJunchao Zhang     e2 = rowEnd2[r];
6187394ed5ebSJunchao Zhang     while (b1 < e1 && b2 < e2) {
6188394ed5ebSJunchao Zhang       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6189394ed5ebSJunchao Zhang         j[t]      = j1[b1];
6190394ed5ebSJunchao Zhang         imap1[t1] = t;
6191394ed5ebSJunchao Zhang         imap2[t2] = t;
6192394ed5ebSJunchao Zhang         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6193394ed5ebSJunchao Zhang         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
61949371c9d4SSatish Balay         t1++;
61959371c9d4SSatish Balay         t2++;
61969371c9d4SSatish Balay         t++;
6197394ed5ebSJunchao Zhang       } else if (j1[b1] < j2[b2]) {
6198394ed5ebSJunchao Zhang         j[t]      = j1[b1];
6199394ed5ebSJunchao Zhang         imap1[t1] = t;
6200394ed5ebSJunchao Zhang         b1 += jmap1[t1 + 1] - jmap1[t1];
62019371c9d4SSatish Balay         t1++;
62029371c9d4SSatish Balay         t++;
6203394ed5ebSJunchao Zhang       } else {
6204394ed5ebSJunchao Zhang         j[t]      = j2[b2];
6205394ed5ebSJunchao Zhang         imap2[t2] = t;
6206394ed5ebSJunchao Zhang         b2 += jmap2[t2 + 1] - jmap2[t2];
62079371c9d4SSatish Balay         t2++;
62089371c9d4SSatish Balay         t++;
6209394ed5ebSJunchao Zhang       }
6210394ed5ebSJunchao Zhang     }
6211394ed5ebSJunchao Zhang     /* Merge the remaining in either j1[] or j2[] */
6212394ed5ebSJunchao Zhang     while (b1 < e1) {
6213394ed5ebSJunchao Zhang       j[t]      = j1[b1];
6214394ed5ebSJunchao Zhang       imap1[t1] = t;
6215394ed5ebSJunchao Zhang       b1 += jmap1[t1 + 1] - jmap1[t1];
62169371c9d4SSatish Balay       t1++;
62179371c9d4SSatish Balay       t++;
6218394ed5ebSJunchao Zhang     }
6219394ed5ebSJunchao Zhang     while (b2 < e2) {
6220394ed5ebSJunchao Zhang       j[t]      = j2[b2];
6221394ed5ebSJunchao Zhang       imap2[t2] = t;
6222394ed5ebSJunchao Zhang       b2 += jmap2[t2 + 1] - jmap2[t2];
62239371c9d4SSatish Balay       t2++;
62249371c9d4SSatish Balay       t++;
6225394ed5ebSJunchao Zhang     }
62266497c311SBarry Smith     PetscCall(PetscIntCast(t, i + r + 1));
6227394ed5ebSJunchao Zhang   }
62283ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6229394ed5ebSJunchao Zhang }
6230394ed5ebSJunchao Zhang 
623127430b45SBarry Smith /*
623227430b45SBarry Smith   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6233394ed5ebSJunchao Zhang 
6234394ed5ebSJunchao Zhang   Input Parameters:
6235394ed5ebSJunchao Zhang     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6236394ed5ebSJunchao Zhang     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6237394ed5ebSJunchao Zhang       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6238394ed5ebSJunchao Zhang 
6239394ed5ebSJunchao Zhang       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6240394ed5ebSJunchao Zhang       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6241394ed5ebSJunchao Zhang 
6242394ed5ebSJunchao Zhang   Output Parameters:
6243394ed5ebSJunchao Zhang     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6244394ed5ebSJunchao Zhang     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6245394ed5ebSJunchao Zhang       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6246394ed5ebSJunchao Zhang       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6247394ed5ebSJunchao Zhang 
6248394ed5ebSJunchao Zhang     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6249158ec288SJunchao Zhang       Atot: number of entries belonging to the diagonal block.
6250158ec288SJunchao Zhang       Annz: number of unique nonzeros belonging to the diagonal block.
6251394ed5ebSJunchao Zhang       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6252394ed5ebSJunchao Zhang         repeats (i.e., same 'i,j' pair).
6253394ed5ebSJunchao Zhang       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6254394ed5ebSJunchao Zhang         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6255394ed5ebSJunchao Zhang 
6256394ed5ebSJunchao Zhang       Atot: number of entries belonging to the diagonal block
6257394ed5ebSJunchao Zhang       Annz: number of unique nonzeros belonging to the diagonal block.
6258394ed5ebSJunchao Zhang 
6259394ed5ebSJunchao Zhang     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6260394ed5ebSJunchao Zhang 
6261158ec288SJunchao Zhang     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6262394ed5ebSJunchao Zhang */
6263d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6264d71ae5a4SJacob Faibussowitsch {
6265394ed5ebSJunchao Zhang   PetscInt    cstart, cend, rstart, rend, row, col;
6266394ed5ebSJunchao Zhang   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6267394ed5ebSJunchao Zhang   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6268394ed5ebSJunchao Zhang   PetscCount  k, m, p, q, r, s, mid;
6269394ed5ebSJunchao Zhang   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6270394ed5ebSJunchao Zhang 
6271394ed5ebSJunchao Zhang   PetscFunctionBegin;
62729566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
62739566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6274394ed5ebSJunchao Zhang   m = rend - rstart;
6275394ed5ebSJunchao Zhang 
6276651b1cf9SStefano Zampini   /* Skip negative rows */
6277651b1cf9SStefano Zampini   for (k = 0; k < n; k++)
62789371c9d4SSatish Balay     if (i[k] >= 0) break;
6279394ed5ebSJunchao Zhang 
6280394ed5ebSJunchao Zhang   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6281394ed5ebSJunchao Zhang      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6282394ed5ebSJunchao Zhang   */
6283394ed5ebSJunchao Zhang   while (k < n) {
6284394ed5ebSJunchao Zhang     row = i[k];
6285394ed5ebSJunchao Zhang     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
62869371c9d4SSatish Balay     for (s = k; s < n; s++)
62879371c9d4SSatish Balay       if (i[s] != row) break;
6288651b1cf9SStefano Zampini 
62891690c2aeSBarry Smith     /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
6290394ed5ebSJunchao Zhang     for (p = k; p < s; p++) {
62911690c2aeSBarry Smith       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
629254c59aa7SJacob Faibussowitsch       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6293394ed5ebSJunchao Zhang     }
62949566063dSJacob Faibussowitsch     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6295158ec288SJunchao Zhang     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6296394ed5ebSJunchao Zhang     rowBegin[row - rstart] = k;
6297394ed5ebSJunchao Zhang     rowMid[row - rstart]   = mid;
6298394ed5ebSJunchao Zhang     rowEnd[row - rstart]   = s;
6299394ed5ebSJunchao Zhang 
6300394ed5ebSJunchao Zhang     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6301394ed5ebSJunchao Zhang     Atot += mid - k;
6302394ed5ebSJunchao Zhang     Btot += s - mid;
6303394ed5ebSJunchao Zhang 
6304651b1cf9SStefano Zampini     /* Count unique nonzeros of this diag row */
6305394ed5ebSJunchao Zhang     for (p = k; p < mid;) {
6306394ed5ebSJunchao Zhang       col = j[p];
63079371c9d4SSatish Balay       do {
63081690c2aeSBarry Smith         j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
63099371c9d4SSatish Balay         p++;
6310651b1cf9SStefano Zampini       } while (p < mid && j[p] == col);
6311394ed5ebSJunchao Zhang       Annz++;
6312394ed5ebSJunchao Zhang     }
6313394ed5ebSJunchao Zhang 
6314651b1cf9SStefano Zampini     /* Count unique nonzeros of this offdiag row */
6315394ed5ebSJunchao Zhang     for (p = mid; p < s;) {
6316394ed5ebSJunchao Zhang       col = j[p];
6317d71ae5a4SJacob Faibussowitsch       do {
6318d71ae5a4SJacob Faibussowitsch         p++;
6319d71ae5a4SJacob Faibussowitsch       } while (p < s && j[p] == col);
6320394ed5ebSJunchao Zhang       Bnnz++;
6321394ed5ebSJunchao Zhang     }
6322394ed5ebSJunchao Zhang     k = s;
6323394ed5ebSJunchao Zhang   }
6324394ed5ebSJunchao Zhang 
6325394ed5ebSJunchao Zhang   /* Allocation according to Atot, Btot, Annz, Bnnz */
6326158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Atot, &Aperm));
6327158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Btot, &Bperm));
6328158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6329158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6330394ed5ebSJunchao Zhang 
63316aad120cSJose E. Roman   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6332394ed5ebSJunchao Zhang   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6333394ed5ebSJunchao Zhang   for (r = 0; r < m; r++) {
6334394ed5ebSJunchao Zhang     k   = rowBegin[r];
6335394ed5ebSJunchao Zhang     mid = rowMid[r];
6336394ed5ebSJunchao Zhang     s   = rowEnd[r];
63378e3a54c0SPierre Jolivet     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
63388e3a54c0SPierre Jolivet     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6339394ed5ebSJunchao Zhang     Atot += mid - k;
6340394ed5ebSJunchao Zhang     Btot += s - mid;
6341394ed5ebSJunchao Zhang 
6342394ed5ebSJunchao Zhang     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6343394ed5ebSJunchao Zhang     for (p = k; p < mid;) {
6344394ed5ebSJunchao Zhang       col = j[p];
6345394ed5ebSJunchao Zhang       q   = p;
6346d71ae5a4SJacob Faibussowitsch       do {
6347d71ae5a4SJacob Faibussowitsch         p++;
6348d71ae5a4SJacob Faibussowitsch       } while (p < mid && j[p] == col);
6349394ed5ebSJunchao Zhang       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6350394ed5ebSJunchao Zhang       Annz++;
6351394ed5ebSJunchao Zhang     }
6352394ed5ebSJunchao Zhang 
6353394ed5ebSJunchao Zhang     for (p = mid; p < s;) {
6354394ed5ebSJunchao Zhang       col = j[p];
6355394ed5ebSJunchao Zhang       q   = p;
6356d71ae5a4SJacob Faibussowitsch       do {
6357d71ae5a4SJacob Faibussowitsch         p++;
6358d71ae5a4SJacob Faibussowitsch       } while (p < s && j[p] == col);
6359394ed5ebSJunchao Zhang       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6360394ed5ebSJunchao Zhang       Bnnz++;
6361394ed5ebSJunchao Zhang     }
6362394ed5ebSJunchao Zhang   }
6363394ed5ebSJunchao Zhang   /* Output */
6364394ed5ebSJunchao Zhang   *Aperm_ = Aperm;
6365394ed5ebSJunchao Zhang   *Annz_  = Annz;
6366394ed5ebSJunchao Zhang   *Atot_  = Atot;
6367394ed5ebSJunchao Zhang   *Ajmap_ = Ajmap;
6368394ed5ebSJunchao Zhang   *Bperm_ = Bperm;
6369394ed5ebSJunchao Zhang   *Bnnz_  = Bnnz;
6370394ed5ebSJunchao Zhang   *Btot_  = Btot;
6371394ed5ebSJunchao Zhang   *Bjmap_ = Bjmap;
63723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6373394ed5ebSJunchao Zhang }
6374394ed5ebSJunchao Zhang 
637527430b45SBarry Smith /*
637627430b45SBarry Smith   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6377158ec288SJunchao Zhang 
6378158ec288SJunchao Zhang   Input Parameters:
6379158ec288SJunchao Zhang     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6380158ec288SJunchao Zhang     nnz:  number of unique nonzeros in the merged matrix
6381158ec288SJunchao Zhang     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6382651b1cf9SStefano Zampini     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6383158ec288SJunchao Zhang 
6384158ec288SJunchao Zhang   Output Parameter: (memory is allocated by the caller)
6385158ec288SJunchao Zhang     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6386158ec288SJunchao Zhang 
6387158ec288SJunchao Zhang   Example:
6388158ec288SJunchao Zhang     nnz1 = 4
6389158ec288SJunchao Zhang     nnz  = 6
6390158ec288SJunchao Zhang     imap = [1,3,4,5]
6391158ec288SJunchao Zhang     jmap = [0,3,5,6,7]
6392158ec288SJunchao Zhang    then,
6393158ec288SJunchao Zhang     jmap_new = [0,0,3,3,5,6,7]
6394158ec288SJunchao Zhang */
6395d71ae5a4SJacob Faibussowitsch static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6396d71ae5a4SJacob Faibussowitsch {
6397158ec288SJunchao Zhang   PetscCount k, p;
6398158ec288SJunchao Zhang 
6399158ec288SJunchao Zhang   PetscFunctionBegin;
6400158ec288SJunchao Zhang   jmap_new[0] = 0;
6401158ec288SJunchao Zhang   p           = nnz;                /* p loops over jmap_new[] backwards */
6402158ec288SJunchao Zhang   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6403158ec288SJunchao Zhang     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6404158ec288SJunchao Zhang   }
6405158ec288SJunchao Zhang   for (; p >= 0; p--) jmap_new[p] = jmap[0];
64063ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6407158ec288SJunchao Zhang }
6408158ec288SJunchao Zhang 
640949abdd8aSBarry Smith static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data)
64102c4ab24aSJunchao Zhang {
641149abdd8aSBarry Smith   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data;
64122c4ab24aSJunchao Zhang 
64132c4ab24aSJunchao Zhang   PetscFunctionBegin;
64142c4ab24aSJunchao Zhang   PetscCall(PetscSFDestroy(&coo->sf));
64152c4ab24aSJunchao Zhang   PetscCall(PetscFree(coo->Aperm1));
64162c4ab24aSJunchao Zhang   PetscCall(PetscFree(coo->Bperm1));
64172c4ab24aSJunchao Zhang   PetscCall(PetscFree(coo->Ajmap1));
64182c4ab24aSJunchao Zhang   PetscCall(PetscFree(coo->Bjmap1));
64192c4ab24aSJunchao Zhang   PetscCall(PetscFree(coo->Aimap2));
64202c4ab24aSJunchao Zhang   PetscCall(PetscFree(coo->Bimap2));
64212c4ab24aSJunchao Zhang   PetscCall(PetscFree(coo->Aperm2));
64222c4ab24aSJunchao Zhang   PetscCall(PetscFree(coo->Bperm2));
64232c4ab24aSJunchao Zhang   PetscCall(PetscFree(coo->Ajmap2));
64242c4ab24aSJunchao Zhang   PetscCall(PetscFree(coo->Bjmap2));
64252c4ab24aSJunchao Zhang   PetscCall(PetscFree(coo->Cperm1));
64262c4ab24aSJunchao Zhang   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
64272c4ab24aSJunchao Zhang   PetscCall(PetscFree(coo));
64282c4ab24aSJunchao Zhang   PetscFunctionReturn(PETSC_SUCCESS);
64292c4ab24aSJunchao Zhang }
64302c4ab24aSJunchao Zhang 
6431d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6432d71ae5a4SJacob Faibussowitsch {
6433394ed5ebSJunchao Zhang   MPI_Comm             comm;
6434394ed5ebSJunchao Zhang   PetscMPIInt          rank, size;
6435394ed5ebSJunchao Zhang   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6436394ed5ebSJunchao Zhang   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6437394ed5ebSJunchao Zhang   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
64382c4ab24aSJunchao Zhang   PetscContainer       container;
64392c4ab24aSJunchao Zhang   MatCOOStruct_MPIAIJ *coo;
6440394ed5ebSJunchao Zhang 
6441394ed5ebSJunchao Zhang   PetscFunctionBegin;
64429566063dSJacob Faibussowitsch   PetscCall(PetscFree(mpiaij->garray));
64439566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&mpiaij->lvec));
6444cbc6b225SStefano Zampini #if defined(PETSC_USE_CTABLE)
6445eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6446cbc6b225SStefano Zampini #else
64479566063dSJacob Faibussowitsch   PetscCall(PetscFree(mpiaij->colmap));
6448cbc6b225SStefano Zampini #endif
64499566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6450cbc6b225SStefano Zampini   mat->assembled     = PETSC_FALSE;
6451cbc6b225SStefano Zampini   mat->was_assembled = PETSC_FALSE;
6452cbc6b225SStefano Zampini 
64539566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
64549566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
64559566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
64569566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->rmap));
64579566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->cmap));
64589566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
64599566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
64609566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, &n));
64619566063dSJacob Faibussowitsch   PetscCall(MatGetSize(mat, &M, &N));
6462394ed5ebSJunchao Zhang 
64636aad120cSJose E. Roman   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6464394ed5ebSJunchao Zhang   /* entries come first, then local rows, then remote rows.                     */
6465394ed5ebSJunchao Zhang   PetscCount n1 = coo_n, *perm1;
6466e8729f6fSJunchao Zhang   PetscInt  *i1 = coo_i, *j1 = coo_j;
6467e8729f6fSJunchao Zhang 
6468e8729f6fSJunchao Zhang   PetscCall(PetscMalloc1(n1, &perm1));
6469394ed5ebSJunchao Zhang   for (k = 0; k < n1; k++) perm1[k] = k;
6470394ed5ebSJunchao Zhang 
6471394ed5ebSJunchao Zhang   /* Manipulate indices so that entries with negative row or col indices will have smallest
6472394ed5ebSJunchao Zhang      row indices, local entries will have greater but negative row indices, and remote entries
6473394ed5ebSJunchao Zhang      will have positive row indices.
6474394ed5ebSJunchao Zhang   */
6475394ed5ebSJunchao Zhang   for (k = 0; k < n1; k++) {
64761690c2aeSBarry Smith     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
64771690c2aeSBarry Smith     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6478f7d195e4SLawrence Mitchell     else {
6479f7d195e4SLawrence Mitchell       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
64801690c2aeSBarry Smith       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6481f7d195e4SLawrence Mitchell     }
6482394ed5ebSJunchao Zhang   }
6483394ed5ebSJunchao Zhang 
6484da81f932SPierre Jolivet   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
64859566063dSJacob Faibussowitsch   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6486651b1cf9SStefano Zampini 
6487651b1cf9SStefano Zampini   /* Advance k to the first entry we need to take care of */
6488651b1cf9SStefano Zampini   for (k = 0; k < n1; k++)
64891690c2aeSBarry Smith     if (i1[k] > PETSC_INT_MIN) break;
64906497c311SBarry Smith   PetscCount i1start = k;
6491651b1cf9SStefano Zampini 
64921690c2aeSBarry Smith   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
64931690c2aeSBarry Smith   for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows*/
6494394ed5ebSJunchao Zhang 
6495394ed5ebSJunchao Zhang   /*           Send remote rows to their owner                                  */
6496394ed5ebSJunchao Zhang   /* Find which rows should be sent to which remote ranks*/
6497394ed5ebSJunchao Zhang   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6498394ed5ebSJunchao Zhang   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6499394ed5ebSJunchao Zhang   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6500394ed5ebSJunchao Zhang   const PetscInt *ranges;
6501394ed5ebSJunchao Zhang   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6502394ed5ebSJunchao Zhang 
65039566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
65049566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6505394ed5ebSJunchao Zhang   for (k = rem; k < n1;) {
6506394ed5ebSJunchao Zhang     PetscMPIInt owner;
6507394ed5ebSJunchao Zhang     PetscInt    firstRow, lastRow;
6508cbc6b225SStefano Zampini 
6509394ed5ebSJunchao Zhang     /* Locate a row range */
6510394ed5ebSJunchao Zhang     firstRow = i1[k]; /* first row of this owner */
65119566063dSJacob Faibussowitsch     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6512394ed5ebSJunchao Zhang     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6513394ed5ebSJunchao Zhang 
6514394ed5ebSJunchao Zhang     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
65159566063dSJacob Faibussowitsch     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6516394ed5ebSJunchao Zhang 
6517394ed5ebSJunchao Zhang     /* All entries in [k,p) belong to this remote owner */
6518394ed5ebSJunchao Zhang     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6519394ed5ebSJunchao Zhang       PetscMPIInt *sendto2;
6520394ed5ebSJunchao Zhang       PetscInt    *nentries2;
6521394ed5ebSJunchao Zhang       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6522cbc6b225SStefano Zampini 
65239566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
65249566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
65259566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
65269566063dSJacob Faibussowitsch       PetscCall(PetscFree2(sendto, nentries2));
6527394ed5ebSJunchao Zhang       sendto   = sendto2;
6528394ed5ebSJunchao Zhang       nentries = nentries2;
6529394ed5ebSJunchao Zhang       maxNsend = maxNsend2;
6530394ed5ebSJunchao Zhang     }
6531394ed5ebSJunchao Zhang     sendto[nsend] = owner;
65326497c311SBarry Smith     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6533394ed5ebSJunchao Zhang     nsend++;
6534394ed5ebSJunchao Zhang     k = p;
6535394ed5ebSJunchao Zhang   }
6536394ed5ebSJunchao Zhang 
6537394ed5ebSJunchao Zhang   /* Build 1st SF to know offsets on remote to send data */
6538394ed5ebSJunchao Zhang   PetscSF      sf1;
6539394ed5ebSJunchao Zhang   PetscInt     nroots = 1, nroots2 = 0;
6540394ed5ebSJunchao Zhang   PetscInt     nleaves = nsend, nleaves2 = 0;
6541394ed5ebSJunchao Zhang   PetscInt    *offsets;
6542394ed5ebSJunchao Zhang   PetscSFNode *iremote;
6543394ed5ebSJunchao Zhang 
65449566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(comm, &sf1));
65459566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nsend, &iremote));
65469566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nsend, &offsets));
6547394ed5ebSJunchao Zhang   for (k = 0; k < nsend; k++) {
6548394ed5ebSJunchao Zhang     iremote[k].rank  = sendto[k];
6549394ed5ebSJunchao Zhang     iremote[k].index = 0;
6550394ed5ebSJunchao Zhang     nleaves2 += nentries[k];
655154c59aa7SJacob Faibussowitsch     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6552394ed5ebSJunchao Zhang   }
65539566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
65549566063dSJacob Faibussowitsch   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
65559566063dSJacob Faibussowitsch   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
65569566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf1));
6557e978a55eSPierre Jolivet   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6558394ed5ebSJunchao Zhang 
6559394ed5ebSJunchao Zhang   /* Build 2nd SF to send remote COOs to their owner */
6560394ed5ebSJunchao Zhang   PetscSF sf2;
6561394ed5ebSJunchao Zhang   nroots  = nroots2;
6562394ed5ebSJunchao Zhang   nleaves = nleaves2;
65639566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(comm, &sf2));
65649566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(sf2));
65659566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nleaves, &iremote));
6566394ed5ebSJunchao Zhang   p = 0;
6567394ed5ebSJunchao Zhang   for (k = 0; k < nsend; k++) {
656854c59aa7SJacob Faibussowitsch     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6569394ed5ebSJunchao Zhang     for (q = 0; q < nentries[k]; q++, p++) {
6570394ed5ebSJunchao Zhang       iremote[p].rank = sendto[k];
65716497c311SBarry Smith       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6572394ed5ebSJunchao Zhang     }
6573394ed5ebSJunchao Zhang   }
65749566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6575394ed5ebSJunchao Zhang 
6576394ed5ebSJunchao Zhang   /* Send the remote COOs to their owner */
6577394ed5ebSJunchao Zhang   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6578394ed5ebSJunchao Zhang   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
65799566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6580834dcf29SHansol Suh   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6581834dcf29SHansol Suh   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6582de0a9f35SPierre Jolivet   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6583de0a9f35SPierre Jolivet   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6584834dcf29SHansol Suh   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6585834dcf29SHansol Suh   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6586834dcf29SHansol Suh   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6587834dcf29SHansol Suh   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6588394ed5ebSJunchao Zhang 
65899566063dSJacob Faibussowitsch   PetscCall(PetscFree(offsets));
65909566063dSJacob Faibussowitsch   PetscCall(PetscFree2(sendto, nentries));
6591394ed5ebSJunchao Zhang 
6592394ed5ebSJunchao Zhang   /* Sort received COOs by row along with the permutation array     */
6593394ed5ebSJunchao Zhang   for (k = 0; k < n2; k++) perm2[k] = k;
65949566063dSJacob Faibussowitsch   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6595394ed5ebSJunchao Zhang 
6596651b1cf9SStefano Zampini   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6597651b1cf9SStefano Zampini   PetscCount *Cperm1;
6598834dcf29SHansol Suh   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6599de0a9f35SPierre Jolivet   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6600651b1cf9SStefano Zampini   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6601834dcf29SHansol Suh   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6602651b1cf9SStefano Zampini 
6603651b1cf9SStefano Zampini   /* Support for HYPRE matrices, kind of a hack.
6604651b1cf9SStefano Zampini      Swap min column with diagonal so that diagonal values will go first */
6605651b1cf9SStefano Zampini   PetscBool hypre;
660617b874c6SStefano Zampini   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6607651b1cf9SStefano Zampini   if (hypre) {
6608651b1cf9SStefano Zampini     PetscInt *minj;
6609651b1cf9SStefano Zampini     PetscBT   hasdiag;
6610651b1cf9SStefano Zampini 
6611651b1cf9SStefano Zampini     PetscCall(PetscBTCreate(m, &hasdiag));
6612651b1cf9SStefano Zampini     PetscCall(PetscMalloc1(m, &minj));
66131690c2aeSBarry Smith     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6614651b1cf9SStefano Zampini     for (k = i1start; k < rem; k++) {
6615651b1cf9SStefano Zampini       if (j1[k] < cstart || j1[k] >= cend) continue;
6616651b1cf9SStefano Zampini       const PetscInt rindex = i1[k] - rstart;
6617651b1cf9SStefano Zampini       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6618651b1cf9SStefano Zampini       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6619651b1cf9SStefano Zampini     }
6620651b1cf9SStefano Zampini     for (k = 0; k < n2; k++) {
6621651b1cf9SStefano Zampini       if (j2[k] < cstart || j2[k] >= cend) continue;
6622651b1cf9SStefano Zampini       const PetscInt rindex = i2[k] - rstart;
6623651b1cf9SStefano Zampini       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6624651b1cf9SStefano Zampini       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6625651b1cf9SStefano Zampini     }
6626651b1cf9SStefano Zampini     for (k = i1start; k < rem; k++) {
6627651b1cf9SStefano Zampini       const PetscInt rindex = i1[k] - rstart;
6628651b1cf9SStefano Zampini       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6629651b1cf9SStefano Zampini       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6630651b1cf9SStefano Zampini       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6631651b1cf9SStefano Zampini     }
6632651b1cf9SStefano Zampini     for (k = 0; k < n2; k++) {
6633651b1cf9SStefano Zampini       const PetscInt rindex = i2[k] - rstart;
6634651b1cf9SStefano Zampini       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6635651b1cf9SStefano Zampini       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6636651b1cf9SStefano Zampini       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6637651b1cf9SStefano Zampini     }
6638651b1cf9SStefano Zampini     PetscCall(PetscBTDestroy(&hasdiag));
6639651b1cf9SStefano Zampini     PetscCall(PetscFree(minj));
6640651b1cf9SStefano Zampini   }
6641651b1cf9SStefano Zampini 
6642651b1cf9SStefano Zampini   /* Split local COOs and received COOs into diag/offdiag portions */
6643651b1cf9SStefano Zampini   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6644651b1cf9SStefano Zampini   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6645651b1cf9SStefano Zampini   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6646394ed5ebSJunchao Zhang   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6647394ed5ebSJunchao Zhang   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6648394ed5ebSJunchao Zhang   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6649394ed5ebSJunchao Zhang 
6650651b1cf9SStefano Zampini   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
66519566063dSJacob Faibussowitsch   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6652651b1cf9SStefano Zampini   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
66539566063dSJacob Faibussowitsch   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6654394ed5ebSJunchao Zhang 
6655394ed5ebSJunchao Zhang   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6656394ed5ebSJunchao Zhang   PetscInt *Ai, *Bi;
6657394ed5ebSJunchao Zhang   PetscInt *Aj, *Bj;
6658394ed5ebSJunchao Zhang 
66599566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &Ai));
66609566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &Bi));
66619566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
66629566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6663394ed5ebSJunchao Zhang 
6664394ed5ebSJunchao Zhang   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6665158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6666158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6667158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6668158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6669394ed5ebSJunchao Zhang 
66709566063dSJacob Faibussowitsch   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
66719566063dSJacob Faibussowitsch   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6672158ec288SJunchao Zhang 
6673158ec288SJunchao Zhang   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6674158ec288SJunchao Zhang   /* expect nonzeros in A/B most likely have local contributing entries        */
6675158ec288SJunchao Zhang   PetscInt    Annz = Ai[m];
6676158ec288SJunchao Zhang   PetscInt    Bnnz = Bi[m];
6677158ec288SJunchao Zhang   PetscCount *Ajmap1_new, *Bjmap1_new;
6678158ec288SJunchao Zhang 
6679158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6680158ec288SJunchao Zhang   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6681158ec288SJunchao Zhang 
6682158ec288SJunchao Zhang   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6683158ec288SJunchao Zhang   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6684158ec288SJunchao Zhang 
6685158ec288SJunchao Zhang   PetscCall(PetscFree(Aimap1));
6686158ec288SJunchao Zhang   PetscCall(PetscFree(Ajmap1));
6687158ec288SJunchao Zhang   PetscCall(PetscFree(Bimap1));
6688158ec288SJunchao Zhang   PetscCall(PetscFree(Bjmap1));
66899566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
66909566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6691e8729f6fSJunchao Zhang   PetscCall(PetscFree(perm1));
66929566063dSJacob Faibussowitsch   PetscCall(PetscFree3(i2, j2, perm2));
6693394ed5ebSJunchao Zhang 
6694158ec288SJunchao Zhang   Ajmap1 = Ajmap1_new;
6695158ec288SJunchao Zhang   Bjmap1 = Bjmap1_new;
6696158ec288SJunchao Zhang 
6697394ed5ebSJunchao Zhang   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6698394ed5ebSJunchao Zhang   if (Annz < Annz1 + Annz2) {
6699394ed5ebSJunchao Zhang     PetscInt *Aj_new;
67009566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(Annz, &Aj_new));
67019566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
67029566063dSJacob Faibussowitsch     PetscCall(PetscFree(Aj));
6703394ed5ebSJunchao Zhang     Aj = Aj_new;
6704394ed5ebSJunchao Zhang   }
6705394ed5ebSJunchao Zhang 
6706394ed5ebSJunchao Zhang   if (Bnnz < Bnnz1 + Bnnz2) {
6707394ed5ebSJunchao Zhang     PetscInt *Bj_new;
67089566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
67099566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
67109566063dSJacob Faibussowitsch     PetscCall(PetscFree(Bj));
6711394ed5ebSJunchao Zhang     Bj = Bj_new;
6712394ed5ebSJunchao Zhang   }
6713394ed5ebSJunchao Zhang 
6714cbc6b225SStefano Zampini   /* Create new submatrices for on-process and off-process coupling                  */
6715394ed5ebSJunchao Zhang   PetscScalar     *Aa, *Ba;
6716cbc6b225SStefano Zampini   MatType          rtype;
6717394ed5ebSJunchao Zhang   Mat_SeqAIJ      *a, *b;
6718cf8ba265SJunchao Zhang   PetscObjectState state;
67199566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
67209566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(Bnnz, &Ba));
6721394ed5ebSJunchao Zhang   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
67229371c9d4SSatish Balay   if (cstart) {
67239371c9d4SSatish Balay     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
67249371c9d4SSatish Balay   }
6725c508b908SBarry Smith 
67269566063dSJacob Faibussowitsch   PetscCall(MatGetRootType_Private(mat, &rtype));
6727c508b908SBarry Smith 
6728c508b908SBarry Smith   MatSeqXAIJGetOptions_Private(mpiaij->A);
6729c508b908SBarry Smith   PetscCall(MatDestroy(&mpiaij->A));
67309566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
67310da474c6SJeremy L Thompson   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6732c508b908SBarry Smith   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6733c508b908SBarry Smith 
6734c508b908SBarry Smith   MatSeqXAIJGetOptions_Private(mpiaij->B);
6735c508b908SBarry Smith   PetscCall(MatDestroy(&mpiaij->B));
67369566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
67370da474c6SJeremy L Thompson   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6738c508b908SBarry Smith   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6739c508b908SBarry Smith 
67409566063dSJacob Faibussowitsch   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
67415bb69915SJunchao Zhang   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6742cf8ba265SJunchao Zhang   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6743462c564dSBarry Smith   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6744cbc6b225SStefano Zampini 
6745394ed5ebSJunchao Zhang   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6746394ed5ebSJunchao Zhang   b          = (Mat_SeqAIJ *)mpiaij->B->data;
67479f0612e4SBarry Smith   a->free_a  = PETSC_TRUE;
67489f0612e4SBarry Smith   a->free_ij = PETSC_TRUE;
67499f0612e4SBarry Smith   b->free_a  = PETSC_TRUE;
67509f0612e4SBarry Smith   b->free_ij = PETSC_TRUE;
675117b874c6SStefano Zampini   a->maxnz   = a->nz;
675217b874c6SStefano Zampini   b->maxnz   = b->nz;
6753394ed5ebSJunchao Zhang 
6754cbc6b225SStefano Zampini   /* conversion must happen AFTER multiply setup */
67559566063dSJacob Faibussowitsch   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
67569566063dSJacob Faibussowitsch   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
67579566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&mpiaij->lvec));
67589566063dSJacob Faibussowitsch   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6759cbc6b225SStefano Zampini 
67602c4ab24aSJunchao Zhang   // Put the COO struct in a container and then attach that to the matrix
67612c4ab24aSJunchao Zhang   PetscCall(PetscMalloc1(1, &coo));
67622c4ab24aSJunchao Zhang   coo->n       = coo_n;
67632c4ab24aSJunchao Zhang   coo->sf      = sf2;
67642c4ab24aSJunchao Zhang   coo->sendlen = nleaves;
67652c4ab24aSJunchao Zhang   coo->recvlen = nroots;
67662c4ab24aSJunchao Zhang   coo->Annz    = Annz;
67672c4ab24aSJunchao Zhang   coo->Bnnz    = Bnnz;
67682c4ab24aSJunchao Zhang   coo->Annz2   = Annz2;
67692c4ab24aSJunchao Zhang   coo->Bnnz2   = Bnnz2;
67702c4ab24aSJunchao Zhang   coo->Atot1   = Atot1;
67712c4ab24aSJunchao Zhang   coo->Atot2   = Atot2;
67722c4ab24aSJunchao Zhang   coo->Btot1   = Btot1;
67732c4ab24aSJunchao Zhang   coo->Btot2   = Btot2;
67742c4ab24aSJunchao Zhang   coo->Ajmap1  = Ajmap1;
67752c4ab24aSJunchao Zhang   coo->Aperm1  = Aperm1;
67762c4ab24aSJunchao Zhang   coo->Bjmap1  = Bjmap1;
67772c4ab24aSJunchao Zhang   coo->Bperm1  = Bperm1;
67782c4ab24aSJunchao Zhang   coo->Aimap2  = Aimap2;
67792c4ab24aSJunchao Zhang   coo->Ajmap2  = Ajmap2;
67802c4ab24aSJunchao Zhang   coo->Aperm2  = Aperm2;
67812c4ab24aSJunchao Zhang   coo->Bimap2  = Bimap2;
67822c4ab24aSJunchao Zhang   coo->Bjmap2  = Bjmap2;
67832c4ab24aSJunchao Zhang   coo->Bperm2  = Bperm2;
67842c4ab24aSJunchao Zhang   coo->Cperm1  = Cperm1;
67852c4ab24aSJunchao Zhang   // Allocate in preallocation. If not used, it has zero cost on host
67862c4ab24aSJunchao Zhang   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
67872c4ab24aSJunchao Zhang   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
67882c4ab24aSJunchao Zhang   PetscCall(PetscContainerSetPointer(container, coo));
678949abdd8aSBarry Smith   PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ));
67902c4ab24aSJunchao Zhang   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
67912c4ab24aSJunchao Zhang   PetscCall(PetscContainerDestroy(&container));
67923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6793394ed5ebSJunchao Zhang }
6794394ed5ebSJunchao Zhang 
6795d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6796d71ae5a4SJacob Faibussowitsch {
6797394ed5ebSJunchao Zhang   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6798394ed5ebSJunchao Zhang   Mat                  A = mpiaij->A, B = mpiaij->B;
6799394ed5ebSJunchao Zhang   PetscScalar         *Aa, *Ba;
68002c4ab24aSJunchao Zhang   PetscScalar         *sendbuf, *recvbuf;
68012c4ab24aSJunchao Zhang   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
68022c4ab24aSJunchao Zhang   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
68032c4ab24aSJunchao Zhang   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
68042c4ab24aSJunchao Zhang   const PetscCount    *Cperm1;
68052c4ab24aSJunchao Zhang   PetscContainer       container;
68062c4ab24aSJunchao Zhang   MatCOOStruct_MPIAIJ *coo;
6807394ed5ebSJunchao Zhang 
6808394ed5ebSJunchao Zhang   PetscFunctionBegin;
68092c4ab24aSJunchao Zhang   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
68102c4ab24aSJunchao Zhang   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
68112c4ab24aSJunchao Zhang   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
68122c4ab24aSJunchao Zhang   sendbuf = coo->sendbuf;
68132c4ab24aSJunchao Zhang   recvbuf = coo->recvbuf;
68142c4ab24aSJunchao Zhang   Ajmap1  = coo->Ajmap1;
68152c4ab24aSJunchao Zhang   Ajmap2  = coo->Ajmap2;
68162c4ab24aSJunchao Zhang   Aimap2  = coo->Aimap2;
68172c4ab24aSJunchao Zhang   Bjmap1  = coo->Bjmap1;
68182c4ab24aSJunchao Zhang   Bjmap2  = coo->Bjmap2;
68192c4ab24aSJunchao Zhang   Bimap2  = coo->Bimap2;
68202c4ab24aSJunchao Zhang   Aperm1  = coo->Aperm1;
68212c4ab24aSJunchao Zhang   Aperm2  = coo->Aperm2;
68222c4ab24aSJunchao Zhang   Bperm1  = coo->Bperm1;
68232c4ab24aSJunchao Zhang   Bperm2  = coo->Bperm2;
68242c4ab24aSJunchao Zhang   Cperm1  = coo->Cperm1;
68252c4ab24aSJunchao Zhang 
68269566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
68279566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArray(B, &Ba));
6828394ed5ebSJunchao Zhang 
6829394ed5ebSJunchao Zhang   /* Pack entries to be sent to remote */
68302c4ab24aSJunchao Zhang   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6831394ed5ebSJunchao Zhang 
6832394ed5ebSJunchao Zhang   /* Send remote entries to their owner and overlap the communication with local computation */
68332c4ab24aSJunchao Zhang   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6834394ed5ebSJunchao Zhang   /* Add local entries to A and B */
68352c4ab24aSJunchao Zhang   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6836da81f932SPierre Jolivet     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6837158ec288SJunchao Zhang     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6838158ec288SJunchao Zhang     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6839394ed5ebSJunchao Zhang   }
68402c4ab24aSJunchao Zhang   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6841158ec288SJunchao Zhang     PetscScalar sum = 0.0;
6842158ec288SJunchao Zhang     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6843158ec288SJunchao Zhang     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6844394ed5ebSJunchao Zhang   }
68452c4ab24aSJunchao Zhang   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6846394ed5ebSJunchao Zhang 
6847394ed5ebSJunchao Zhang   /* Add received remote entries to A and B */
68482c4ab24aSJunchao Zhang   for (PetscCount i = 0; i < coo->Annz2; i++) {
6849394ed5ebSJunchao Zhang     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6850394ed5ebSJunchao Zhang   }
68512c4ab24aSJunchao Zhang   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6852394ed5ebSJunchao Zhang     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6853394ed5ebSJunchao Zhang   }
68549566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
68559566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
68563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6857394ed5ebSJunchao Zhang }
6858394ed5ebSJunchao Zhang 
6859ccd8e176SBarry Smith /*MC
6860ccd8e176SBarry Smith    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6861ccd8e176SBarry Smith 
6862ccd8e176SBarry Smith    Options Database Keys:
686311a5261eSBarry Smith . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6864ccd8e176SBarry Smith 
6865ccd8e176SBarry Smith    Level: beginner
68660cd7f59aSBarry Smith 
68670cd7f59aSBarry Smith    Notes:
68682ef1f0ffSBarry Smith    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
68690cd7f59aSBarry Smith     in this case the values associated with the rows and columns one passes in are set to zero
68700cd7f59aSBarry Smith     in the matrix
68710cd7f59aSBarry Smith 
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6874ccd8e176SBarry Smith 
68751cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6876ccd8e176SBarry Smith M*/
6877d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6878d71ae5a4SJacob Faibussowitsch {
6879ccd8e176SBarry Smith   Mat_MPIAIJ *b;
6880ccd8e176SBarry Smith   PetscMPIInt size;
6881ccd8e176SBarry Smith 
6882ccd8e176SBarry Smith   PetscFunctionBegin;
68839566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
68842205254eSKarl Rupp 
68854dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&b));
6886ccd8e176SBarry Smith   B->data       = (void *)b;
6887aea10558SJacob Faibussowitsch   B->ops[0]     = MatOps_Values;
6888ccd8e176SBarry Smith   B->assembled  = PETSC_FALSE;
6889ccd8e176SBarry Smith   B->insertmode = NOT_SET_VALUES;
6890ccd8e176SBarry Smith   b->size       = size;
68912205254eSKarl Rupp 
68929566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6893ccd8e176SBarry Smith 
6894ccd8e176SBarry Smith   /* build cache for off array entries formed */
68959566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
68962205254eSKarl Rupp 
6897ccd8e176SBarry Smith   b->donotstash  = PETSC_FALSE;
6898f4259b30SLisandro Dalcin   b->colmap      = NULL;
6899f4259b30SLisandro Dalcin   b->garray      = NULL;
6900ccd8e176SBarry Smith   b->roworiented = PETSC_TRUE;
6901ccd8e176SBarry Smith 
6902ccd8e176SBarry Smith   /* stuff used for matrix vector multiply */
69030298fd71SBarry Smith   b->lvec  = NULL;
69040298fd71SBarry Smith   b->Mvctx = NULL;
6905ccd8e176SBarry Smith 
6906ccd8e176SBarry Smith   /* stuff for MatGetRow() */
6907f4259b30SLisandro Dalcin   b->rowindices   = NULL;
6908f4259b30SLisandro Dalcin   b->rowvalues    = NULL;
6909ccd8e176SBarry Smith   b->getrowactive = PETSC_FALSE;
6910ccd8e176SBarry Smith 
6911f719121fSJed Brown   /* flexible pointer used in CUSPARSE classes */
69120298fd71SBarry Smith   b->spptr = NULL;
6913f60c3dc2SHong Zhang 
69149566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
69159566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
69169566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
69179566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
69189566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
69199566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6920*674b392bSAlexander   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ));
69219566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
69229566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
69239566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
69249566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
69253d0639e7SStefano Zampini #if defined(PETSC_HAVE_CUDA)
69269566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
69273d0639e7SStefano Zampini #endif
6928d5e393b6SSuyash Tandon #if defined(PETSC_HAVE_HIP)
6929d5e393b6SSuyash Tandon   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6930d5e393b6SSuyash Tandon #endif
69313d0639e7SStefano Zampini #if defined(PETSC_HAVE_KOKKOS_KERNELS)
69329566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
69333d0639e7SStefano Zampini #endif
69349779e05dSSatish Balay #if defined(PETSC_HAVE_MKL_SPARSE)
69359566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6936191b95cbSRichard Tran Mills #endif
69379566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
69389566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
69399566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
69409566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
69415d7652ecSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
69429566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
69435d7652ecSHong Zhang #endif
6944d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
69459566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6946d24d4204SJose E. Roman #endif
69479566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
69489566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
69493dad0653Sstefano_zampini #if defined(PETSC_HAVE_HYPRE)
69509566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
69519566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
69523dad0653Sstefano_zampini #endif
69539566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
69549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
69559566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
69569566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
69579566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
69583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6959ccd8e176SBarry Smith }
696081824310SBarry Smith 
69615d83a8b1SBarry Smith /*@
696211a5261eSBarry Smith   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
696303bfb495SBarry Smith   and "off-diagonal" part of the matrix in CSR format.
696403bfb495SBarry Smith 
6965d083f849SBarry Smith   Collective
696603bfb495SBarry Smith 
696703bfb495SBarry Smith   Input Parameters:
696803bfb495SBarry Smith + comm - MPI communicator
696911a5261eSBarry Smith . m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
         calculated if `N` is given). For square matrices `n` is almost always `m`.
. M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
6975483a2f95SBarry Smith . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
697604ccdda3SJunchao Zhang . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
697703bfb495SBarry Smith . a    - matrix values
6978483a2f95SBarry Smith . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
69792ef1f0ffSBarry Smith . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
698003bfb495SBarry Smith - oa   - matrix values
698103bfb495SBarry Smith 
698203bfb495SBarry Smith   Output Parameter:
698303bfb495SBarry Smith . mat - the matrix
698403bfb495SBarry Smith 
698503bfb495SBarry Smith   Level: advanced
698603bfb495SBarry Smith 
698703bfb495SBarry Smith   Notes:
6988f13dfd9eSBarry Smith   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6989292fb18eSBarry Smith   must free the arrays once the matrix has been destroyed and not before.
699003bfb495SBarry Smith 
69912ef1f0ffSBarry Smith   The `i` and `j` indices are 0 based
699203bfb495SBarry Smith 
69932ef1f0ffSBarry Smith   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
699403bfb495SBarry Smith 
69957b55108eSBarry Smith   This sets local rows and cannot be used to set off-processor values.
69967b55108eSBarry Smith 
6997dca341c0SJed Brown   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6998dca341c0SJed Brown   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6999dca341c0SJed Brown   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
7000dca341c0SJed Brown   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
700111a5261eSBarry Smith   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
7002dca341c0SJed Brown   communication if it is known that only local entries will be set.
700303bfb495SBarry Smith 
70041cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
7005db781477SPatrick Sanan           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
70062b26979fSBarry Smith @*/
7007d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
7008d71ae5a4SJacob Faibussowitsch {
700903bfb495SBarry Smith   Mat_MPIAIJ *maij;
701003bfb495SBarry Smith 
701103bfb495SBarry Smith   PetscFunctionBegin;
701208401ef6SPierre Jolivet   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
7013aed4548fSBarry Smith   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
7014aed4548fSBarry Smith   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
70159566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
70169566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, M, N));
70179566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat, MATMPIAIJ));
701803bfb495SBarry Smith   maij = (Mat_MPIAIJ *)(*mat)->data;
70192205254eSKarl Rupp 
70208d7a6e47SBarry Smith   (*mat)->preallocated = PETSC_TRUE;
702103bfb495SBarry Smith 
70229566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp((*mat)->rmap));
70239566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp((*mat)->cmap));
702403bfb495SBarry Smith 
70259566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
70269566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
702703bfb495SBarry Smith 
70289566063dSJacob Faibussowitsch   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
70299566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
70309566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
70319566063dSJacob Faibussowitsch   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
70329566063dSJacob Faibussowitsch   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
70333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
703403bfb495SBarry Smith }
703503bfb495SBarry Smith 
70364e84afc0SStefano Zampini typedef struct {
70374e84afc0SStefano Zampini   Mat       *mp;    /* intermediate products */
70384e84afc0SStefano Zampini   PetscBool *mptmp; /* is the intermediate product temporary ? */
70394e84afc0SStefano Zampini   PetscInt   cp;    /* number of intermediate products */
70404e84afc0SStefano Zampini 
70414e84afc0SStefano Zampini   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
70424e84afc0SStefano Zampini   PetscInt    *startsj_s, *startsj_r;
70434e84afc0SStefano Zampini   PetscScalar *bufa;
70444e84afc0SStefano Zampini   Mat          P_oth;
70454e84afc0SStefano Zampini 
70464e84afc0SStefano Zampini   /* may take advantage of merging product->B */
7047ddea5d60SJunchao Zhang   Mat Bloc; /* B-local by merging diag and off-diag */
70484e84afc0SStefano Zampini 
7049ddea5d60SJunchao Zhang   /* cusparse does not have support to split between symbolic and numeric phases.
70504e84afc0SStefano Zampini      When api_user is true, we don't need to update the numerical values
70514e84afc0SStefano Zampini      of the temporary storage */
70524e84afc0SStefano Zampini   PetscBool reusesym;
70534e84afc0SStefano Zampini 
70544e84afc0SStefano Zampini   /* support for COO values insertion */
7055ddea5d60SJunchao Zhang   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
7056ddea5d60SJunchao Zhang   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
7057ddea5d60SJunchao Zhang   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
7058ddea5d60SJunchao Zhang   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
7059c215019aSStefano Zampini   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7060c215019aSStefano Zampini   PetscMemType mtype;
70614e84afc0SStefano Zampini 
70624e84afc0SStefano Zampini   /* customization */
70634e84afc0SStefano Zampini   PetscBool abmerge;
7064abb89eb1SStefano Zampini   PetscBool P_oth_bind;
70654e84afc0SStefano Zampini } MatMatMPIAIJBACKEND;
70664e84afc0SStefano Zampini 
7067ba38deedSJacob Faibussowitsch static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7068d71ae5a4SJacob Faibussowitsch {
70694e84afc0SStefano Zampini   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
70704e84afc0SStefano Zampini   PetscInt             i;
70714e84afc0SStefano Zampini 
70724e84afc0SStefano Zampini   PetscFunctionBegin;
70739566063dSJacob Faibussowitsch   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
70749566063dSJacob Faibussowitsch   PetscCall(PetscFree(mmdata->bufa));
70759566063dSJacob Faibussowitsch   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
70769566063dSJacob Faibussowitsch   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
70779566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&mmdata->P_oth));
70789566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&mmdata->Bloc));
70799566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&mmdata->sf));
708048a46eb9SPierre Jolivet   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
70819566063dSJacob Faibussowitsch   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
70829566063dSJacob Faibussowitsch   PetscCall(PetscFree(mmdata->own[0]));
70839566063dSJacob Faibussowitsch   PetscCall(PetscFree(mmdata->own));
70849566063dSJacob Faibussowitsch   PetscCall(PetscFree(mmdata->off[0]));
70859566063dSJacob Faibussowitsch   PetscCall(PetscFree(mmdata->off));
70869566063dSJacob Faibussowitsch   PetscCall(PetscFree(mmdata));
70873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
70884e84afc0SStefano Zampini }
70894e84afc0SStefano Zampini 
7090fff043a9SJunchao Zhang /* Copy selected n entries with indices in idx[] of A to v[].
7091fff043a9SJunchao Zhang    If idx is NULL, copy the whole data array of A to v[]
7092fff043a9SJunchao Zhang  */
7093d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7094d71ae5a4SJacob Faibussowitsch {
7095c215019aSStefano Zampini   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7096c215019aSStefano Zampini 
7097c215019aSStefano Zampini   PetscFunctionBegin;
70989566063dSJacob Faibussowitsch   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7099c215019aSStefano Zampini   if (f) {
71009566063dSJacob Faibussowitsch     PetscCall((*f)(A, n, idx, v));
7101c215019aSStefano Zampini   } else {
7102c215019aSStefano Zampini     const PetscScalar *vv;
7103c215019aSStefano Zampini 
71049566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7105c215019aSStefano Zampini     if (n && idx) {
7106c215019aSStefano Zampini       PetscScalar    *w  = v;
7107c215019aSStefano Zampini       const PetscInt *oi = idx;
7108c215019aSStefano Zampini       PetscInt        j;
7109c215019aSStefano Zampini 
7110c215019aSStefano Zampini       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7111c215019aSStefano Zampini     } else {
71129566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(v, vv, n));
7113c215019aSStefano Zampini     }
71149566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7115c215019aSStefano Zampini   }
71163ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
7117c215019aSStefano Zampini }
7118c215019aSStefano Zampini 
7119d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7120d71ae5a4SJacob Faibussowitsch {
71214e84afc0SStefano Zampini   MatMatMPIAIJBACKEND *mmdata;
71224e84afc0SStefano Zampini   PetscInt             i, n_d, n_o;
71234e84afc0SStefano Zampini 
71244e84afc0SStefano Zampini   PetscFunctionBegin;
71254e84afc0SStefano Zampini   MatCheckProduct(C, 1);
712628b400f6SJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
71274e84afc0SStefano Zampini   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
71284e84afc0SStefano Zampini   if (!mmdata->reusesym) { /* update temporary matrices */
712948a46eb9SPierre Jolivet     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
713048a46eb9SPierre Jolivet     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
71314e84afc0SStefano Zampini   }
71324e84afc0SStefano Zampini   mmdata->reusesym = PETSC_FALSE;
7133abb89eb1SStefano Zampini 
7134abb89eb1SStefano Zampini   for (i = 0; i < mmdata->cp; i++) {
713508401ef6SPierre Jolivet     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
71369566063dSJacob Faibussowitsch     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7137abb89eb1SStefano Zampini   }
71384e84afc0SStefano Zampini   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
71396497c311SBarry Smith     PetscInt noff;
71404e84afc0SStefano Zampini 
71416497c311SBarry Smith     PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
71424e84afc0SStefano Zampini     if (mmdata->mptmp[i]) continue;
71434e84afc0SStefano Zampini     if (noff) {
71446497c311SBarry Smith       PetscInt nown;
7145c215019aSStefano Zampini 
71466497c311SBarry Smith       PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
71479566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
71489566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
71494e84afc0SStefano Zampini       n_o += noff;
71504e84afc0SStefano Zampini       n_d += nown;
71514e84afc0SStefano Zampini     } else {
7152c215019aSStefano Zampini       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7153c215019aSStefano Zampini 
71549566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
71554e84afc0SStefano Zampini       n_d += mm->nz;
71564e84afc0SStefano Zampini     }
71574e84afc0SStefano Zampini   }
7158c215019aSStefano Zampini   if (mmdata->hasoffproc) { /* offprocess insertion */
71599566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
71609566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
71614e84afc0SStefano Zampini   }
71629566063dSJacob Faibussowitsch   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
71633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
71644e84afc0SStefano Zampini }
71654e84afc0SStefano Zampini 
71664e84afc0SStefano Zampini /* Support for Pt * A, A * P, or Pt * A * P */
71674e84afc0SStefano Zampini #define MAX_NUMBER_INTERMEDIATE 4
7168d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7169d71ae5a4SJacob Faibussowitsch {
71704e84afc0SStefano Zampini   Mat_Product           *product = C->product;
7171ddea5d60SJunchao Zhang   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
71724e84afc0SStefano Zampini   Mat_MPIAIJ            *a, *p;
71734e84afc0SStefano Zampini   MatMatMPIAIJBACKEND   *mmdata;
71744e84afc0SStefano Zampini   ISLocalToGlobalMapping P_oth_l2g = NULL;
71754e84afc0SStefano Zampini   IS                     glob      = NULL;
71764e84afc0SStefano Zampini   const char            *prefix;
71774e84afc0SStefano Zampini   char                   pprefix[256];
71784e84afc0SStefano Zampini   const PetscInt        *globidx, *P_oth_idx;
717982a78a4eSJed Brown   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
718082a78a4eSJed Brown   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7181ddea5d60SJunchao Zhang   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7182ddea5d60SJunchao Zhang                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7183ddea5d60SJunchao Zhang                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7184ddea5d60SJunchao Zhang   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7185ddea5d60SJunchao Zhang 
71864e84afc0SStefano Zampini   MatProductType ptype;
7187d5e393b6SSuyash Tandon   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
71884e84afc0SStefano Zampini   PetscMPIInt    size;
71894e84afc0SStefano Zampini 
71904e84afc0SStefano Zampini   PetscFunctionBegin;
71914e84afc0SStefano Zampini   MatCheckProduct(C, 1);
719228b400f6SJacob Faibussowitsch   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
71934e84afc0SStefano Zampini   ptype = product->type;
7194b94d7dedSBarry Smith   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7195fa046f9fSJunchao Zhang     ptype                                          = MATPRODUCT_AB;
7196fa046f9fSJunchao Zhang     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7197fa046f9fSJunchao Zhang   }
71984e84afc0SStefano Zampini   switch (ptype) {
71994e84afc0SStefano Zampini   case MATPRODUCT_AB:
72004e84afc0SStefano Zampini     A          = product->A;
72014e84afc0SStefano Zampini     P          = product->B;
72024e84afc0SStefano Zampini     m          = A->rmap->n;
72034e84afc0SStefano Zampini     n          = P->cmap->n;
72044e84afc0SStefano Zampini     M          = A->rmap->N;
72054e84afc0SStefano Zampini     N          = P->cmap->N;
7206ddea5d60SJunchao Zhang     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
72074e84afc0SStefano Zampini     break;
72084e84afc0SStefano Zampini   case MATPRODUCT_AtB:
72094e84afc0SStefano Zampini     P          = product->A;
72104e84afc0SStefano Zampini     A          = product->B;
72114e84afc0SStefano Zampini     m          = P->cmap->n;
72124e84afc0SStefano Zampini     n          = A->cmap->n;
72134e84afc0SStefano Zampini     M          = P->cmap->N;
72144e84afc0SStefano Zampini     N          = A->cmap->N;
72154e84afc0SStefano Zampini     hasoffproc = PETSC_TRUE;
72164e84afc0SStefano Zampini     break;
72174e84afc0SStefano Zampini   case MATPRODUCT_PtAP:
72184e84afc0SStefano Zampini     A          = product->A;
72194e84afc0SStefano Zampini     P          = product->B;
72204e84afc0SStefano Zampini     m          = P->cmap->n;
72214e84afc0SStefano Zampini     n          = P->cmap->n;
72224e84afc0SStefano Zampini     M          = P->cmap->N;
72234e84afc0SStefano Zampini     N          = P->cmap->N;
72244e84afc0SStefano Zampini     hasoffproc = PETSC_TRUE;
72254e84afc0SStefano Zampini     break;
7226d71ae5a4SJacob Faibussowitsch   default:
7227d71ae5a4SJacob Faibussowitsch     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
72284e84afc0SStefano Zampini   }
72299566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
72304e84afc0SStefano Zampini   if (size == 1) hasoffproc = PETSC_FALSE;
72314e84afc0SStefano Zampini 
72324e84afc0SStefano Zampini   /* defaults */
72334e84afc0SStefano Zampini   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
72344e84afc0SStefano Zampini     mp[i]    = NULL;
72354e84afc0SStefano Zampini     mptmp[i] = PETSC_FALSE;
72364e84afc0SStefano Zampini     rmapt[i] = -1;
72374e84afc0SStefano Zampini     cmapt[i] = -1;
72384e84afc0SStefano Zampini     rmapa[i] = NULL;
72394e84afc0SStefano Zampini     cmapa[i] = NULL;
72404e84afc0SStefano Zampini   }
72414e84afc0SStefano Zampini 
72424e84afc0SStefano Zampini   /* customization */
72439566063dSJacob Faibussowitsch   PetscCall(PetscNew(&mmdata));
72444e84afc0SStefano Zampini   mmdata->reusesym = product->api_user;
72454e84afc0SStefano Zampini   if (ptype == MATPRODUCT_AB) {
72464e84afc0SStefano Zampini     if (product->api_user) {
7247d0609cedSBarry Smith       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
72489566063dSJacob Faibussowitsch       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
72499566063dSJacob Faibussowitsch       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7250d0609cedSBarry Smith       PetscOptionsEnd();
72514e84afc0SStefano Zampini     } else {
7252d0609cedSBarry Smith       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
72539566063dSJacob Faibussowitsch       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
72549566063dSJacob Faibussowitsch       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7255d0609cedSBarry Smith       PetscOptionsEnd();
7256abb89eb1SStefano Zampini     }
7257abb89eb1SStefano Zampini   } else if (ptype == MATPRODUCT_PtAP) {
7258abb89eb1SStefano Zampini     if (product->api_user) {
7259d0609cedSBarry Smith       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
72609566063dSJacob Faibussowitsch       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7261d0609cedSBarry Smith       PetscOptionsEnd();
7262abb89eb1SStefano Zampini     } else {
7263d0609cedSBarry Smith       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
72649566063dSJacob Faibussowitsch       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7265d0609cedSBarry Smith       PetscOptionsEnd();
72664e84afc0SStefano Zampini     }
72674e84afc0SStefano Zampini   }
72684e84afc0SStefano Zampini   a = (Mat_MPIAIJ *)A->data;
72694e84afc0SStefano Zampini   p = (Mat_MPIAIJ *)P->data;
72709566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, m, n, M, N));
72719566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(C->rmap));
72729566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(C->cmap));
72739566063dSJacob Faibussowitsch   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
72749566063dSJacob Faibussowitsch   PetscCall(MatGetOptionsPrefix(C, &prefix));
7275ddea5d60SJunchao Zhang 
7276ddea5d60SJunchao Zhang   cp = 0;
72774e84afc0SStefano Zampini   switch (ptype) {
72784e84afc0SStefano Zampini   case MATPRODUCT_AB: /* A * P */
72799566063dSJacob Faibussowitsch     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
72804e84afc0SStefano Zampini 
7281ddea5d60SJunchao Zhang     /* A_diag * P_local (merged or not) */
7282ddea5d60SJunchao Zhang     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
72834e84afc0SStefano Zampini       /* P is product->B */
72849566063dSJacob Faibussowitsch       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
72859566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
72869566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
72879566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp], product->fill));
72889566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
72899566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
72909566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
72914e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
72929566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
72939566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
72949566063dSJacob Faibussowitsch       PetscCall(ISGetIndices(glob, &globidx));
72954e84afc0SStefano Zampini       rmapt[cp] = 1;
72964e84afc0SStefano Zampini       cmapt[cp] = 2;
72974e84afc0SStefano Zampini       cmapa[cp] = globidx;
72984e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
72994e84afc0SStefano Zampini       cp++;
7300ddea5d60SJunchao Zhang     } else { /* A_diag * P_diag and A_diag * P_off */
73019566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
73029566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
73039566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp], product->fill));
73049566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
73059566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
73069566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
73074e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
73089566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
73099566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
73104e84afc0SStefano Zampini       rmapt[cp] = 1;
73114e84afc0SStefano Zampini       cmapt[cp] = 1;
73124e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
73134e84afc0SStefano Zampini       cp++;
73149566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
73159566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
73169566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp], product->fill));
73179566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
73189566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
73199566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
73204e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
73219566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
73229566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
73234e84afc0SStefano Zampini       rmapt[cp] = 1;
73244e84afc0SStefano Zampini       cmapt[cp] = 2;
73254e84afc0SStefano Zampini       cmapa[cp] = p->garray;
73264e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
73274e84afc0SStefano Zampini       cp++;
73284e84afc0SStefano Zampini     }
7329ddea5d60SJunchao Zhang 
7330ddea5d60SJunchao Zhang     /* A_off * P_other */
73314e84afc0SStefano Zampini     if (mmdata->P_oth) {
73329566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
73339566063dSJacob Faibussowitsch       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7334f4f49eeaSPierre Jolivet       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
73359566063dSJacob Faibussowitsch       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
73369566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
73379566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
73389566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp], product->fill));
73399566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
73409566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
73419566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
73424e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
73439566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
73449566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
73454e84afc0SStefano Zampini       rmapt[cp] = 1;
73464e84afc0SStefano Zampini       cmapt[cp] = 2;
73474e84afc0SStefano Zampini       cmapa[cp] = P_oth_idx;
73484e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
73494e84afc0SStefano Zampini       cp++;
73504e84afc0SStefano Zampini     }
73514e84afc0SStefano Zampini     break;
7352ddea5d60SJunchao Zhang 
73534e84afc0SStefano Zampini   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
73544e84afc0SStefano Zampini     /* A is product->B */
73559566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7356ddea5d60SJunchao Zhang     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
73579566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
73589566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
73599566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp], product->fill));
73609566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
73619566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
73629566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
73634e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
73649566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
73659566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
73669566063dSJacob Faibussowitsch       PetscCall(ISGetIndices(glob, &globidx));
73674e84afc0SStefano Zampini       rmapt[cp] = 2;
73684e84afc0SStefano Zampini       rmapa[cp] = globidx;
73694e84afc0SStefano Zampini       cmapt[cp] = 2;
73704e84afc0SStefano Zampini       cmapa[cp] = globidx;
73714e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
73724e84afc0SStefano Zampini       cp++;
73734e84afc0SStefano Zampini     } else {
73749566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
73759566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
73769566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp], product->fill));
73779566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
73789566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
73799566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
73804e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
73819566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
73829566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
73839566063dSJacob Faibussowitsch       PetscCall(ISGetIndices(glob, &globidx));
73844e84afc0SStefano Zampini       rmapt[cp] = 1;
73854e84afc0SStefano Zampini       cmapt[cp] = 2;
73864e84afc0SStefano Zampini       cmapa[cp] = globidx;
73874e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
73884e84afc0SStefano Zampini       cp++;
73899566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
73909566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
73919566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp], product->fill));
73929566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
73939566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
73949566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
73954e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
73969566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
73979566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
73984e84afc0SStefano Zampini       rmapt[cp] = 2;
73994e84afc0SStefano Zampini       rmapa[cp] = p->garray;
74004e84afc0SStefano Zampini       cmapt[cp] = 2;
74014e84afc0SStefano Zampini       cmapa[cp] = globidx;
74024e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
74034e84afc0SStefano Zampini       cp++;
74044e84afc0SStefano Zampini     }
74054e84afc0SStefano Zampini     break;
74064e84afc0SStefano Zampini   case MATPRODUCT_PtAP:
74079566063dSJacob Faibussowitsch     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
74084e84afc0SStefano Zampini     /* P is product->B */
74099566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
74109566063dSJacob Faibussowitsch     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
74119566063dSJacob Faibussowitsch     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
74129566063dSJacob Faibussowitsch     PetscCall(MatProductSetFill(mp[cp], product->fill));
74139566063dSJacob Faibussowitsch     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
74149566063dSJacob Faibussowitsch     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
74159566063dSJacob Faibussowitsch     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
74164e84afc0SStefano Zampini     mp[cp]->product->api_user = product->api_user;
74179566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions(mp[cp]));
74189566063dSJacob Faibussowitsch     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
74199566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(glob, &globidx));
74204e84afc0SStefano Zampini     rmapt[cp] = 2;
74214e84afc0SStefano Zampini     rmapa[cp] = globidx;
74224e84afc0SStefano Zampini     cmapt[cp] = 2;
74234e84afc0SStefano Zampini     cmapa[cp] = globidx;
74244e84afc0SStefano Zampini     mptmp[cp] = PETSC_FALSE;
74254e84afc0SStefano Zampini     cp++;
74264e84afc0SStefano Zampini     if (mmdata->P_oth) {
74279566063dSJacob Faibussowitsch       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
74289566063dSJacob Faibussowitsch       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7429f4f49eeaSPierre Jolivet       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
74309566063dSJacob Faibussowitsch       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
74319566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
74329566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
74339566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp], product->fill));
74349566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
74359566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
74369566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
74374e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
74389566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
74399566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
74404e84afc0SStefano Zampini       mptmp[cp] = PETSC_TRUE;
74414e84afc0SStefano Zampini       cp++;
74429566063dSJacob Faibussowitsch       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
74439566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
74449566063dSJacob Faibussowitsch       PetscCall(MatProductSetFill(mp[cp], product->fill));
74459566063dSJacob Faibussowitsch       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
74469566063dSJacob Faibussowitsch       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
74479566063dSJacob Faibussowitsch       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
74484e84afc0SStefano Zampini       mp[cp]->product->api_user = product->api_user;
74499566063dSJacob Faibussowitsch       PetscCall(MatProductSetFromOptions(mp[cp]));
74509566063dSJacob Faibussowitsch       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
74514e84afc0SStefano Zampini       rmapt[cp] = 2;
74524e84afc0SStefano Zampini       rmapa[cp] = globidx;
74534e84afc0SStefano Zampini       cmapt[cp] = 2;
74544e84afc0SStefano Zampini       cmapa[cp] = P_oth_idx;
74554e84afc0SStefano Zampini       mptmp[cp] = PETSC_FALSE;
74564e84afc0SStefano Zampini       cp++;
74574e84afc0SStefano Zampini     }
74584e84afc0SStefano Zampini     break;
7459d71ae5a4SJacob Faibussowitsch   default:
7460d71ae5a4SJacob Faibussowitsch     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
74614e84afc0SStefano Zampini   }
74624e84afc0SStefano Zampini   /* sanity check */
74639371c9d4SSatish Balay   if (size > 1)
74649371c9d4SSatish Balay     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
74654e84afc0SStefano Zampini 
74669566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7467ddea5d60SJunchao Zhang   for (i = 0; i < cp; i++) {
7468ddea5d60SJunchao Zhang     mmdata->mp[i]    = mp[i];
7469ddea5d60SJunchao Zhang     mmdata->mptmp[i] = mptmp[i];
7470ddea5d60SJunchao Zhang   }
74714e84afc0SStefano Zampini   mmdata->cp             = cp;
74724e84afc0SStefano Zampini   C->product->data       = mmdata;
74734e84afc0SStefano Zampini   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
74744e84afc0SStefano Zampini   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
74754e84afc0SStefano Zampini 
7476c215019aSStefano Zampini   /* memory type */
7477c215019aSStefano Zampini   mmdata->mtype = PETSC_MEMTYPE_HOST;
74789566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7479d5e393b6SSuyash Tandon   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
74809566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7481c215019aSStefano Zampini   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7482d5e393b6SSuyash Tandon   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
74833214990dSStefano Zampini   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7484c215019aSStefano Zampini 
74854e84afc0SStefano Zampini   /* prepare coo coordinates for values insertion */
7486ddea5d60SJunchao Zhang 
7487ddea5d60SJunchao Zhang   /* count total nonzeros of those intermediate seqaij Mats
7488ddea5d60SJunchao Zhang     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7489ddea5d60SJunchao Zhang     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7490ddea5d60SJunchao Zhang     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7491ddea5d60SJunchao Zhang   */
74924e84afc0SStefano Zampini   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
74934e84afc0SStefano Zampini     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
74944e84afc0SStefano Zampini     if (mptmp[cp]) continue;
7495ddea5d60SJunchao Zhang     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
74964e84afc0SStefano Zampini       const PetscInt *rmap = rmapa[cp];
74974e84afc0SStefano Zampini       const PetscInt  mr   = mp[cp]->rmap->n;
74984e84afc0SStefano Zampini       const PetscInt  rs   = C->rmap->rstart;
74994e84afc0SStefano Zampini       const PetscInt  re   = C->rmap->rend;
75004e84afc0SStefano Zampini       const PetscInt *ii   = mm->i;
75014e84afc0SStefano Zampini       for (i = 0; i < mr; i++) {
75024e84afc0SStefano Zampini         const PetscInt gr = rmap[i];
75034e84afc0SStefano Zampini         const PetscInt nz = ii[i + 1] - ii[i];
7504ddea5d60SJunchao Zhang         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7505ddea5d60SJunchao Zhang         else ncoo_oown += nz;                  /* this row is local */
75064e84afc0SStefano Zampini       }
75074e84afc0SStefano Zampini     } else ncoo_d += mm->nz;
75084e84afc0SStefano Zampini   }
7509ddea5d60SJunchao Zhang 
7510ddea5d60SJunchao Zhang   /*
7511ddea5d60SJunchao Zhang     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7512ddea5d60SJunchao Zhang 
7513ddea5d60SJunchao Zhang     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7514ddea5d60SJunchao Zhang 
7515d5b43468SJose E. Roman     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7516ddea5d60SJunchao Zhang 
7517ddea5d60SJunchao Zhang     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7518ddea5d60SJunchao Zhang     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7519ddea5d60SJunchao Zhang     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7520ddea5d60SJunchao Zhang 
7521ddea5d60SJunchao Zhang     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7522da81f932SPierre Jolivet     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7523ddea5d60SJunchao Zhang   */
75249566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
75259566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7526ddea5d60SJunchao Zhang 
7527ddea5d60SJunchao Zhang   /* gather (i,j) of nonzeros inserted by remote procs */
7528ddea5d60SJunchao Zhang   if (hasoffproc) {
75294e84afc0SStefano Zampini     PetscSF  msf;
75304e84afc0SStefano Zampini     PetscInt ncoo2, *coo_i2, *coo_j2;
75314e84afc0SStefano Zampini 
75329566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
75339566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
75349566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7535ddea5d60SJunchao Zhang 
75364e84afc0SStefano Zampini     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
75374e84afc0SStefano Zampini       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
75384e84afc0SStefano Zampini       PetscInt   *idxoff = mmdata->off[cp];
75394e84afc0SStefano Zampini       PetscInt   *idxown = mmdata->own[cp];
7540ddea5d60SJunchao Zhang       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
75414e84afc0SStefano Zampini         const PetscInt *rmap = rmapa[cp];
75424e84afc0SStefano Zampini         const PetscInt *cmap = cmapa[cp];
75434e84afc0SStefano Zampini         const PetscInt *ii   = mm->i;
75444e84afc0SStefano Zampini         PetscInt       *coi  = coo_i + ncoo_o;
75454e84afc0SStefano Zampini         PetscInt       *coj  = coo_j + ncoo_o;
75464e84afc0SStefano Zampini         const PetscInt  mr   = mp[cp]->rmap->n;
75474e84afc0SStefano Zampini         const PetscInt  rs   = C->rmap->rstart;
75484e84afc0SStefano Zampini         const PetscInt  re   = C->rmap->rend;
75494e84afc0SStefano Zampini         const PetscInt  cs   = C->cmap->rstart;
75504e84afc0SStefano Zampini         for (i = 0; i < mr; i++) {
75514e84afc0SStefano Zampini           const PetscInt *jj = mm->j + ii[i];
75524e84afc0SStefano Zampini           const PetscInt  gr = rmap[i];
75534e84afc0SStefano Zampini           const PetscInt  nz = ii[i + 1] - ii[i];
7554ddea5d60SJunchao Zhang           if (gr < rs || gr >= re) { /* this is an offproc row */
75554e84afc0SStefano Zampini             for (j = ii[i]; j < ii[i + 1]; j++) {
75564e84afc0SStefano Zampini               *coi++    = gr;
75574e84afc0SStefano Zampini               *idxoff++ = j;
75584e84afc0SStefano Zampini             }
75594e84afc0SStefano Zampini             if (!cmapt[cp]) { /* already global */
75604e84afc0SStefano Zampini               for (j = 0; j < nz; j++) *coj++ = jj[j];
75614e84afc0SStefano Zampini             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
75624e84afc0SStefano Zampini               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
75634e84afc0SStefano Zampini             } else { /* offdiag */
75644e84afc0SStefano Zampini               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
75654e84afc0SStefano Zampini             }
75664e84afc0SStefano Zampini             ncoo_o += nz;
7567ddea5d60SJunchao Zhang           } else { /* this is a local row */
75684e84afc0SStefano Zampini             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
75694e84afc0SStefano Zampini           }
75704e84afc0SStefano Zampini         }
75714e84afc0SStefano Zampini       }
75724e84afc0SStefano Zampini       mmdata->off[cp + 1] = idxoff;
75734e84afc0SStefano Zampini       mmdata->own[cp + 1] = idxown;
75744e84afc0SStefano Zampini     }
75754e84afc0SStefano Zampini 
75769566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
75776497c311SBarry Smith     PetscInt incoo_o;
75786497c311SBarry Smith     PetscCall(PetscIntCast(ncoo_o, &incoo_o));
75796497c311SBarry Smith     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
75809566063dSJacob Faibussowitsch     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
75819566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
75824e84afc0SStefano Zampini     ncoo = ncoo_d + ncoo_oown + ncoo2;
75839566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
75849566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
75859566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
75869566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
75879566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
75889566063dSJacob Faibussowitsch     PetscCall(PetscFree2(coo_i, coo_j));
7589ddea5d60SJunchao Zhang     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
75909566063dSJacob Faibussowitsch     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
75914e84afc0SStefano Zampini     coo_i = coo_i2;
75924e84afc0SStefano Zampini     coo_j = coo_j2;
75934e84afc0SStefano Zampini   } else { /* no offproc values insertion */
75944e84afc0SStefano Zampini     ncoo = ncoo_d;
75959566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7596c215019aSStefano Zampini 
75979566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
75989566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
75999566063dSJacob Faibussowitsch     PetscCall(PetscSFSetUp(mmdata->sf));
76004e84afc0SStefano Zampini   }
7601c215019aSStefano Zampini   mmdata->hasoffproc = hasoffproc;
76024e84afc0SStefano Zampini 
7603ddea5d60SJunchao Zhang   /* gather (i,j) of nonzeros inserted locally */
76044e84afc0SStefano Zampini   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
76054e84afc0SStefano Zampini     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
76064e84afc0SStefano Zampini     PetscInt       *coi  = coo_i + ncoo_d;
76074e84afc0SStefano Zampini     PetscInt       *coj  = coo_j + ncoo_d;
76084e84afc0SStefano Zampini     const PetscInt *jj   = mm->j;
76094e84afc0SStefano Zampini     const PetscInt *ii   = mm->i;
76104e84afc0SStefano Zampini     const PetscInt *cmap = cmapa[cp];
76114e84afc0SStefano Zampini     const PetscInt *rmap = rmapa[cp];
76124e84afc0SStefano Zampini     const PetscInt  mr   = mp[cp]->rmap->n;
76134e84afc0SStefano Zampini     const PetscInt  rs   = C->rmap->rstart;
76144e84afc0SStefano Zampini     const PetscInt  re   = C->rmap->rend;
76154e84afc0SStefano Zampini     const PetscInt  cs   = C->cmap->rstart;
76164e84afc0SStefano Zampini 
76174e84afc0SStefano Zampini     if (mptmp[cp]) continue;
7618ddea5d60SJunchao Zhang     if (rmapt[cp] == 1) { /* consecutive rows */
7619ddea5d60SJunchao Zhang       /* fill coo_i */
76204e84afc0SStefano Zampini       for (i = 0; i < mr; i++) {
76214e84afc0SStefano Zampini         const PetscInt gr = i + rs;
76224e84afc0SStefano Zampini         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
76234e84afc0SStefano Zampini       }
7624ddea5d60SJunchao Zhang       /* fill coo_j */
7625ddea5d60SJunchao Zhang       if (!cmapt[cp]) { /* type-0, already global */
76269566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7627ddea5d60SJunchao Zhang       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7628ddea5d60SJunchao Zhang         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7629ddea5d60SJunchao Zhang       } else {                                            /* type-2, local to global for sparse columns */
76304e84afc0SStefano Zampini         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
76314e84afc0SStefano Zampini       }
76324e84afc0SStefano Zampini       ncoo_d += mm->nz;
7633ddea5d60SJunchao Zhang     } else if (rmapt[cp] == 2) { /* sparse rows */
76344e84afc0SStefano Zampini       for (i = 0; i < mr; i++) {
76354e84afc0SStefano Zampini         const PetscInt *jj = mm->j + ii[i];
76364e84afc0SStefano Zampini         const PetscInt  gr = rmap[i];
76374e84afc0SStefano Zampini         const PetscInt  nz = ii[i + 1] - ii[i];
7638ddea5d60SJunchao Zhang         if (gr >= rs && gr < re) { /* local rows */
76394e84afc0SStefano Zampini           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7640ddea5d60SJunchao Zhang           if (!cmapt[cp]) { /* type-0, already global */
76414e84afc0SStefano Zampini             for (j = 0; j < nz; j++) *coj++ = jj[j];
76424e84afc0SStefano Zampini           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
76434e84afc0SStefano Zampini             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7644ddea5d60SJunchao Zhang           } else { /* type-2, local to global for sparse columns */
76454e84afc0SStefano Zampini             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
76464e84afc0SStefano Zampini           }
76474e84afc0SStefano Zampini           ncoo_d += nz;
76484e84afc0SStefano Zampini         }
76494e84afc0SStefano Zampini       }
76504e84afc0SStefano Zampini     }
76514e84afc0SStefano Zampini   }
765248a46eb9SPierre Jolivet   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
76539566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&glob));
765448a46eb9SPierre Jolivet   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
76559566063dSJacob Faibussowitsch   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7656ddea5d60SJunchao Zhang   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
76579566063dSJacob Faibussowitsch   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
76584e84afc0SStefano Zampini 
76594e84afc0SStefano Zampini   /* preallocate with COO data */
76609566063dSJacob Faibussowitsch   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
76619566063dSJacob Faibussowitsch   PetscCall(PetscFree2(coo_i, coo_j));
76623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
76634e84afc0SStefano Zampini }
76644e84afc0SStefano Zampini 
7665d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7666d71ae5a4SJacob Faibussowitsch {
76674e84afc0SStefano Zampini   Mat_Product *product = mat->product;
76684e84afc0SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
76694e84afc0SStefano Zampini   PetscBool match  = PETSC_FALSE;
7670abb89eb1SStefano Zampini   PetscBool usecpu = PETSC_FALSE;
76714e84afc0SStefano Zampini #else
76724e84afc0SStefano Zampini   PetscBool match = PETSC_TRUE;
76734e84afc0SStefano Zampini #endif
76744e84afc0SStefano Zampini 
76754e84afc0SStefano Zampini   PetscFunctionBegin;
76764e84afc0SStefano Zampini   MatCheckProduct(mat, 1);
76774e84afc0SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
767848a46eb9SPierre Jolivet   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
767965e4b4d4SStefano Zampini   if (match) { /* we can always fallback to the CPU if requested */
7680abb89eb1SStefano Zampini     switch (product->type) {
7681abb89eb1SStefano Zampini     case MATPRODUCT_AB:
7682abb89eb1SStefano Zampini       if (product->api_user) {
7683d0609cedSBarry Smith         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
76849566063dSJacob Faibussowitsch         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7685d0609cedSBarry Smith         PetscOptionsEnd();
7686abb89eb1SStefano Zampini       } else {
7687d0609cedSBarry Smith         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
76889566063dSJacob Faibussowitsch         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7689d0609cedSBarry Smith         PetscOptionsEnd();
7690abb89eb1SStefano Zampini       }
7691abb89eb1SStefano Zampini       break;
7692abb89eb1SStefano Zampini     case MATPRODUCT_AtB:
7693abb89eb1SStefano Zampini       if (product->api_user) {
7694d0609cedSBarry Smith         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
76959566063dSJacob Faibussowitsch         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7696d0609cedSBarry Smith         PetscOptionsEnd();
7697abb89eb1SStefano Zampini       } else {
7698d0609cedSBarry Smith         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
76999566063dSJacob Faibussowitsch         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7700d0609cedSBarry Smith         PetscOptionsEnd();
7701abb89eb1SStefano Zampini       }
7702abb89eb1SStefano Zampini       break;
7703abb89eb1SStefano Zampini     case MATPRODUCT_PtAP:
7704abb89eb1SStefano Zampini       if (product->api_user) {
7705d0609cedSBarry Smith         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
77069566063dSJacob Faibussowitsch         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7707d0609cedSBarry Smith         PetscOptionsEnd();
7708abb89eb1SStefano Zampini       } else {
7709d0609cedSBarry Smith         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
77109566063dSJacob Faibussowitsch         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7711d0609cedSBarry Smith         PetscOptionsEnd();
7712abb89eb1SStefano Zampini       }
7713abb89eb1SStefano Zampini       break;
7714d71ae5a4SJacob Faibussowitsch     default:
7715d71ae5a4SJacob Faibussowitsch       break;
7716abb89eb1SStefano Zampini     }
7717abb89eb1SStefano Zampini     match = (PetscBool)!usecpu;
7718abb89eb1SStefano Zampini   }
77194e84afc0SStefano Zampini #endif
77204e84afc0SStefano Zampini   if (match) {
77214e84afc0SStefano Zampini     switch (product->type) {
77224e84afc0SStefano Zampini     case MATPRODUCT_AB:
77234e84afc0SStefano Zampini     case MATPRODUCT_AtB:
7724d71ae5a4SJacob Faibussowitsch     case MATPRODUCT_PtAP:
7725d71ae5a4SJacob Faibussowitsch       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7726d71ae5a4SJacob Faibussowitsch       break;
7727d71ae5a4SJacob Faibussowitsch     default:
7728d71ae5a4SJacob Faibussowitsch       break;
77294e84afc0SStefano Zampini     }
77304e84afc0SStefano Zampini   }
77314e84afc0SStefano Zampini   /* fallback to MPIAIJ ops */
77329566063dSJacob Faibussowitsch   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
77333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
773481824310SBarry Smith }
773598921bdaSJacob Faibussowitsch 
773698921bdaSJacob Faibussowitsch /*
773772833a62Smarkadams4    Produces a set of block column indices of the matrix row, one for each block represented in the original row
773872833a62Smarkadams4 
773972833a62Smarkadams4    n - the number of block indices in cc[]
774072833a62Smarkadams4    cc - the block indices (must be large enough to contain the indices)
774172833a62Smarkadams4 */
7742d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7743d71ae5a4SJacob Faibussowitsch {
774472833a62Smarkadams4   PetscInt        cnt = -1, nidx, j;
774572833a62Smarkadams4   const PetscInt *idx;
774672833a62Smarkadams4 
774772833a62Smarkadams4   PetscFunctionBegin;
774872833a62Smarkadams4   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
774972833a62Smarkadams4   if (nidx) {
775072833a62Smarkadams4     cnt     = 0;
775172833a62Smarkadams4     cc[cnt] = idx[0] / bs;
775272833a62Smarkadams4     for (j = 1; j < nidx; j++) {
775372833a62Smarkadams4       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
775472833a62Smarkadams4     }
775572833a62Smarkadams4   }
775672833a62Smarkadams4   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
775772833a62Smarkadams4   *n = cnt + 1;
77583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
775972833a62Smarkadams4 }
776072833a62Smarkadams4 
776172833a62Smarkadams4 /*
776272833a62Smarkadams4     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
776372833a62Smarkadams4 
776472833a62Smarkadams4     ncollapsed - the number of block indices
776572833a62Smarkadams4     collapsed - the block indices (must be large enough to contain the indices)
776672833a62Smarkadams4 */
7767d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7768d71ae5a4SJacob Faibussowitsch {
776972833a62Smarkadams4   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
777072833a62Smarkadams4 
777172833a62Smarkadams4   PetscFunctionBegin;
777272833a62Smarkadams4   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
777372833a62Smarkadams4   for (i = start + 1; i < start + bs; i++) {
777472833a62Smarkadams4     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
777572833a62Smarkadams4     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
77769371c9d4SSatish Balay     cprevtmp = cprev;
77779371c9d4SSatish Balay     cprev    = merged;
77789371c9d4SSatish Balay     merged   = cprevtmp;
777972833a62Smarkadams4   }
778072833a62Smarkadams4   *ncollapsed = nprev;
778172833a62Smarkadams4   if (collapsed) *collapsed = cprev;
77823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
778372833a62Smarkadams4 }
778472833a62Smarkadams4 
/*
 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

 Input Parameters:
 + Amat - matrix
 . symmetrize - make the result symmetric
 . scale - scale with diagonal
 . filter - filtering threshold (NOTE(review): exact semantics are not described here; confirm against the implementation)
 . index_size - number of entries in index[]; if 0, every entry of a block contributes to the block's graph weight
 - index - in-block row/column indices whose entries are summed (in absolute value) when index_size > 0

 Output Parameter:
 . a_Gmat - output scalar graph >= 0

*/
7797e02fb3cdSMark Adams PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7798d71ae5a4SJacob Faibussowitsch {
779972833a62Smarkadams4   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
780072833a62Smarkadams4   MPI_Comm  comm;
780172833a62Smarkadams4   Mat       Gmat;
780272833a62Smarkadams4   PetscBool ismpiaij, isseqaij;
780372833a62Smarkadams4   Mat       a, b, c;
780472833a62Smarkadams4   MatType   jtype;
780572833a62Smarkadams4 
780672833a62Smarkadams4   PetscFunctionBegin;
780772833a62Smarkadams4   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
780872833a62Smarkadams4   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
780972833a62Smarkadams4   PetscCall(MatGetSize(Amat, &MM, &NN));
781072833a62Smarkadams4   PetscCall(MatGetBlockSize(Amat, &bs));
781172833a62Smarkadams4   nloc = (Iend - Istart) / bs;
781272833a62Smarkadams4 
781372833a62Smarkadams4   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
781472833a62Smarkadams4   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
781572833a62Smarkadams4   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
781672833a62Smarkadams4 
781772833a62Smarkadams4   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
781872833a62Smarkadams4   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
781972833a62Smarkadams4      implementation */
782072833a62Smarkadams4   if (bs > 1) {
782172833a62Smarkadams4     PetscCall(MatGetType(Amat, &jtype));
782272833a62Smarkadams4     PetscCall(MatCreate(comm, &Gmat));
782372833a62Smarkadams4     PetscCall(MatSetType(Gmat, jtype));
782472833a62Smarkadams4     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
782572833a62Smarkadams4     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
782672833a62Smarkadams4     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
782772833a62Smarkadams4       PetscInt  *d_nnz, *o_nnz;
78282cf69117Smarkadams4       MatScalar *aa, val, *AA;
78292cf69117Smarkadams4       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
78306497c311SBarry Smith 
78319371c9d4SSatish Balay       if (isseqaij) {
78329371c9d4SSatish Balay         a = Amat;
78339371c9d4SSatish Balay         b = NULL;
78349371c9d4SSatish Balay       } else {
783572833a62Smarkadams4         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
78369371c9d4SSatish Balay         a             = d->A;
78379371c9d4SSatish Balay         b             = d->B;
783872833a62Smarkadams4       }
783972833a62Smarkadams4       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
784032603206SJames Wright       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
784172833a62Smarkadams4       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
78422cf69117Smarkadams4         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
784359ee9f9fSPierre Jolivet         const PetscInt *cols1, *cols2;
78446497c311SBarry Smith 
784559ee9f9fSPierre Jolivet         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
784659ee9f9fSPierre Jolivet           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
784759ee9f9fSPierre Jolivet           nnz[brow / bs] = nc2 / bs;
784859ee9f9fSPierre Jolivet           if (nc2 % bs) ok = 0;
784972833a62Smarkadams4           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
785059ee9f9fSPierre Jolivet           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
785159ee9f9fSPierre Jolivet             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
785259ee9f9fSPierre Jolivet             if (nc1 != nc2) ok = 0;
785359ee9f9fSPierre Jolivet             else {
785459ee9f9fSPierre Jolivet               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
785559ee9f9fSPierre Jolivet                 if (cols1[jj] != cols2[jj]) ok = 0;
785659ee9f9fSPierre Jolivet                 if (cols1[jj] % bs != jj % bs) ok = 0;
785772833a62Smarkadams4               }
785859ee9f9fSPierre Jolivet             }
785959ee9f9fSPierre Jolivet             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
786059ee9f9fSPierre Jolivet           }
786159ee9f9fSPierre Jolivet           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
786272833a62Smarkadams4           if (!ok) {
786372833a62Smarkadams4             PetscCall(PetscFree2(d_nnz, o_nnz));
786459ee9f9fSPierre Jolivet             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
786572833a62Smarkadams4             goto old_bs;
786672833a62Smarkadams4           }
786772833a62Smarkadams4         }
786872833a62Smarkadams4       }
786972833a62Smarkadams4       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
787072833a62Smarkadams4       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
787172833a62Smarkadams4       PetscCall(PetscFree2(d_nnz, o_nnz));
78722cf69117Smarkadams4       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
787372833a62Smarkadams4       // diag
787472833a62Smarkadams4       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
787572833a62Smarkadams4         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
78766497c311SBarry Smith 
787772833a62Smarkadams4         ai = aseq->i;
787872833a62Smarkadams4         n  = ai[brow + 1] - ai[brow];
787972833a62Smarkadams4         aj = aseq->j + ai[brow];
78806497c311SBarry Smith         for (PetscInt k = 0; k < n; k += bs) {   // block columns
788172833a62Smarkadams4           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
788272833a62Smarkadams4           val        = 0;
7883e02fb3cdSMark Adams           if (index_size == 0) {
78846497c311SBarry Smith             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
788572833a62Smarkadams4               aa = aseq->a + ai[brow + ii] + k;
78866497c311SBarry Smith               for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
788772833a62Smarkadams4                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
788872833a62Smarkadams4               }
788972833a62Smarkadams4             }
7890e02fb3cdSMark Adams           } else {                                            // use (index,index) value if provided
78916497c311SBarry Smith             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
78926497c311SBarry Smith               PetscInt ii = index[iii];
7893e02fb3cdSMark Adams               aa          = aseq->a + ai[brow + ii] + k;
78946497c311SBarry Smith               for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
78956497c311SBarry Smith                 PetscInt jj = index[jjj];
7896cd5bc9d0SMark Adams                 val += PetscAbs(PetscRealPart(aa[jj]));
7897e02fb3cdSMark Adams               }
7898e02fb3cdSMark Adams             }
7899e02fb3cdSMark Adams           }
7900835f2295SStefano Zampini           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
790172833a62Smarkadams4           AA[k / bs] = val;
790272833a62Smarkadams4         }
790372833a62Smarkadams4         grow = Istart / bs + brow / bs;
790466521e1fSMark Adams         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
790572833a62Smarkadams4       }
790672833a62Smarkadams4       // off-diag
790772833a62Smarkadams4       if (ismpiaij) {
790872833a62Smarkadams4         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
790972833a62Smarkadams4         const PetscScalar *vals;
791072833a62Smarkadams4         const PetscInt    *cols, *garray = aij->garray;
79116497c311SBarry Smith 
791272833a62Smarkadams4         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
791372833a62Smarkadams4         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
791472833a62Smarkadams4           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
79156497c311SBarry Smith           for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
791659ee9f9fSPierre Jolivet             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
791772833a62Smarkadams4             AA[k / bs] = 0;
791872833a62Smarkadams4             AJ[cidx]   = garray[cols[k]] / bs;
791972833a62Smarkadams4           }
792072833a62Smarkadams4           nc = ncols / bs;
792172833a62Smarkadams4           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7922e02fb3cdSMark Adams           if (index_size == 0) {
79236497c311SBarry Smith             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
792472833a62Smarkadams4               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
79256497c311SBarry Smith               for (PetscInt k = 0; k < ncols; k += bs) {
79266497c311SBarry Smith                 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
7927835f2295SStefano Zampini                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
792872833a62Smarkadams4                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
792972833a62Smarkadams4                 }
793072833a62Smarkadams4               }
793172833a62Smarkadams4               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
793272833a62Smarkadams4             }
7933e02fb3cdSMark Adams           } else {                                            // use (index,index) value if provided
79346497c311SBarry Smith             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
79356497c311SBarry Smith               PetscInt ii = index[iii];
7936e02fb3cdSMark Adams               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
79376497c311SBarry Smith               for (PetscInt k = 0; k < ncols; k += bs) {
79386497c311SBarry Smith                 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
79396497c311SBarry Smith                   PetscInt jj = index[jjj];
7940e02fb3cdSMark Adams                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7941e02fb3cdSMark Adams                 }
7942e02fb3cdSMark Adams               }
7943e02fb3cdSMark Adams               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7944e02fb3cdSMark Adams             }
7945e02fb3cdSMark Adams           }
794672833a62Smarkadams4           grow = Istart / bs + brow / bs;
794766521e1fSMark Adams           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
794872833a62Smarkadams4         }
794972833a62Smarkadams4       }
795072833a62Smarkadams4       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
795172833a62Smarkadams4       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
79522cf69117Smarkadams4       PetscCall(PetscFree2(AA, AJ));
795372833a62Smarkadams4     } else {
795472833a62Smarkadams4       const PetscScalar *vals;
795572833a62Smarkadams4       const PetscInt    *idx;
795672833a62Smarkadams4       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
795772833a62Smarkadams4     old_bs:
795872833a62Smarkadams4       /*
795972833a62Smarkadams4        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
796072833a62Smarkadams4        */
796172833a62Smarkadams4       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
796232603206SJames Wright       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
796372833a62Smarkadams4       if (isseqaij) {
796472833a62Smarkadams4         PetscInt max_d_nnz;
79656497c311SBarry Smith 
796672833a62Smarkadams4         /*
796772833a62Smarkadams4          Determine exact preallocation count for (sequential) scalar matrix
796872833a62Smarkadams4          */
796972833a62Smarkadams4         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
797072833a62Smarkadams4         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
797172833a62Smarkadams4         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
797248a46eb9SPierre Jolivet         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
797372833a62Smarkadams4         PetscCall(PetscFree3(w0, w1, w2));
797472833a62Smarkadams4       } else if (ismpiaij) {
797572833a62Smarkadams4         Mat             Daij, Oaij;
797672833a62Smarkadams4         const PetscInt *garray;
797772833a62Smarkadams4         PetscInt        max_d_nnz;
79786497c311SBarry Smith 
797972833a62Smarkadams4         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
798072833a62Smarkadams4         /*
798172833a62Smarkadams4          Determine exact preallocation count for diagonal block portion of scalar matrix
798272833a62Smarkadams4          */
798372833a62Smarkadams4         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
798472833a62Smarkadams4         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
798572833a62Smarkadams4         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
798648a46eb9SPierre Jolivet         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
798772833a62Smarkadams4         PetscCall(PetscFree3(w0, w1, w2));
798872833a62Smarkadams4         /*
798972833a62Smarkadams4          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
799072833a62Smarkadams4          */
799172833a62Smarkadams4         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
799272833a62Smarkadams4           o_nnz[jj] = 0;
799372833a62Smarkadams4           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
799472833a62Smarkadams4             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
799572833a62Smarkadams4             o_nnz[jj] += ncols;
799672833a62Smarkadams4             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
799772833a62Smarkadams4           }
799872833a62Smarkadams4           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
799972833a62Smarkadams4         }
800072833a62Smarkadams4       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
800172833a62Smarkadams4       /* get scalar copy (norms) of matrix */
800272833a62Smarkadams4       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
800372833a62Smarkadams4       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
800472833a62Smarkadams4       PetscCall(PetscFree2(d_nnz, o_nnz));
800572833a62Smarkadams4       for (Ii = Istart; Ii < Iend; Ii++) {
800672833a62Smarkadams4         PetscInt dest_row = Ii / bs;
80076497c311SBarry Smith 
800872833a62Smarkadams4         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
800972833a62Smarkadams4         for (jj = 0; jj < ncols; jj++) {
801072833a62Smarkadams4           PetscInt    dest_col = idx[jj] / bs;
801172833a62Smarkadams4           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
80126497c311SBarry Smith 
801372833a62Smarkadams4           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
801472833a62Smarkadams4         }
801572833a62Smarkadams4         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
801672833a62Smarkadams4       }
801772833a62Smarkadams4       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
801872833a62Smarkadams4       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
801972833a62Smarkadams4     }
802072833a62Smarkadams4   } else {
80212d776b49SBarry Smith     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
80222d776b49SBarry Smith     else {
80232d776b49SBarry Smith       Gmat = Amat;
80242d776b49SBarry Smith       PetscCall(PetscObjectReference((PetscObject)Gmat));
80252d776b49SBarry Smith     }
80269371c9d4SSatish Balay     if (isseqaij) {
80279371c9d4SSatish Balay       a = Gmat;
80289371c9d4SSatish Balay       b = NULL;
80299371c9d4SSatish Balay     } else {
803072833a62Smarkadams4       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
80319371c9d4SSatish Balay       a             = d->A;
80329371c9d4SSatish Balay       b             = d->B;
803372833a62Smarkadams4     }
80342d776b49SBarry Smith     if (filter >= 0 || scale) {
80352d776b49SBarry Smith       /* take absolute value of each entry */
803672833a62Smarkadams4       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
803772833a62Smarkadams4         MatInfo      info;
803872833a62Smarkadams4         PetscScalar *avals;
80396497c311SBarry Smith 
804072833a62Smarkadams4         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
804172833a62Smarkadams4         PetscCall(MatSeqAIJGetArray(c, &avals));
804272833a62Smarkadams4         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
804372833a62Smarkadams4         PetscCall(MatSeqAIJRestoreArray(c, &avals));
804472833a62Smarkadams4       }
804572833a62Smarkadams4     }
80462d776b49SBarry Smith   }
804772833a62Smarkadams4   if (symmetrize) {
8048b94d7dedSBarry Smith     PetscBool isset, issym;
80496497c311SBarry Smith 
8050b94d7dedSBarry Smith     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8051b94d7dedSBarry Smith     if (!isset || !issym) {
805272833a62Smarkadams4       Mat matTrans;
80536497c311SBarry Smith 
805472833a62Smarkadams4       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
80551fcb517eSBarry Smith       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
805672833a62Smarkadams4       PetscCall(MatDestroy(&matTrans));
805772833a62Smarkadams4     }
805872833a62Smarkadams4     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
80592d776b49SBarry Smith   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
806072833a62Smarkadams4   if (scale) {
806172833a62Smarkadams4     /* scale c for all diagonal values = 1 or -1 */
806272833a62Smarkadams4     Vec diag;
80636497c311SBarry Smith 
806472833a62Smarkadams4     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
806572833a62Smarkadams4     PetscCall(MatGetDiagonal(Gmat, diag));
806672833a62Smarkadams4     PetscCall(VecReciprocal(diag));
806772833a62Smarkadams4     PetscCall(VecSqrtAbs(diag));
806872833a62Smarkadams4     PetscCall(MatDiagonalScale(Gmat, diag, diag));
806972833a62Smarkadams4     PetscCall(VecDestroy(&diag));
807072833a62Smarkadams4   }
807172833a62Smarkadams4   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
80722d776b49SBarry Smith   if (filter >= 0) {
80732ce66baaSPierre Jolivet     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
80742ce66baaSPierre Jolivet     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
80752d776b49SBarry Smith   }
807672833a62Smarkadams4   *a_Gmat = Gmat;
80773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
807872833a62Smarkadams4 }
807972833a62Smarkadams4 
808072833a62Smarkadams4 /*
808198921bdaSJacob Faibussowitsch     Special version for direct calls from Fortran
808298921bdaSJacob Faibussowitsch */
808398921bdaSJacob Faibussowitsch 
808498921bdaSJacob Faibussowitsch /* Redefine these macros so that they can be used in a function returning void */
80859566063dSJacob Faibussowitsch /* Identical to PetscCallVoid, except it assigns to *_ierr */
/*
   PetscCall() replacement: the argument expression is evaluated once into a local PetscErrorCode.
   If that code is nonzero, the value returned by PetscError() (called with PETSC_COMM_SELF and
   PETSC_ERROR_REPEAT) is stored in *_ierr and the enclosing function returns without a value.
   The enclosing function must therefore have a pointer named _ierr in scope.
*/
80869566063dSJacob Faibussowitsch #undef PetscCall
80879371c9d4SSatish Balay #define PetscCall(...) \
80889371c9d4SSatish Balay   do { \
80895f80ce2aSJacob Faibussowitsch     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
809098921bdaSJacob Faibussowitsch     if (PetscUnlikely(ierr_msv_mpiaij)) { \
809198921bdaSJacob Faibussowitsch       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
809298921bdaSJacob Faibussowitsch       return; \
809398921bdaSJacob Faibussowitsch     } \
809498921bdaSJacob Faibussowitsch   } while (0)
809598921bdaSJacob Faibussowitsch 
/*
   SETERRQ() replacement for void functions: stores the value returned by PetscError() (called with
   the given communicator and error code, PETSC_ERROR_INITIAL, and the message arguments) in *_ierr
   and returns from the enclosing function without a value. Requires a pointer named _ierr in scope.
*/
809698921bdaSJacob Faibussowitsch #undef SETERRQ
80979371c9d4SSatish Balay #define SETERRQ(comm, ierr, ...) \
80989371c9d4SSatish Balay   do { \
809998921bdaSJacob Faibussowitsch     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
810098921bdaSJacob Faibussowitsch     return; \
810198921bdaSJacob Faibussowitsch   } while (0)
810298921bdaSJacob Faibussowitsch 
/*
   Select the external symbol name used for matsetvaluesmpiaij_:
     - PETSC_HAVE_FORTRAN_CAPS defined:            MATSETVALUESMPIAIJ
     - PETSC_HAVE_FORTRAN_UNDERSCORE not defined:  matsetvaluesmpiaij (no trailing underscore)
     - otherwise:                                  the name is left as written (empty #else branch)
*/
810398921bdaSJacob Faibussowitsch #if defined(PETSC_HAVE_FORTRAN_CAPS)
810498921bdaSJacob Faibussowitsch   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
810598921bdaSJacob Faibussowitsch #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
810698921bdaSJacob Faibussowitsch   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
810798921bdaSJacob Faibussowitsch #else
810898921bdaSJacob Faibussowitsch #endif
8109d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8110d71ae5a4SJacob Faibussowitsch {
811198921bdaSJacob Faibussowitsch   Mat         mat = *mmat;
811298921bdaSJacob Faibussowitsch   PetscInt    m = *mm, n = *mn;
811398921bdaSJacob Faibussowitsch   InsertMode  addv = *maddv;
811498921bdaSJacob Faibussowitsch   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
811598921bdaSJacob Faibussowitsch   PetscScalar value;
811698921bdaSJacob Faibussowitsch 
811798921bdaSJacob Faibussowitsch   MatCheckPreallocated(mat, 1);
811898921bdaSJacob Faibussowitsch   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
81195f80ce2aSJacob Faibussowitsch   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
812098921bdaSJacob Faibussowitsch   {
812198921bdaSJacob Faibussowitsch     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
812298921bdaSJacob Faibussowitsch     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
812398921bdaSJacob Faibussowitsch     PetscBool roworiented = aij->roworiented;
812498921bdaSJacob Faibussowitsch 
812598921bdaSJacob Faibussowitsch     /* Some Variables required in the macro */
812698921bdaSJacob Faibussowitsch     Mat         A     = aij->A;
812798921bdaSJacob Faibussowitsch     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
812898921bdaSJacob Faibussowitsch     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
812998921bdaSJacob Faibussowitsch     MatScalar  *aa;
8130f4f49eeaSPierre Jolivet     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
813198921bdaSJacob Faibussowitsch     Mat         B                 = aij->B;
813298921bdaSJacob Faibussowitsch     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
813398921bdaSJacob Faibussowitsch     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
813498921bdaSJacob Faibussowitsch     MatScalar  *ba;
813598921bdaSJacob Faibussowitsch     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
813698921bdaSJacob Faibussowitsch      * cannot use "#if defined" inside a macro. */
813798921bdaSJacob Faibussowitsch     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
813898921bdaSJacob Faibussowitsch 
813998921bdaSJacob Faibussowitsch     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
814098921bdaSJacob Faibussowitsch     PetscInt   nonew = a->nonew;
814198921bdaSJacob Faibussowitsch     MatScalar *ap1, *ap2;
814298921bdaSJacob Faibussowitsch 
814398921bdaSJacob Faibussowitsch     PetscFunctionBegin;
81449566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArray(A, &aa));
81459566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArray(B, &ba));
814698921bdaSJacob Faibussowitsch     for (i = 0; i < m; i++) {
814798921bdaSJacob Faibussowitsch       if (im[i] < 0) continue;
81486bdcaf15SBarry Smith       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
814998921bdaSJacob Faibussowitsch       if (im[i] >= rstart && im[i] < rend) {
815098921bdaSJacob Faibussowitsch         row      = im[i] - rstart;
815198921bdaSJacob Faibussowitsch         lastcol1 = -1;
815298921bdaSJacob Faibussowitsch         rp1      = aj + ai[row];
815398921bdaSJacob Faibussowitsch         ap1      = aa + ai[row];
815498921bdaSJacob Faibussowitsch         rmax1    = aimax[row];
815598921bdaSJacob Faibussowitsch         nrow1    = ailen[row];
815698921bdaSJacob Faibussowitsch         low1     = 0;
815798921bdaSJacob Faibussowitsch         high1    = nrow1;
815898921bdaSJacob Faibussowitsch         lastcol2 = -1;
815998921bdaSJacob Faibussowitsch         rp2      = bj + bi[row];
816098921bdaSJacob Faibussowitsch         ap2      = ba + bi[row];
816198921bdaSJacob Faibussowitsch         rmax2    = bimax[row];
816298921bdaSJacob Faibussowitsch         nrow2    = bilen[row];
816398921bdaSJacob Faibussowitsch         low2     = 0;
816498921bdaSJacob Faibussowitsch         high2    = nrow2;
816598921bdaSJacob Faibussowitsch 
816698921bdaSJacob Faibussowitsch         for (j = 0; j < n; j++) {
816798921bdaSJacob Faibussowitsch           if (roworiented) value = v[i * n + j];
816898921bdaSJacob Faibussowitsch           else value = v[i + j * m];
816998921bdaSJacob Faibussowitsch           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
817098921bdaSJacob Faibussowitsch           if (in[j] >= cstart && in[j] < cend) {
817198921bdaSJacob Faibussowitsch             col = in[j] - cstart;
817298921bdaSJacob Faibussowitsch             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
817398921bdaSJacob Faibussowitsch           } else if (in[j] < 0) continue;
817498921bdaSJacob Faibussowitsch           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
817563a3b9bcSJacob Faibussowitsch             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
817698921bdaSJacob Faibussowitsch           } else {
817798921bdaSJacob Faibussowitsch             if (mat->was_assembled) {
817848a46eb9SPierre Jolivet               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
817998921bdaSJacob Faibussowitsch #if defined(PETSC_USE_CTABLE)
8180eec179cfSJacob Faibussowitsch               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
818198921bdaSJacob Faibussowitsch               col--;
818298921bdaSJacob Faibussowitsch #else
818398921bdaSJacob Faibussowitsch               col = aij->colmap[in[j]] - 1;
818498921bdaSJacob Faibussowitsch #endif
8185f4f49eeaSPierre Jolivet               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
81869566063dSJacob Faibussowitsch                 PetscCall(MatDisAssemble_MPIAIJ(mat));
818798921bdaSJacob Faibussowitsch                 col = in[j];
818898921bdaSJacob Faibussowitsch                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
818998921bdaSJacob Faibussowitsch                 B        = aij->B;
819098921bdaSJacob Faibussowitsch                 b        = (Mat_SeqAIJ *)B->data;
81919371c9d4SSatish Balay                 bimax    = b->imax;
81929371c9d4SSatish Balay                 bi       = b->i;
81939371c9d4SSatish Balay                 bilen    = b->ilen;
81949371c9d4SSatish Balay                 bj       = b->j;
819598921bdaSJacob Faibussowitsch                 rp2      = bj + bi[row];
819698921bdaSJacob Faibussowitsch                 ap2      = ba + bi[row];
819798921bdaSJacob Faibussowitsch                 rmax2    = bimax[row];
819898921bdaSJacob Faibussowitsch                 nrow2    = bilen[row];
819998921bdaSJacob Faibussowitsch                 low2     = 0;
820098921bdaSJacob Faibussowitsch                 high2    = nrow2;
820198921bdaSJacob Faibussowitsch                 bm       = aij->B->rmap->n;
820298921bdaSJacob Faibussowitsch                 ba       = b->a;
820398921bdaSJacob Faibussowitsch                 inserted = PETSC_FALSE;
820498921bdaSJacob Faibussowitsch               }
820598921bdaSJacob Faibussowitsch             } else col = in[j];
820698921bdaSJacob Faibussowitsch             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
820798921bdaSJacob Faibussowitsch           }
820898921bdaSJacob Faibussowitsch         }
820998921bdaSJacob Faibussowitsch       } else if (!aij->donotstash) {
821098921bdaSJacob Faibussowitsch         if (roworiented) {
82119566063dSJacob Faibussowitsch           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
821298921bdaSJacob Faibussowitsch         } else {
82139566063dSJacob Faibussowitsch           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
821498921bdaSJacob Faibussowitsch         }
821598921bdaSJacob Faibussowitsch       }
821698921bdaSJacob Faibussowitsch     }
82179566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArray(A, &aa));
82189566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArray(B, &ba));
821998921bdaSJacob Faibussowitsch   }
822098921bdaSJacob Faibussowitsch   PetscFunctionReturnVoid();
822198921bdaSJacob Faibussowitsch }
822272833a62Smarkadams4 
822398921bdaSJacob Faibussowitsch /* Undefining these here since they were redefined from their original definition above! No
822498921bdaSJacob Faibussowitsch  * other PETSc functions should be defined past this point, as it is impossible to recover the
822598921bdaSJacob Faibussowitsch  * original definitions */
/* After the two directives below, PetscCall and SETERRQ name no macro at all until something defines them again */
82269566063dSJacob Faibussowitsch #undef PetscCall
822798921bdaSJacob Faibussowitsch #undef SETERRQ
8228