1c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2af0996ceSBarry Smith #include <petsc/private/vecimpl.h> 397929ea7SJunchao Zhang #include <petsc/private/sfimpl.h> 4af0996ceSBarry Smith #include <petsc/private/isimpl.h> 5c6db04a5SJed Brown #include <petscblaslapack.h> 60c312b8eSJed Brown #include <petscsf.h> 7bc8e477aSFande Kong #include <petsc/private/hashmapi.h> 88a729477SBarry Smith 9*674b392bSAlexander /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 10*674b392bSAlexander #define TYPE AIJ 11*674b392bSAlexander #define TYPE_AIJ 12*674b392bSAlexander #include "../src/mat/impls/aij/mpi/mpihashmat.h" 13*674b392bSAlexander #undef TYPE 14*674b392bSAlexander #undef TYPE_AIJ 15*674b392bSAlexander 16*674b392bSAlexander static PetscErrorCode MatReset_MPIAIJ(Mat mat) 1726cec326SBarry Smith { 1826cec326SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1926cec326SBarry Smith 2026cec326SBarry Smith PetscFunctionBegin; 2126cec326SBarry Smith PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 2226cec326SBarry Smith PetscCall(MatStashDestroy_Private(&mat->stash)); 2326cec326SBarry Smith PetscCall(VecDestroy(&aij->diag)); 2426cec326SBarry Smith PetscCall(MatDestroy(&aij->A)); 2526cec326SBarry Smith PetscCall(MatDestroy(&aij->B)); 2626cec326SBarry Smith #if defined(PETSC_USE_CTABLE) 2726cec326SBarry Smith PetscCall(PetscHMapIDestroy(&aij->colmap)); 2826cec326SBarry Smith #else 2926cec326SBarry Smith PetscCall(PetscFree(aij->colmap)); 3026cec326SBarry Smith #endif 3126cec326SBarry Smith PetscCall(PetscFree(aij->garray)); 3226cec326SBarry Smith PetscCall(VecDestroy(&aij->lvec)); 3326cec326SBarry Smith PetscCall(VecScatterDestroy(&aij->Mvctx)); 3426cec326SBarry Smith PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 3526cec326SBarry Smith PetscCall(PetscFree(aij->ld)); 36*674b392bSAlexander 
PetscFunctionReturn(PETSC_SUCCESS); 37*674b392bSAlexander } 38*674b392bSAlexander 39*674b392bSAlexander static PetscErrorCode MatResetHash_MPIAIJ(Mat mat) 40*674b392bSAlexander { 41*674b392bSAlexander Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 42*674b392bSAlexander /* Save the nonzero states of the component matrices because those are what are used to determine 43*674b392bSAlexander the nonzero state of mat */ 44*674b392bSAlexander PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate; 45*674b392bSAlexander 46*674b392bSAlexander PetscFunctionBegin; 47*674b392bSAlexander PetscCall(MatReset_MPIAIJ(mat)); 48*674b392bSAlexander PetscCall(MatSetUp_MPI_Hash(mat)); 49*674b392bSAlexander aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate; 50*674b392bSAlexander PetscFunctionReturn(PETSC_SUCCESS); 51*674b392bSAlexander } 52*674b392bSAlexander 53*674b392bSAlexander PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 54*674b392bSAlexander { 55*674b392bSAlexander PetscFunctionBegin; 56*674b392bSAlexander PetscCall(MatReset_MPIAIJ(mat)); 5726cec326SBarry Smith 5826cec326SBarry Smith PetscCall(PetscFree(mat->data)); 5926cec326SBarry Smith 6026cec326SBarry Smith /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 6126cec326SBarry Smith PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 6226cec326SBarry Smith 6326cec326SBarry Smith PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 6426cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 6526cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 6626cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 6726cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 6826cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", 
NULL)); 69*674b392bSAlexander PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL)); 7026cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 7126cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 7226cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 7326cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 7426cec326SBarry Smith #if defined(PETSC_HAVE_CUDA) 7526cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 7626cec326SBarry Smith #endif 7726cec326SBarry Smith #if defined(PETSC_HAVE_HIP) 7826cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 7926cec326SBarry Smith #endif 8026cec326SBarry Smith #if defined(PETSC_HAVE_KOKKOS_KERNELS) 8126cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 8226cec326SBarry Smith #endif 8326cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 8426cec326SBarry Smith #if defined(PETSC_HAVE_ELEMENTAL) 8526cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 8626cec326SBarry Smith #endif 8726cec326SBarry Smith #if defined(PETSC_HAVE_SCALAPACK) 8826cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 8926cec326SBarry Smith #endif 9026cec326SBarry Smith #if defined(PETSC_HAVE_HYPRE) 9126cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 9226cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 9326cec326SBarry Smith #endif 9426cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 9526cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 9626cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 9726cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 9826cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 9926cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 10026cec326SBarry Smith #if defined(PETSC_HAVE_MKL_SPARSE) 10126cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 10226cec326SBarry Smith #endif 10326cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 10426cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 10526cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 10626cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 10726cec326SBarry Smith PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 10826cec326SBarry Smith PetscFunctionReturn(PETSC_SUCCESS); 10926cec326SBarry Smith } 11026cec326SBarry Smith 111ba38deedSJacob Faibussowitsch static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 112d71ae5a4SJacob Faibussowitsch { 1138a9c020eSBarry 
Smith Mat B; 1148a9c020eSBarry Smith 1158a9c020eSBarry Smith PetscFunctionBegin; 1168a9c020eSBarry Smith PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 1178a9c020eSBarry Smith PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 1188a9c020eSBarry Smith PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 119501b8e33SLisandro Dalcin PetscCall(MatDestroy(&B)); 1203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1218a9c020eSBarry Smith } 1228a9c020eSBarry Smith 123ba38deedSJacob Faibussowitsch static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 124d71ae5a4SJacob Faibussowitsch { 1258a9c020eSBarry Smith Mat B; 1268a9c020eSBarry Smith 1278a9c020eSBarry Smith PetscFunctionBegin; 1288a9c020eSBarry Smith PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 1298a9c020eSBarry Smith PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 130501b8e33SLisandro Dalcin PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 1313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1328a9c020eSBarry Smith } 1338a9c020eSBarry Smith 13401bebe75SBarry Smith /*MC 13501bebe75SBarry Smith MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 13601bebe75SBarry Smith 13711a5261eSBarry Smith This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 13811a5261eSBarry Smith and `MATMPIAIJ` otherwise. As a result, for single process communicators, 13911a5261eSBarry Smith `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 14001bebe75SBarry Smith for communicators controlling multiple processes. 
It is recommended that you call both of 14101bebe75SBarry Smith the above preallocation routines for simplicity. 14201bebe75SBarry Smith 14327430b45SBarry Smith Options Database Key: 14411a5261eSBarry Smith . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()` 14501bebe75SBarry Smith 14611a5261eSBarry Smith Developer Note: 1472ef1f0ffSBarry Smith Level: beginner 1482ef1f0ffSBarry Smith 14911a5261eSBarry Smith Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`,and also automatically switches over to use inodes when 15001bebe75SBarry Smith enough exist. 15101bebe75SBarry Smith 1521cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 15301bebe75SBarry Smith M*/ 15401bebe75SBarry Smith 15501bebe75SBarry Smith /*MC 15601bebe75SBarry Smith MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 15701bebe75SBarry Smith 15811a5261eSBarry Smith This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator, 15911a5261eSBarry Smith and `MATMPIAIJCRL` otherwise. As a result, for single process communicators, 16011a5261eSBarry Smith `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 16101bebe75SBarry Smith for communicators controlling multiple processes. It is recommended that you call both of 16201bebe75SBarry Smith the above preallocation routines for simplicity. 16301bebe75SBarry Smith 16427430b45SBarry Smith Options Database Key: 16511a5261eSBarry Smith . 
-mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()` 16601bebe75SBarry Smith 16701bebe75SBarry Smith Level: beginner 16801bebe75SBarry Smith 1691cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 17001bebe75SBarry Smith M*/ 17101bebe75SBarry Smith 172d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) 173d71ae5a4SJacob Faibussowitsch { 174f74ef234SStefano Zampini Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 175f74ef234SStefano Zampini 176f74ef234SStefano Zampini PetscFunctionBegin; 177d5e393b6SSuyash Tandon #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL) 178b470e4b4SRichard Tran Mills A->boundtocpu = flg; 179f74ef234SStefano Zampini #endif 1801baa6e33SBarry Smith if (a->A) PetscCall(MatBindToCPU(a->A, flg)); 1811baa6e33SBarry Smith if (a->B) PetscCall(MatBindToCPU(a->B, flg)); 1823120d049SRichard Tran Mills 1833120d049SRichard Tran Mills /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 1843120d049SRichard Tran Mills * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 1853120d049SRichard Tran Mills * to differ from the parent matrix. 
*/ 1861baa6e33SBarry Smith if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 1871baa6e33SBarry Smith if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 1883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 189f74ef234SStefano Zampini } 190f74ef234SStefano Zampini 191ba38deedSJacob Faibussowitsch static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 192d71ae5a4SJacob Faibussowitsch { 19326bda2c4Sstefano_zampini Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 19426bda2c4Sstefano_zampini 19526bda2c4Sstefano_zampini PetscFunctionBegin; 19646533700Sstefano_zampini if (mat->A) { 1979566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 1989566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 19946533700Sstefano_zampini } 2003ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 20126bda2c4Sstefano_zampini } 20226bda2c4Sstefano_zampini 203ba38deedSJacob Faibussowitsch static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 204d71ae5a4SJacob Faibussowitsch { 20527d4218bSShri Abhyankar Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 20627d4218bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 20727d4218bSShri Abhyankar Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 20827d4218bSShri Abhyankar const PetscInt *ia, *ib; 209ce496241SStefano Zampini const MatScalar *aa, *bb, *aav, *bav; 21027d4218bSShri Abhyankar PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 21127d4218bSShri Abhyankar PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 21227d4218bSShri Abhyankar 21327d4218bSShri Abhyankar PetscFunctionBegin; 214f4259b30SLisandro Dalcin *keptrows = NULL; 215ce496241SStefano Zampini 21627d4218bSShri Abhyankar ia = a->i; 21727d4218bSShri Abhyankar ib = b->i; 2189566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 2199566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 22027d4218bSShri Abhyankar for (i = 0; i < m; i++) { 
22127d4218bSShri Abhyankar na = ia[i + 1] - ia[i]; 22227d4218bSShri Abhyankar nb = ib[i + 1] - ib[i]; 22327d4218bSShri Abhyankar if (!na && !nb) { 22427d4218bSShri Abhyankar cnt++; 22527d4218bSShri Abhyankar goto ok1; 22627d4218bSShri Abhyankar } 227ce496241SStefano Zampini aa = aav + ia[i]; 22827d4218bSShri Abhyankar for (j = 0; j < na; j++) { 22927d4218bSShri Abhyankar if (aa[j] != 0.0) goto ok1; 23027d4218bSShri Abhyankar } 2318e3a54c0SPierre Jolivet bb = PetscSafePointerPlusOffset(bav, ib[i]); 23227d4218bSShri Abhyankar for (j = 0; j < nb; j++) { 23327d4218bSShri Abhyankar if (bb[j] != 0.0) goto ok1; 23427d4218bSShri Abhyankar } 23527d4218bSShri Abhyankar cnt++; 23627d4218bSShri Abhyankar ok1:; 23727d4218bSShri Abhyankar } 238462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 239ce496241SStefano Zampini if (!n0rows) { 2409566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 2419566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 2423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 243ce496241SStefano Zampini } 2449566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 24527d4218bSShri Abhyankar cnt = 0; 24627d4218bSShri Abhyankar for (i = 0; i < m; i++) { 24727d4218bSShri Abhyankar na = ia[i + 1] - ia[i]; 24827d4218bSShri Abhyankar nb = ib[i + 1] - ib[i]; 24927d4218bSShri Abhyankar if (!na && !nb) continue; 250ce496241SStefano Zampini aa = aav + ia[i]; 25127d4218bSShri Abhyankar for (j = 0; j < na; j++) { 25227d4218bSShri Abhyankar if (aa[j] != 0.0) { 25327d4218bSShri Abhyankar rows[cnt++] = rstart + i; 25427d4218bSShri Abhyankar goto ok2; 25527d4218bSShri Abhyankar } 25627d4218bSShri Abhyankar } 2578e3a54c0SPierre Jolivet bb = PetscSafePointerPlusOffset(bav, ib[i]); 25827d4218bSShri Abhyankar for (j = 0; j < nb; j++) { 25927d4218bSShri Abhyankar if (bb[j] != 0.0) { 26027d4218bSShri Abhyankar 
rows[cnt++] = rstart + i; 26127d4218bSShri Abhyankar goto ok2; 26227d4218bSShri Abhyankar } 26327d4218bSShri Abhyankar } 26427d4218bSShri Abhyankar ok2:; 26527d4218bSShri Abhyankar } 2669566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 2679566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 2689566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 2693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 27027d4218bSShri Abhyankar } 27127d4218bSShri Abhyankar 272ba38deedSJacob Faibussowitsch static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 273d71ae5a4SJacob Faibussowitsch { 27499e65526SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 27594342113SStefano Zampini PetscBool cong; 27699e65526SBarry Smith 27799e65526SBarry Smith PetscFunctionBegin; 2789566063dSJacob Faibussowitsch PetscCall(MatHasCongruentLayouts(Y, &cong)); 27994342113SStefano Zampini if (Y->assembled && cong) { 2809566063dSJacob Faibussowitsch PetscCall(MatDiagonalSet(aij->A, D, is)); 28199e65526SBarry Smith } else { 2829566063dSJacob Faibussowitsch PetscCall(MatDiagonalSet_Default(Y, D, is)); 28399e65526SBarry Smith } 2843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28599e65526SBarry Smith } 28699e65526SBarry Smith 287ba38deedSJacob Faibussowitsch static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 288d71ae5a4SJacob Faibussowitsch { 289f1f41ecbSJed Brown Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 290f1f41ecbSJed Brown PetscInt i, rstart, nrows, *rows; 291f1f41ecbSJed Brown 292f1f41ecbSJed Brown PetscFunctionBegin; 2930298fd71SBarry Smith *zrows = NULL; 2949566063dSJacob Faibussowitsch PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 2959566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 296f1f41ecbSJed Brown for (i = 0; i < nrows; i++) rows[i] 
+= rstart; 2979566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 2983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 299f1f41ecbSJed Brown } 300f1f41ecbSJed Brown 301ba38deedSJacob Faibussowitsch static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) 302d71ae5a4SJacob Faibussowitsch { 3030716a85fSBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 304a873a8cdSSam Reynolds PetscInt i, m, n, *garray = aij->garray; 3050716a85fSBarry Smith Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 3060716a85fSBarry Smith Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 3070716a85fSBarry Smith PetscReal *work; 308ce496241SStefano Zampini const PetscScalar *dummy; 3096497c311SBarry Smith PetscMPIInt in; 3100716a85fSBarry Smith 3110716a85fSBarry Smith PetscFunctionBegin; 3129566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &m, &n)); 3139566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(n, &work)); 3149566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 3159566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 3169566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 3179566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 318857cbf51SRichard Tran Mills if (type == NORM_2) { 319ad540459SPierre Jolivet for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 320ad540459SPierre Jolivet for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 321857cbf51SRichard Tran Mills } else if (type == NORM_1) { 322ad540459SPierre Jolivet for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 323ad540459SPierre Jolivet for (i = 0; i < 
b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 324857cbf51SRichard Tran Mills } else if (type == NORM_INFINITY) { 325ad540459SPierre Jolivet for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 326ad540459SPierre Jolivet for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 327857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 328ad540459SPierre Jolivet for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 329ad540459SPierre Jolivet for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 330857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 331ad540459SPierre Jolivet for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 332ad540459SPierre Jolivet for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 333857cbf51SRichard Tran Mills } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 3346497c311SBarry Smith PetscCall(PetscMPIIntCast(n, &in)); 335857cbf51SRichard Tran Mills if (type == NORM_INFINITY) { 336462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 3370716a85fSBarry Smith } else { 338462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 3390716a85fSBarry Smith } 3409566063dSJacob Faibussowitsch PetscCall(PetscFree(work)); 341857cbf51SRichard Tran Mills if (type == NORM_2) { 
342a873a8cdSSam Reynolds for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 343857cbf51SRichard Tran Mills } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 344a873a8cdSSam Reynolds for (i = 0; i < n; i++) reductions[i] /= m; 3450716a85fSBarry Smith } 3463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3470716a85fSBarry Smith } 3480716a85fSBarry Smith 349ba38deedSJacob Faibussowitsch static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 350d71ae5a4SJacob Faibussowitsch { 351e52d2c62SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 352e52d2c62SBarry Smith IS sis, gis; 353e52d2c62SBarry Smith const PetscInt *isis, *igis; 354e52d2c62SBarry Smith PetscInt n, *iis, nsis, ngis, rstart, i; 355e52d2c62SBarry Smith 356e52d2c62SBarry Smith PetscFunctionBegin; 3579566063dSJacob Faibussowitsch PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 3589566063dSJacob Faibussowitsch PetscCall(MatFindNonzeroRows(a->B, &gis)); 3599566063dSJacob Faibussowitsch PetscCall(ISGetSize(gis, &ngis)); 3609566063dSJacob Faibussowitsch PetscCall(ISGetSize(sis, &nsis)); 3619566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sis, &isis)); 3629566063dSJacob Faibussowitsch PetscCall(ISGetIndices(gis, &igis)); 363e52d2c62SBarry Smith 3649566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ngis + nsis, &iis)); 3659566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(iis, igis, ngis)); 3669566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 367e52d2c62SBarry Smith n = ngis + nsis; 3689566063dSJacob Faibussowitsch PetscCall(PetscSortRemoveDupsInt(&n, iis)); 3699566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 370e52d2c62SBarry Smith for (i = 0; i < n; i++) iis[i] += rstart; 3719566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 372e52d2c62SBarry Smith 3739566063dSJacob 
Faibussowitsch PetscCall(ISRestoreIndices(sis, &isis)); 3749566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(gis, &igis)); 3759566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sis)); 3769566063dSJacob Faibussowitsch PetscCall(ISDestroy(&gis)); 3773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 378e52d2c62SBarry Smith } 379e52d2c62SBarry Smith 380dd6ea824SBarry Smith /* 3810f5bd95cSBarry Smith Local utility routine that creates a mapping from the global column 3829e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 3830f5bd95cSBarry Smith storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 3840f5bd95cSBarry Smith a slightly higher hash table cost; without it it is not scalable (each processor 38572fa4726SStefano Zampini has an order N integer array but is fast to access. 3869e25ed09SBarry Smith */ 387d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 388d71ae5a4SJacob Faibussowitsch { 38944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 390d0f46423SBarry Smith PetscInt n = aij->B->cmap->n, i; 391dbb450caSBarry Smith 3923a40ed3dSBarry Smith PetscFunctionBegin; 39308401ef6SPierre Jolivet PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 394aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 395eec179cfSJacob Faibussowitsch PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 396c76ffc5fSJacob Faibussowitsch for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 397b1fc9764SSatish Balay #else 3989566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 399905e6a2fSBarry Smith for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 400b1fc9764SSatish Balay #endif 4013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4029e25ed09SBarry Smith } 4039e25ed09SBarry Smith 404d40312a9SBarry 
Smith #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 405a8f51744SPierre Jolivet do { \ 406db4deed7SKarl Rupp if (col <= lastcol1) low1 = 0; \ 407db4deed7SKarl Rupp else high1 = nrow1; \ 408fd3458f5SBarry Smith lastcol1 = col; \ 409fd3458f5SBarry Smith while (high1 - low1 > 5) { \ 410fd3458f5SBarry Smith t = (low1 + high1) / 2; \ 411fd3458f5SBarry Smith if (rp1[t] > col) high1 = t; \ 412fd3458f5SBarry Smith else low1 = t; \ 413ba4e3ef2SSatish Balay } \ 414fd3458f5SBarry Smith for (_i = low1; _i < high1; _i++) { \ 415fd3458f5SBarry Smith if (rp1[_i] > col) break; \ 416fd3458f5SBarry Smith if (rp1[_i] == col) { \ 4170c0d7e18SFande Kong if (addv == ADD_VALUES) { \ 4180c0d7e18SFande Kong ap1[_i] += value; \ 4190c0d7e18SFande Kong /* Not sure LogFlops will slow dow the code or not */ \ 4200c0d7e18SFande Kong (void)PetscLogFlops(1.0); \ 4219371c9d4SSatish Balay } else ap1[_i] = value; \ 42230770e4dSSatish Balay goto a_noinsert; \ 4230520107fSSatish Balay } \ 4240520107fSSatish Balay } \ 4259371c9d4SSatish Balay if (value == 0.0 && ignorezeroentries && row != col) { \ 4269371c9d4SSatish Balay low1 = 0; \ 4279371c9d4SSatish Balay high1 = nrow1; \ 4289371c9d4SSatish Balay goto a_noinsert; \ 4299371c9d4SSatish Balay } \ 4309371c9d4SSatish Balay if (nonew == 1) { \ 4319371c9d4SSatish Balay low1 = 0; \ 4329371c9d4SSatish Balay high1 = nrow1; \ 4339371c9d4SSatish Balay goto a_noinsert; \ 4349371c9d4SSatish Balay } \ 43508401ef6SPierre Jolivet PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 436fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 4379371c9d4SSatish Balay N = nrow1++ - 1; \ 4389371c9d4SSatish Balay a->nz++; \ 4399371c9d4SSatish Balay high1++; \ 4400520107fSSatish Balay /* shift up all the later entries in this row */ \ 
4419566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 4429566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 443fd3458f5SBarry Smith rp1[_i] = col; \ 444fd3458f5SBarry Smith ap1[_i] = value; \ 44530770e4dSSatish Balay a_noinsert:; \ 446fd3458f5SBarry Smith ailen[row] = nrow1; \ 447a8f51744SPierre Jolivet } while (0) 4480a198c4cSBarry Smith 449d40312a9SBarry Smith #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 450a8f51744SPierre Jolivet do { \ 451db4deed7SKarl Rupp if (col <= lastcol2) low2 = 0; \ 452db4deed7SKarl Rupp else high2 = nrow2; \ 453fd3458f5SBarry Smith lastcol2 = col; \ 454fd3458f5SBarry Smith while (high2 - low2 > 5) { \ 455fd3458f5SBarry Smith t = (low2 + high2) / 2; \ 456fd3458f5SBarry Smith if (rp2[t] > col) high2 = t; \ 457fd3458f5SBarry Smith else low2 = t; \ 458ba4e3ef2SSatish Balay } \ 459fd3458f5SBarry Smith for (_i = low2; _i < high2; _i++) { \ 460fd3458f5SBarry Smith if (rp2[_i] > col) break; \ 461fd3458f5SBarry Smith if (rp2[_i] == col) { \ 4620c0d7e18SFande Kong if (addv == ADD_VALUES) { \ 4630c0d7e18SFande Kong ap2[_i] += value; \ 4640c0d7e18SFande Kong (void)PetscLogFlops(1.0); \ 4659371c9d4SSatish Balay } else ap2[_i] = value; \ 46630770e4dSSatish Balay goto b_noinsert; \ 46730770e4dSSatish Balay } \ 46830770e4dSSatish Balay } \ 4699371c9d4SSatish Balay if (value == 0.0 && ignorezeroentries) { \ 4709371c9d4SSatish Balay low2 = 0; \ 4719371c9d4SSatish Balay high2 = nrow2; \ 4729371c9d4SSatish Balay goto b_noinsert; \ 4739371c9d4SSatish Balay } \ 4749371c9d4SSatish Balay if (nonew == 1) { \ 4759371c9d4SSatish Balay low2 = 0; \ 4769371c9d4SSatish Balay high2 = nrow2; \ 4779371c9d4SSatish Balay goto b_noinsert; \ 4789371c9d4SSatish Balay } \ 47908401ef6SPierre Jolivet PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into 
matrix", orow, ocol); \ 480fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 4819371c9d4SSatish Balay N = nrow2++ - 1; \ 4829371c9d4SSatish Balay b->nz++; \ 4839371c9d4SSatish Balay high2++; \ 48430770e4dSSatish Balay /* shift up all the later entries in this row */ \ 4859566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 4869566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 487fd3458f5SBarry Smith rp2[_i] = col; \ 488fd3458f5SBarry Smith ap2[_i] = value; \ 48930770e4dSSatish Balay b_noinsert:; \ 490fd3458f5SBarry Smith bilen[row] = nrow2; \ 491a8f51744SPierre Jolivet } while (0) 49230770e4dSSatish Balay 493ba38deedSJacob Faibussowitsch static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 494d71ae5a4SJacob Faibussowitsch { 4952fd7e33dSBarry Smith Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 4962fd7e33dSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 4972fd7e33dSBarry Smith PetscInt l, *garray = mat->garray, diag; 498fff043a9SJunchao Zhang PetscScalar *aa, *ba; 4992fd7e33dSBarry Smith 5002fd7e33dSBarry Smith PetscFunctionBegin; 5012fd7e33dSBarry Smith /* code only works for square matrices A */ 5022fd7e33dSBarry Smith 5032fd7e33dSBarry Smith /* find size of row to the left of the diagonal part */ 5049566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 5052fd7e33dSBarry Smith row = row - diag; 5062fd7e33dSBarry Smith for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 5072fd7e33dSBarry Smith if (garray[b->j[b->i[row] + l]] > diag) break; 5082fd7e33dSBarry Smith } 509fff043a9SJunchao Zhang if (l) { 5109566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 5119566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 5129566063dSJacob Faibussowitsch 
PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 513fff043a9SJunchao Zhang } 5142fd7e33dSBarry Smith 5152fd7e33dSBarry Smith /* diagonal part */ 516fff043a9SJunchao Zhang if (a->i[row + 1] - a->i[row]) { 5179566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 51857508eceSPierre Jolivet PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row])); 5199566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 520fff043a9SJunchao Zhang } 5212fd7e33dSBarry Smith 5222fd7e33dSBarry Smith /* right of diagonal part */ 523fff043a9SJunchao Zhang if (b->i[row + 1] - b->i[row] - l) { 5249566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 5259566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 5269566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 527fff043a9SJunchao Zhang } 5283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5292fd7e33dSBarry Smith } 5302fd7e33dSBarry Smith 531d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 532d71ae5a4SJacob Faibussowitsch { 53344a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 534071fcb05SBarry Smith PetscScalar value = 0.0; 535d0f46423SBarry Smith PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 536d0f46423SBarry Smith PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 537ace3abfcSBarry Smith PetscBool roworiented = aij->roworiented; 5388a729477SBarry Smith 5390520107fSSatish Balay /* Some Variables required in the macro */ 5404ee7247eSSatish Balay Mat A = aij->A; 5414ee7247eSSatish Balay Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 54257809a77SBarry Smith PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 543ace3abfcSBarry Smith PetscBool 
ignorezeroentries = a->ignorezeroentries; 54430770e4dSSatish Balay Mat B = aij->B; 54530770e4dSSatish Balay Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 546d0f46423SBarry Smith PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 547ce496241SStefano Zampini MatScalar *aa, *ba; 548fd3458f5SBarry Smith PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 5498d76821aSHong Zhang PetscInt nonew; 550a77337e4SBarry Smith MatScalar *ap1, *ap2; 5514ee7247eSSatish Balay 5523a40ed3dSBarry Smith PetscFunctionBegin; 5539566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(A, &aa)); 5549566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(B, &ba)); 5558a729477SBarry Smith for (i = 0; i < m; i++) { 5565ef9f2a5SBarry Smith if (im[i] < 0) continue; 55708401ef6SPierre Jolivet PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 5584b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 5594b0e389bSBarry Smith row = im[i] - rstart; 560fd3458f5SBarry Smith lastcol1 = -1; 5618e3a54c0SPierre Jolivet rp1 = PetscSafePointerPlusOffset(aj, ai[row]); 5628e3a54c0SPierre Jolivet ap1 = PetscSafePointerPlusOffset(aa, ai[row]); 563fd3458f5SBarry Smith rmax1 = aimax[row]; 564fd3458f5SBarry Smith nrow1 = ailen[row]; 565fd3458f5SBarry Smith low1 = 0; 566fd3458f5SBarry Smith high1 = nrow1; 567fd3458f5SBarry Smith lastcol2 = -1; 5688e3a54c0SPierre Jolivet rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 5698e3a54c0SPierre Jolivet ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 570fd3458f5SBarry Smith rmax2 = bimax[row]; 571d498b1e9SBarry Smith nrow2 = bilen[row]; 572fd3458f5SBarry Smith low2 = 0; 573fd3458f5SBarry Smith high2 = nrow2; 574fd3458f5SBarry Smith 5751eb62cbbSBarry Smith for (j = 0; j < n; j++) { 576071fcb05SBarry Smith if (v) value = roworiented ? 
v[i * n + j] : v[i + j * m]; 577c80a64e6SBarry Smith if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 578fd3458f5SBarry Smith if (in[j] >= cstart && in[j] < cend) { 579fd3458f5SBarry Smith col = in[j] - cstart; 5808d76821aSHong Zhang nonew = a->nonew; 581d40312a9SBarry Smith MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 582f7d195e4SLawrence Mitchell } else if (in[j] < 0) { 583f7d195e4SLawrence Mitchell continue; 584f7d195e4SLawrence Mitchell } else { 585f7d195e4SLawrence Mitchell PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 586227d817aSBarry Smith if (mat->was_assembled) { 58748a46eb9SPierre Jolivet if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 588aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 589eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 590fa46199cSSatish Balay col--; 591b1fc9764SSatish Balay #else 592905e6a2fSBarry Smith col = aij->colmap[in[j]] - 1; 593b1fc9764SSatish Balay #endif 594f4f49eeaSPierre Jolivet if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */ 5959566063dSJacob Faibussowitsch PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 5964b0e389bSBarry Smith col = in[j]; 5979bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 598f9508a3cSSatish Balay B = aij->B; 599f9508a3cSSatish Balay b = (Mat_SeqAIJ *)B->data; 6009371c9d4SSatish Balay bimax = b->imax; 6019371c9d4SSatish Balay bi = b->i; 6029371c9d4SSatish Balay bilen = b->ilen; 6039371c9d4SSatish Balay bj = b->j; 6049371c9d4SSatish Balay ba = b->a; 605cff58d65SJunchao Zhang rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 606cff58d65SJunchao Zhang ap2 = 
PetscSafePointerPlusOffset(ba, bi[row]); 607d498b1e9SBarry Smith rmax2 = bimax[row]; 608d498b1e9SBarry Smith nrow2 = bilen[row]; 609d498b1e9SBarry Smith low2 = 0; 610d498b1e9SBarry Smith high2 = nrow2; 611d0f46423SBarry Smith bm = aij->B->rmap->n; 612f9508a3cSSatish Balay ba = b->a; 613d707bf6cSMatthew Knepley } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 614f4f49eeaSPierre Jolivet if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) { 6159566063dSJacob Faibussowitsch PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 61698921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 6170587a0fcSBarry Smith } 618c48de900SBarry Smith } else col = in[j]; 6198d76821aSHong Zhang nonew = b->nonew; 620d40312a9SBarry Smith MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 6211eb62cbbSBarry Smith } 6221eb62cbbSBarry Smith } 6235ef9f2a5SBarry Smith } else { 62428b400f6SJacob Faibussowitsch PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 62590f02eecSBarry Smith if (!aij->donotstash) { 6265080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 627d36fbae8SSatish Balay if (roworiented) { 6288e3a54c0SPierre Jolivet PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 629d36fbae8SSatish Balay } else { 6308e3a54c0SPierre Jolivet PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 6314b0e389bSBarry Smith } 
6321eb62cbbSBarry Smith } 6338a729477SBarry Smith } 63490f02eecSBarry Smith } 6355519a089SJose E. Roman PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 6369566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(B, &ba)); 6373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6388a729477SBarry Smith } 6398a729477SBarry Smith 6402b08fdbeSandi selinger /* 641904d1e70Sandi selinger This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 6422b08fdbeSandi selinger The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 643904d1e70Sandi selinger No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 6442b08fdbeSandi selinger */ 645d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) 646d71ae5a4SJacob Faibussowitsch { 647904d1e70Sandi selinger Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 648904d1e70Sandi selinger Mat A = aij->A; /* diagonal part of the matrix */ 6494cf0e950SBarry Smith Mat B = aij->B; /* off-diagonal part of the matrix */ 650904d1e70Sandi selinger Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 651904d1e70Sandi selinger Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 652904d1e70Sandi selinger PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col; 653904d1e70Sandi selinger PetscInt *ailen = a->ilen, *aj = a->j; 654904d1e70Sandi selinger PetscInt *bilen = b->ilen, *bj = b->j; 6556dc1ffa3Sandi selinger PetscInt am = aij->A->rmap->n, j; 656904d1e70Sandi selinger PetscInt diag_so_far = 0, dnz; 657904d1e70Sandi selinger PetscInt offd_so_far = 0, onz; 658904d1e70Sandi selinger 659904d1e70Sandi selinger PetscFunctionBegin; 660904d1e70Sandi selinger /* Iterate over all rows of the matrix */ 661904d1e70Sandi selinger for (j = 0; j < am; 
j++) { 662904d1e70Sandi selinger dnz = onz = 0; 663904d1e70Sandi selinger /* Iterate over all non-zero columns of the current row */ 6646dc1ffa3Sandi selinger for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 665904d1e70Sandi selinger /* If column is in the diagonal */ 666904d1e70Sandi selinger if (mat_j[col] >= cstart && mat_j[col] < cend) { 667904d1e70Sandi selinger aj[diag_so_far++] = mat_j[col] - cstart; 668904d1e70Sandi selinger dnz++; 669904d1e70Sandi selinger } else { /* off-diagonal entries */ 670904d1e70Sandi selinger bj[offd_so_far++] = mat_j[col]; 671904d1e70Sandi selinger onz++; 672904d1e70Sandi selinger } 673904d1e70Sandi selinger } 674904d1e70Sandi selinger ailen[j] = dnz; 675904d1e70Sandi selinger bilen[j] = onz; 676904d1e70Sandi selinger } 6773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 678904d1e70Sandi selinger } 679904d1e70Sandi selinger 680904d1e70Sandi selinger /* 681904d1e70Sandi selinger This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 682904d1e70Sandi selinger The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 6831de21080Sandi selinger No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 6841de21080Sandi selinger Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 6851de21080Sandi selinger would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
686904d1e70Sandi selinger */ 687d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 688d71ae5a4SJacob Faibussowitsch { 6893a063d27Sandi selinger Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 6903a063d27Sandi selinger Mat A = aij->A; /* diagonal part of the matrix */ 6914cf0e950SBarry Smith Mat B = aij->B; /* off-diagonal part of the matrix */ 692f4f49eeaSPierre Jolivet Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data; 6933a063d27Sandi selinger Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 6943a063d27Sandi selinger Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 6953a063d27Sandi selinger PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 6963a063d27Sandi selinger PetscInt *ailen = a->ilen, *aj = a->j; 6973a063d27Sandi selinger PetscInt *bilen = b->ilen, *bj = b->j; 6986dc1ffa3Sandi selinger PetscInt am = aij->A->rmap->n, j; 6991de21080Sandi selinger PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 700904d1e70Sandi selinger PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 701904d1e70Sandi selinger PetscScalar *aa = a->a, *ba = b->a; 7023a063d27Sandi selinger 7033a063d27Sandi selinger PetscFunctionBegin; 7043a063d27Sandi selinger /* Iterate over all rows of the matrix */ 7053a063d27Sandi selinger for (j = 0; j < am; j++) { 706904d1e70Sandi selinger dnz_row = onz_row = 0; 707904d1e70Sandi selinger rowstart_offd = full_offd_i[j]; 708904d1e70Sandi selinger rowstart_diag = full_diag_i[j]; 709e9ede7d0Sandi selinger /* Iterate over all non-zero columns of the current row */ 710e9ede7d0Sandi selinger for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 711ae8e66a0Sandi selinger /* If column is in the diagonal */ 7123a063d27Sandi selinger if (mat_j[col] >= cstart && mat_j[col] < cend) { 713904d1e70Sandi selinger aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 714904d1e70Sandi selinger aa[rowstart_diag + dnz_row] = mat_a[col]; 715904d1e70Sandi selinger dnz_row++; 716ae8e66a0Sandi selinger } else { /* off-diagonal entries */ 717904d1e70Sandi selinger bj[rowstart_offd + onz_row] = mat_j[col]; 718904d1e70Sandi selinger ba[rowstart_offd + onz_row] = mat_a[col]; 719904d1e70Sandi selinger onz_row++; 7203a063d27Sandi selinger } 7213a063d27Sandi selinger } 722904d1e70Sandi selinger ailen[j] = dnz_row; 723904d1e70Sandi selinger bilen[j] = onz_row; 7243a063d27Sandi selinger } 7253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7263a063d27Sandi selinger } 7273a063d27Sandi selinger 728ba38deedSJacob Faibussowitsch static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 729d71ae5a4SJacob Faibussowitsch { 730b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 731d0f46423SBarry Smith PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 732d0f46423SBarry Smith PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 
733b49de8d1SLois Curfman McInnes 7343a40ed3dSBarry Smith PetscFunctionBegin; 735b49de8d1SLois Curfman McInnes for (i = 0; i < m; i++) { 73654c59aa7SJacob Faibussowitsch if (idxm[i] < 0) continue; /* negative row */ 73754c59aa7SJacob Faibussowitsch PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 73885835d77SBarry Smith PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 739b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 740b49de8d1SLois Curfman McInnes for (j = 0; j < n; j++) { 74154c59aa7SJacob Faibussowitsch if (idxn[j] < 0) continue; /* negative column */ 74254c59aa7SJacob Faibussowitsch PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 743b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend) { 744b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 7459566063dSJacob Faibussowitsch PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 746fa852ad4SSatish Balay } else { 74748a46eb9SPierre Jolivet if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 748aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 749eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 750fa46199cSSatish Balay col--; 751b1fc9764SSatish Balay #else 752905e6a2fSBarry Smith col = aij->colmap[idxn[j]] - 1; 753b1fc9764SSatish Balay #endif 754e60e1c95SSatish Balay if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 75548a46eb9SPierre Jolivet else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 756b49de8d1SLois Curfman McInnes } 757b49de8d1SLois Curfman McInnes } 
758b49de8d1SLois Curfman McInnes } 7593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 760b49de8d1SLois Curfman McInnes } 761bc5ccf88SSatish Balay 762ba38deedSJacob Faibussowitsch static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 763d71ae5a4SJacob Faibussowitsch { 764bc5ccf88SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 765b1d57f15SBarry Smith PetscInt nstash, reallocs; 766bc5ccf88SSatish Balay 767bc5ccf88SSatish Balay PetscFunctionBegin; 7683ba16761SJacob Faibussowitsch if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 769bc5ccf88SSatish Balay 7709566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 7719566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 7729566063dSJacob Faibussowitsch PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 7733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 774bc5ccf88SSatish Balay } 775bc5ccf88SSatish Balay 776d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 777d71ae5a4SJacob Faibussowitsch { 778bc5ccf88SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 779b1d57f15SBarry Smith PetscMPIInt n; 780b1d57f15SBarry Smith PetscInt i, j, rstart, ncols, flg; 781e44c0bd4SBarry Smith PetscInt *row, *col; 782ace3abfcSBarry Smith PetscBool other_disassembled; 78387828ca2SBarry Smith PetscScalar *val; 784bc5ccf88SSatish Balay 78591c97fd4SSatish Balay /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 7866e111a19SKarl Rupp 787bc5ccf88SSatish Balay PetscFunctionBegin; 7884cb17eb5SBarry Smith if (!aij->donotstash && !mat->nooffprocentries) { 789a2d1c673SSatish Balay while (1) { 7909566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 
791a2d1c673SSatish Balay if (!flg) break; 792a2d1c673SSatish Balay 793bc5ccf88SSatish Balay for (i = 0; i < n;) { 794bc5ccf88SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 7952205254eSKarl Rupp for (j = i, rstart = row[j]; j < n; j++) { 7962205254eSKarl Rupp if (row[j] != rstart) break; 7972205254eSKarl Rupp } 798bc5ccf88SSatish Balay if (j < n) ncols = j - i; 799bc5ccf88SSatish Balay else ncols = n - i; 800bc5ccf88SSatish Balay /* Now assemble all these values with a single function call */ 8019566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 802bc5ccf88SSatish Balay i = j; 803bc5ccf88SSatish Balay } 804bc5ccf88SSatish Balay } 8059566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&mat->stash)); 806bc5ccf88SSatish Balay } 8078c3ff71bSJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 808c70f7ee4SJunchao Zhang if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 8099ecce9b1SRichard Tran Mills /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 8109ecce9b1SRichard Tran Mills if (mat->boundtocpu) { 8119566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 8129566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 8139ecce9b1SRichard Tran Mills } 814e2cf4d64SStefano Zampini #endif 8159566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(aij->A, mode)); 8169566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(aij->A, mode)); 817bc5ccf88SSatish Balay 818bc5ccf88SSatish Balay /* determine if any processor has disassembled, if so we must 819071fcb05SBarry Smith also disassemble ourself, in order that we may reassemble. 
*/ 820bc5ccf88SSatish Balay /* 821bc5ccf88SSatish Balay if nonzero structure of submatrix B cannot change then we know that 822bc5ccf88SSatish Balay no processor disassembled thus we can skip this stuff 823bc5ccf88SSatish Balay */ 824bc5ccf88SSatish Balay if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 825462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 82635cb6cd3SPierre Jolivet if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 8279566063dSJacob Faibussowitsch PetscCall(MatDisAssemble_MPIAIJ(mat)); 828ad59fb31SSatish Balay } 829ad59fb31SSatish Balay } 83048a46eb9SPierre Jolivet if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 8319566063dSJacob Faibussowitsch PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 8328c3ff71bSJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 833c70f7ee4SJunchao Zhang if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 834e2cf4d64SStefano Zampini #endif 8359566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(aij->B, mode)); 8369566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(aij->B, mode)); 837bc5ccf88SSatish Balay 8389566063dSJacob Faibussowitsch PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 8392205254eSKarl Rupp 840f4259b30SLisandro Dalcin aij->rowvalues = NULL; 841a30b2313SHong Zhang 8429566063dSJacob Faibussowitsch PetscCall(VecDestroy(&aij->diag)); 843e56f5c9eSBarry Smith 8444f9cfa9eSBarry Smith /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 845f4f49eeaSPierre Jolivet if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 846e56f5c9eSBarry Smith PetscObjectState state = 
aij->A->nonzerostate + aij->B->nonzerostate; 847462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 848e56f5c9eSBarry Smith } 8498c3ff71bSJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 850c70f7ee4SJunchao Zhang mat->offloadmask = PETSC_OFFLOAD_BOTH; 851e2cf4d64SStefano Zampini #endif 8523ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 853bc5ccf88SSatish Balay } 854bc5ccf88SSatish Balay 855ba38deedSJacob Faibussowitsch static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 856d71ae5a4SJacob Faibussowitsch { 85744a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 8583a40ed3dSBarry Smith 8593a40ed3dSBarry Smith PetscFunctionBegin; 8609566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(l->A)); 8619566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(l->B)); 8623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8631eb62cbbSBarry Smith } 8641eb62cbbSBarry Smith 865ba38deedSJacob Faibussowitsch static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 866d71ae5a4SJacob Faibussowitsch { 8671b1dd7adSMatthew G. Knepley Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 8681b1dd7adSMatthew G. Knepley PetscInt *lrows; 8696e520ac8SStefano Zampini PetscInt r, len; 8709939a2d1SBarry Smith PetscBool cong; 8711eb62cbbSBarry Smith 8723a40ed3dSBarry Smith PetscFunctionBegin; 8736e520ac8SStefano Zampini /* get locally owned rows */ 8749566063dSJacob Faibussowitsch PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 8759566063dSJacob Faibussowitsch PetscCall(MatHasCongruentLayouts(A, &cong)); 876dd8e379bSPierre Jolivet /* fix right-hand side if needed */ 87797b48c8fSBarry Smith if (x && b) { 8781b1dd7adSMatthew G. Knepley const PetscScalar *xx; 8791b1dd7adSMatthew G. Knepley PetscScalar *bb; 8801b1dd7adSMatthew G. 
Knepley 88128b400f6SJacob Faibussowitsch PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 8829566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 8839566063dSJacob Faibussowitsch PetscCall(VecGetArray(b, &bb)); 8841b1dd7adSMatthew G. Knepley for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 8859566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 8869566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b, &bb)); 88797b48c8fSBarry Smith } 888a92ad425SStefano Zampini 889a92ad425SStefano Zampini if (diag != 0.0 && cong) { 8909566063dSJacob Faibussowitsch PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 8919566063dSJacob Faibussowitsch PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 892a92ad425SStefano Zampini } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 893a92ad425SStefano Zampini Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 894a92ad425SStefano Zampini Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 895a92ad425SStefano Zampini PetscInt nnwA, nnwB; 896a92ad425SStefano Zampini PetscBool nnzA, nnzB; 897a92ad425SStefano Zampini 898a92ad425SStefano Zampini nnwA = aijA->nonew; 899a92ad425SStefano Zampini nnwB = aijB->nonew; 900a92ad425SStefano Zampini nnzA = aijA->keepnonzeropattern; 901a92ad425SStefano Zampini nnzB = aijB->keepnonzeropattern; 902a92ad425SStefano Zampini if (!nnzA) { 9039566063dSJacob Faibussowitsch PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 904a92ad425SStefano Zampini aijA->nonew = 0; 905a92ad425SStefano Zampini } 906a92ad425SStefano Zampini if (!nnzB) { 9079566063dSJacob Faibussowitsch PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 908a92ad425SStefano 
Zampini aijB->nonew = 0; 909a92ad425SStefano Zampini } 910a92ad425SStefano Zampini /* Must zero here before the next loop */ 9119566063dSJacob Faibussowitsch PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 9129566063dSJacob Faibussowitsch PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 9131b1dd7adSMatthew G. Knepley for (r = 0; r < len; ++r) { 9141b1dd7adSMatthew G. Knepley const PetscInt row = lrows[r] + A->rmap->rstart; 915a92ad425SStefano Zampini if (row >= A->cmap->N) continue; 9169566063dSJacob Faibussowitsch PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 917e2d53e46SBarry Smith } 918a92ad425SStefano Zampini aijA->nonew = nnwA; 919a92ad425SStefano Zampini aijB->nonew = nnwB; 9206eb55b6aSBarry Smith } else { 9219566063dSJacob Faibussowitsch PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 9229566063dSJacob Faibussowitsch PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 9236eb55b6aSBarry Smith } 9249566063dSJacob Faibussowitsch PetscCall(PetscFree(lrows)); 9259566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 9269566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 9274f9cfa9eSBarry Smith 9289939a2d1SBarry Smith /* only change matrix nonzero state if pattern was allowed to be changed */ 929f4f49eeaSPierre Jolivet if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 9309939a2d1SBarry Smith PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 931462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 9329939a2d1SBarry Smith } 9333ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9341eb62cbbSBarry Smith } 9351eb62cbbSBarry Smith 936ba38deedSJacob Faibussowitsch static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 
937d71ae5a4SJacob Faibussowitsch { 9389c7c4993SBarry Smith Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 9396497c311SBarry Smith PetscInt n = A->rmap->n; 940131c27b5Sprj- PetscInt i, j, r, m, len = 0; 94154bd4135SMatthew G. Knepley PetscInt *lrows, *owners = A->rmap->range; 942131c27b5Sprj- PetscMPIInt p = 0; 94354bd4135SMatthew G. Knepley PetscSFNode *rrows; 94454bd4135SMatthew G. Knepley PetscSF sf; 9459c7c4993SBarry Smith const PetscScalar *xx; 946fff043a9SJunchao Zhang PetscScalar *bb, *mask, *aij_a; 947564f14d6SBarry Smith Vec xmask, lmask; 948564f14d6SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 949564f14d6SBarry Smith const PetscInt *aj, *ii, *ridx; 950564f14d6SBarry Smith PetscScalar *aa; 9519c7c4993SBarry Smith 9529c7c4993SBarry Smith PetscFunctionBegin; 95354bd4135SMatthew G. Knepley /* Create SF where leaves are input rows and roots are owned rows */ 9549566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &lrows)); 95554bd4135SMatthew G. Knepley for (r = 0; r < n; ++r) lrows[r] = -1; 9569566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(N, &rrows)); 95754bd4135SMatthew G. Knepley for (r = 0; r < N; ++r) { 95854bd4135SMatthew G. Knepley const PetscInt idx = rows[r]; 959aed4548fSBarry Smith PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 9605ba17502SJed Brown if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 9619566063dSJacob Faibussowitsch PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 9625ba17502SJed Brown } 96354bd4135SMatthew G. Knepley rrows[r].rank = p; 96454bd4135SMatthew G. 
Knepley rrows[r].index = rows[r] - owners[p]; 9659c7c4993SBarry Smith } 9669566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 9679566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 96854bd4135SMatthew G. Knepley /* Collect flags for rows to be zeroed */ 9699566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 9709566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 9719566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&sf)); 97254bd4135SMatthew G. Knepley /* Compress and put in row numbers */ 9739371c9d4SSatish Balay for (r = 0; r < n; ++r) 9749371c9d4SSatish Balay if (lrows[r] >= 0) lrows[len++] = r; 975564f14d6SBarry Smith /* zero diagonal part of matrix */ 9769566063dSJacob Faibussowitsch PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 9774cf0e950SBarry Smith /* handle off-diagonal part of matrix */ 9789566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, &xmask, NULL)); 9799566063dSJacob Faibussowitsch PetscCall(VecDuplicate(l->lvec, &lmask)); 9809566063dSJacob Faibussowitsch PetscCall(VecGetArray(xmask, &bb)); 98154bd4135SMatthew G. 
Knepley for (i = 0; i < len; i++) bb[lrows[i]] = 1; 9829566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xmask, &bb)); 9839566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 9849566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 9859566063dSJacob Faibussowitsch PetscCall(VecDestroy(&xmask)); 986a92ad425SStefano Zampini if (x && b) { /* this code is buggy when the row and column layout don't match */ 987a92ad425SStefano Zampini PetscBool cong; 988a92ad425SStefano Zampini 9899566063dSJacob Faibussowitsch PetscCall(MatHasCongruentLayouts(A, &cong)); 99028b400f6SJacob Faibussowitsch PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 9919566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 9929566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 9939566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(l->lvec, &xx)); 9949566063dSJacob Faibussowitsch PetscCall(VecGetArray(b, &bb)); 995377aa5a1SBarry Smith } 9969566063dSJacob Faibussowitsch PetscCall(VecGetArray(lmask, &mask)); 9974cf0e950SBarry Smith /* remove zeroed rows of off-diagonal matrix */ 9989566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 999564f14d6SBarry Smith ii = aij->i; 10008e3a54c0SPierre Jolivet for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 1001564f14d6SBarry Smith /* loop over all elements of off process part of matrix zeroing removed columns*/ 1002564f14d6SBarry Smith if (aij->compressedrow.use) { 1003564f14d6SBarry Smith m = aij->compressedrow.nrows; 1004564f14d6SBarry Smith ii = aij->compressedrow.i; 1005564f14d6SBarry Smith ridx = aij->compressedrow.rindex; 1006564f14d6SBarry Smith for (i = 0; i < m; i++) { 
1007564f14d6SBarry Smith n = ii[i + 1] - ii[i]; 1008564f14d6SBarry Smith aj = aij->j + ii[i]; 1009fff043a9SJunchao Zhang aa = aij_a + ii[i]; 1010564f14d6SBarry Smith 1011564f14d6SBarry Smith for (j = 0; j < n; j++) { 101225266a92SSatish Balay if (PetscAbsScalar(mask[*aj])) { 1013377aa5a1SBarry Smith if (b) bb[*ridx] -= *aa * xx[*aj]; 1014564f14d6SBarry Smith *aa = 0.0; 1015564f14d6SBarry Smith } 1016564f14d6SBarry Smith aa++; 1017564f14d6SBarry Smith aj++; 1018564f14d6SBarry Smith } 1019564f14d6SBarry Smith ridx++; 1020564f14d6SBarry Smith } 1021564f14d6SBarry Smith } else { /* do not use compressed row format */ 1022564f14d6SBarry Smith m = l->B->rmap->n; 1023564f14d6SBarry Smith for (i = 0; i < m; i++) { 1024564f14d6SBarry Smith n = ii[i + 1] - ii[i]; 1025564f14d6SBarry Smith aj = aij->j + ii[i]; 1026fff043a9SJunchao Zhang aa = aij_a + ii[i]; 1027564f14d6SBarry Smith for (j = 0; j < n; j++) { 102825266a92SSatish Balay if (PetscAbsScalar(mask[*aj])) { 1029377aa5a1SBarry Smith if (b) bb[i] -= *aa * xx[*aj]; 1030564f14d6SBarry Smith *aa = 0.0; 1031564f14d6SBarry Smith } 1032564f14d6SBarry Smith aa++; 1033564f14d6SBarry Smith aj++; 1034564f14d6SBarry Smith } 1035564f14d6SBarry Smith } 1036564f14d6SBarry Smith } 1037a92ad425SStefano Zampini if (x && b) { 10389566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b, &bb)); 10399566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1040377aa5a1SBarry Smith } 10419566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 10429566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(lmask, &mask)); 10439566063dSJacob Faibussowitsch PetscCall(VecDestroy(&lmask)); 10449566063dSJacob Faibussowitsch PetscCall(PetscFree(lrows)); 10454f9cfa9eSBarry Smith 10464f9cfa9eSBarry Smith /* only change matrix nonzero state if pattern was allowed to be changed */ 1047f4f49eeaSPierre Jolivet if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 10484f9cfa9eSBarry Smith PetscObjectState state = l->A->nonzerostate 
+ l->B->nonzerostate; 1049462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 10504f9cfa9eSBarry Smith } 10513ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 10529c7c4993SBarry Smith } 10539c7c4993SBarry Smith 1054ba38deedSJacob Faibussowitsch static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1055d71ae5a4SJacob Faibussowitsch { 1056416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1057b1d57f15SBarry Smith PetscInt nt; 105819b3b6edSHong Zhang VecScatter Mvctx = a->Mvctx; 1059416022c9SBarry Smith 10603a40ed3dSBarry Smith PetscFunctionBegin; 10619566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(xx, &nt)); 106208401ef6SPierre Jolivet PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 10639566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064296d8154SBarry Smith PetscUseTypeMethod(a->A, mult, xx, yy); 10659566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1066296d8154SBarry Smith PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 10673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 10681eb62cbbSBarry Smith } 10691eb62cbbSBarry Smith 1070ba38deedSJacob Faibussowitsch static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1071d71ae5a4SJacob Faibussowitsch { 1072bd0c2dcbSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1073bd0c2dcbSBarry Smith 1074bd0c2dcbSBarry Smith PetscFunctionBegin; 10759566063dSJacob Faibussowitsch PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 10763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1077bd0c2dcbSBarry Smith } 1078bd0c2dcbSBarry Smith 1079ba38deedSJacob Faibussowitsch static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 
1080d71ae5a4SJacob Faibussowitsch { 1081416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 108201ad2aeeSHong Zhang VecScatter Mvctx = a->Mvctx; 10833a40ed3dSBarry Smith 10843a40ed3dSBarry Smith PetscFunctionBegin; 10859566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 10869566063dSJacob Faibussowitsch PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 10879566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 10889566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 10893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1090da3a660dSBarry Smith } 1091da3a660dSBarry Smith 1092ba38deedSJacob Faibussowitsch static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1093d71ae5a4SJacob Faibussowitsch { 1094416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1095da3a660dSBarry Smith 10963a40ed3dSBarry Smith PetscFunctionBegin; 1097da3a660dSBarry Smith /* do nondiagonal part */ 10989566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1099da3a660dSBarry Smith /* do local part */ 11009566063dSJacob Faibussowitsch PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 11019613dc34SJunchao Zhang /* add partial results together */ 11029566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 11039566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 11043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1105da3a660dSBarry Smith } 1106da3a660dSBarry Smith 1107ba38deedSJacob Faibussowitsch static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1108d71ae5a4SJacob Faibussowitsch { 11094f423910Svictorle MPI_Comm comm; 1110ad79cf63SBarry Smith Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ 
*)Bmat->data; 1111ad79cf63SBarry Smith Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1112cd0d46ebSvictorle IS Me, Notme; 1113b1d57f15SBarry Smith PetscInt M, N, first, last, *notme, i; 111454d735aeSStefano Zampini PetscBool lf; 1115b1d57f15SBarry Smith PetscMPIInt size; 1116cd0d46ebSvictorle 1117cd0d46ebSvictorle PetscFunctionBegin; 111842e5f5b4Svictorle /* Easy test: symmetric diagonal block */ 11199566063dSJacob Faibussowitsch PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1120462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 11213ba16761SJacob Faibussowitsch if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 11229566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 11239566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 11243ba16761SJacob Faibussowitsch if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 112542e5f5b4Svictorle 11267dae84e0SHong Zhang /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 11279566063dSJacob Faibussowitsch PetscCall(MatGetSize(Amat, &M, &N)); 11289566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 11299566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(N - last + first, ¬me)); 1130cd0d46ebSvictorle for (i = 0; i < first; i++) notme[i] = i; 1131cd0d46ebSvictorle for (i = last; i < M; i++) notme[i - last + first] = i; 11329566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 11339566063dSJacob Faibussowitsch PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 11349566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 113566501d38Svictorle Aoff = Aoffs[0]; 11369566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 113766501d38Svictorle Boff = Boffs[0]; 11389566063dSJacob Faibussowitsch PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 11399566063dSJacob Faibussowitsch PetscCall(MatDestroyMatrices(1, &Aoffs)); 11409566063dSJacob Faibussowitsch PetscCall(MatDestroyMatrices(1, &Boffs)); 11419566063dSJacob Faibussowitsch PetscCall(ISDestroy(&Me)); 11429566063dSJacob Faibussowitsch PetscCall(ISDestroy(&Notme)); 11439566063dSJacob Faibussowitsch PetscCall(PetscFree(notme)); 11443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1145cd0d46ebSvictorle } 1146cd0d46ebSvictorle 1147ba38deedSJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1148d71ae5a4SJacob Faibussowitsch { 1149416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1150da3a660dSBarry Smith 11513a40ed3dSBarry Smith PetscFunctionBegin; 1152da3a660dSBarry Smith /* do nondiagonal part */ 11539566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1154da3a660dSBarry Smith /* do local part */ 11559566063dSJacob Faibussowitsch 
PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 11569613dc34SJunchao Zhang /* add partial results together */ 11579566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 11589566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 11593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1160da3a660dSBarry Smith } 1161da3a660dSBarry Smith 11621eb62cbbSBarry Smith /* 11631eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 11641eb62cbbSBarry Smith diagonal block 11651eb62cbbSBarry Smith */ 1166ba38deedSJacob Faibussowitsch static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1167d71ae5a4SJacob Faibussowitsch { 1168416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 11693a40ed3dSBarry Smith 11703a40ed3dSBarry Smith PetscFunctionBegin; 117108401ef6SPierre Jolivet PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1172aed4548fSBarry Smith PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 11739566063dSJacob Faibussowitsch PetscCall(MatGetDiagonal(a->A, v)); 11743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 11751eb62cbbSBarry Smith } 11761eb62cbbSBarry Smith 1177ba38deedSJacob Faibussowitsch static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1178d71ae5a4SJacob Faibussowitsch { 1179052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 11803a40ed3dSBarry Smith 11813a40ed3dSBarry Smith PetscFunctionBegin; 11829566063dSJacob Faibussowitsch PetscCall(MatScale(a->A, aa)); 11839566063dSJacob Faibussowitsch PetscCall(MatScale(a->B, aa)); 11843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1185052efed2SBarry Smith } 1186052efed2SBarry Smith 
1187ba38deedSJacob Faibussowitsch static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1188d71ae5a4SJacob Faibussowitsch { 11898e2fed03SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 11908e2fed03SBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 11918e2fed03SBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 11923ea6fe3dSLisandro Dalcin const PetscInt *garray = aij->garray; 11932e5835c6SStefano Zampini const PetscScalar *aa, *ba; 119417a3732bSBarry Smith PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 119517a3732bSBarry Smith PetscInt64 nz, hnz; 11963ea6fe3dSLisandro Dalcin PetscInt *rowlens; 11973ea6fe3dSLisandro Dalcin PetscInt *colidxs; 11983ea6fe3dSLisandro Dalcin PetscScalar *matvals; 119917a3732bSBarry Smith PetscMPIInt rank; 12008e2fed03SBarry Smith 12018e2fed03SBarry Smith PetscFunctionBegin; 12029566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 12033ea6fe3dSLisandro Dalcin 12043ea6fe3dSLisandro Dalcin M = mat->rmap->N; 12053ea6fe3dSLisandro Dalcin N = mat->cmap->N; 12063ea6fe3dSLisandro Dalcin m = mat->rmap->n; 12073ea6fe3dSLisandro Dalcin rs = mat->rmap->rstart; 12083ea6fe3dSLisandro Dalcin cs = mat->cmap->rstart; 12098e2fed03SBarry Smith nz = A->nz + B->nz; 12103ea6fe3dSLisandro Dalcin 12113ea6fe3dSLisandro Dalcin /* write matrix header */ 12120700a824SBarry Smith header[0] = MAT_FILE_CLASSID; 12139371c9d4SSatish Balay header[1] = M; 12149371c9d4SSatish Balay header[2] = N; 121517a3732bSBarry Smith PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 121617a3732bSBarry Smith PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1217835f2295SStefano Zampini if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 12189566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 12198e2fed03SBarry Smith 12203ea6fe3dSLisandro Dalcin /* fill in and store row lengths */ 12219566063dSJacob Faibussowitsch 
PetscCall(PetscMalloc1(m, &rowlens)); 12223ea6fe3dSLisandro Dalcin for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 12239566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 12249566063dSJacob Faibussowitsch PetscCall(PetscFree(rowlens)); 12258e2fed03SBarry Smith 12263ea6fe3dSLisandro Dalcin /* fill in and store column indices */ 12279566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &colidxs)); 12283ea6fe3dSLisandro Dalcin for (cnt = 0, i = 0; i < m; i++) { 12293ea6fe3dSLisandro Dalcin for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 12303ea6fe3dSLisandro Dalcin if (garray[B->j[jb]] > cs) break; 12313ea6fe3dSLisandro Dalcin colidxs[cnt++] = garray[B->j[jb]]; 12328e2fed03SBarry Smith } 12339371c9d4SSatish Balay for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 12349371c9d4SSatish Balay for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 12358e2fed03SBarry Smith } 123617a3732bSBarry Smith PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 12379566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 12389566063dSJacob Faibussowitsch PetscCall(PetscFree(colidxs)); 12398e2fed03SBarry Smith 12403ea6fe3dSLisandro Dalcin /* fill in and store nonzero values */ 12419566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 12429566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 12439566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &matvals)); 12443ea6fe3dSLisandro Dalcin for (cnt = 0, i = 0; i < m; i++) { 12453ea6fe3dSLisandro Dalcin for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 12463ea6fe3dSLisandro Dalcin if (garray[B->j[jb]] > cs) break; 12472e5835c6SStefano Zampini matvals[cnt++] = ba[jb]; 12488e2fed03SBarry Smith } 12499371c9d4SSatish 
Balay for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 12509371c9d4SSatish Balay for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 12518e2fed03SBarry Smith } 12529566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 12539566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 125417a3732bSBarry Smith PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 12559566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 12569566063dSJacob Faibussowitsch PetscCall(PetscFree(matvals)); 12578e2fed03SBarry Smith 12583ea6fe3dSLisandro Dalcin /* write block size option to the viewer's .info file */ 12599566063dSJacob Faibussowitsch PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 12603ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 12618e2fed03SBarry Smith } 12628e2fed03SBarry Smith 12639804daf3SBarry Smith #include <petscdraw.h> 1264ba38deedSJacob Faibussowitsch static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1265d71ae5a4SJacob Faibussowitsch { 126644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 126732dcc486SBarry Smith PetscMPIInt rank = aij->rank, size = aij->size; 1268ace3abfcSBarry Smith PetscBool isdraw, iascii, isbinary; 1269b0a32e0cSBarry Smith PetscViewer sviewer; 1270f3ef73ceSBarry Smith PetscViewerFormat format; 1271416022c9SBarry Smith 12723a40ed3dSBarry Smith PetscFunctionBegin; 12739566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 12749566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 12759566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 127632077d6dSBarry Smith if 
(iascii) { 12779566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 1278ef5fdb51SBarry Smith if (format == PETSC_VIEWER_LOAD_BALANCE) { 12791690c2aeSBarry Smith PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 12809566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size, &nz)); 12819566063dSJacob Faibussowitsch PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1282835f2295SStefano Zampini for (i = 0; i < size; i++) { 1283ef5fdb51SBarry Smith nmax = PetscMax(nmax, nz[i]); 1284ef5fdb51SBarry Smith nmin = PetscMin(nmin, nz[i]); 1285ef5fdb51SBarry Smith navg += nz[i]; 1286ef5fdb51SBarry Smith } 12879566063dSJacob Faibussowitsch PetscCall(PetscFree(nz)); 1288ef5fdb51SBarry Smith navg = navg / size; 12899566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 12903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1291ef5fdb51SBarry Smith } 12929566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 1293456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 12944e220ebcSLois Curfman McInnes MatInfo info; 12956335e310SSatish Balay PetscInt *inodes = NULL; 1296923f20ffSKris Buschelman 12979566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 12989566063dSJacob Faibussowitsch PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 12999566063dSJacob Faibussowitsch PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 13009566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1301923f20ffSKris Buschelman if (!inodes) { 13029371c9d4SSatish Balay PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz 
alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1303835f2295SStefano Zampini info.memory)); 13046831982aSBarry Smith } else { 1305835f2295SStefano Zampini PetscCall( 1306835f2295SStefano Zampini PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory)); 13076831982aSBarry Smith } 13089566063dSJacob Faibussowitsch PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 13099566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 13109566063dSJacob Faibussowitsch PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 13119566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 13129566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 13139566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 13149566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 13159566063dSJacob Faibussowitsch PetscCall(VecScatterView(aij->Mvctx, viewer)); 13163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1317fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 1318923f20ffSKris Buschelman PetscInt inodecount, inodelimit, *inodes; 13199566063dSJacob Faibussowitsch PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1320923f20ffSKris Buschelman if (inodes) { 13219566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", 
inodecount, inodelimit)); 1322d38fa0fbSBarry Smith } else { 13239566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1324d38fa0fbSBarry Smith } 13253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13264aedb280SBarry Smith } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 13273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 132808480c60SBarry Smith } 13298e2fed03SBarry Smith } else if (isbinary) { 13308e2fed03SBarry Smith if (size == 1) { 13319566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 13329566063dSJacob Faibussowitsch PetscCall(MatView(aij->A, viewer)); 13338e2fed03SBarry Smith } else { 13349566063dSJacob Faibussowitsch PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 13358e2fed03SBarry Smith } 13363ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 133771e56450SStefano Zampini } else if (iascii && size == 1) { 13389566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 13399566063dSJacob Faibussowitsch PetscCall(MatView(aij->A, viewer)); 13403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13410f5bd95cSBarry Smith } else if (isdraw) { 1342b0a32e0cSBarry Smith PetscDraw draw; 1343ace3abfcSBarry Smith PetscBool isnull; 13449566063dSJacob Faibussowitsch PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 13459566063dSJacob Faibussowitsch PetscCall(PetscDrawIsNull(draw, &isnull)); 13463ba16761SJacob Faibussowitsch if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 134719bcc07fSBarry Smith } 134819bcc07fSBarry Smith 134971e56450SStefano Zampini { /* assemble the entire matrix onto first processor */ 135071e56450SStefano Zampini Mat A = NULL, Av; 135171e56450SStefano Zampini IS isrow, iscol; 13522ee70a88SLois Curfman McInnes 13539566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 
? mat->rmap->N : 0, 0, 1, &isrow)); 13549566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 13559566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 13569566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 135771e56450SStefano Zampini /* The commented code uses MatCreateSubMatrices instead */ 135871e56450SStefano Zampini /* 135971e56450SStefano Zampini Mat *AA, A = NULL, Av; 136071e56450SStefano Zampini IS isrow,iscol; 136171e56450SStefano Zampini 13629566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 13639566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 13649566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1365dd400576SPatrick Sanan if (rank == 0) { 13669566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)AA[0])); 136771e56450SStefano Zampini A = AA[0]; 136871e56450SStefano Zampini Av = AA[0]; 136995373324SBarry Smith } 13709566063dSJacob Faibussowitsch PetscCall(MatDestroySubMatrices(1,&AA)); 137171e56450SStefano Zampini */ 13729566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol)); 13739566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isrow)); 137455843e3eSBarry Smith /* 137555843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 1376b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 137755843e3eSBarry Smith */ 13789566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1379dd400576SPatrick Sanan if (rank == 0) { 138048a46eb9SPierre Jolivet if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 
13819566063dSJacob Faibussowitsch PetscCall(MatView_SeqAIJ(Av, sviewer)); 138295373324SBarry Smith } 13839566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 13849566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 138595373324SBarry Smith } 13863ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13871eb62cbbSBarry Smith } 13881eb62cbbSBarry Smith 1389d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1390d71ae5a4SJacob Faibussowitsch { 1391ace3abfcSBarry Smith PetscBool iascii, isdraw, issocket, isbinary; 1392416022c9SBarry Smith 13933a40ed3dSBarry Smith PetscFunctionBegin; 13949566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 13959566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 13969566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 13979566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 139848a46eb9SPierre Jolivet if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 13993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1400416022c9SBarry Smith } 1401416022c9SBarry Smith 1402ba38deedSJacob Faibussowitsch static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1403d71ae5a4SJacob Faibussowitsch { 140444a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1405f4259b30SLisandro Dalcin Vec bb1 = NULL; 1406ace3abfcSBarry Smith PetscBool hasop; 14078a729477SBarry Smith 14083a40ed3dSBarry Smith PetscFunctionBegin; 1409a2b30743SBarry Smith if (flag == SOR_APPLY_UPPER) { 14109566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, 
bb, omega, flag, fshift, lits, 1, xx)); 14113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1412a2b30743SBarry Smith } 1413a2b30743SBarry Smith 141448a46eb9SPierre Jolivet if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 14154e980039SJed Brown 1416c16cb8f2SBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1417da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 14189566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 14192798e883SHong Zhang its--; 1420da3a660dSBarry Smith } 14212798e883SHong Zhang 14222798e883SHong Zhang while (its--) { 14239566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 14249566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 14252798e883SHong Zhang 1426c14dc6b6SHong Zhang /* update rhs: bb1 = bb - B*x */ 14279566063dSJacob Faibussowitsch PetscCall(VecScale(mat->lvec, -1.0)); 14289566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 14292798e883SHong Zhang 1430c14dc6b6SHong Zhang /* local sweep */ 14319566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 14322798e883SHong Zhang } 14333a40ed3dSBarry Smith } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1434da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 14359566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 14362798e883SHong Zhang its--; 1437da3a660dSBarry Smith } 14382798e883SHong Zhang while (its--) { 14399566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 14409566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, 
SCATTER_FORWARD)); 14412798e883SHong Zhang 1442c14dc6b6SHong Zhang /* update rhs: bb1 = bb - B*x */ 14439566063dSJacob Faibussowitsch PetscCall(VecScale(mat->lvec, -1.0)); 14449566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1445c14dc6b6SHong Zhang 1446c14dc6b6SHong Zhang /* local sweep */ 14479566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 14482798e883SHong Zhang } 14493a40ed3dSBarry Smith } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1450da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 14519566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 14522798e883SHong Zhang its--; 1453da3a660dSBarry Smith } 14542798e883SHong Zhang while (its--) { 14559566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 14569566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 14572798e883SHong Zhang 1458c14dc6b6SHong Zhang /* update rhs: bb1 = bb - B*x */ 14599566063dSJacob Faibussowitsch PetscCall(VecScale(mat->lvec, -1.0)); 14609566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 14612798e883SHong Zhang 1462c14dc6b6SHong Zhang /* local sweep */ 14639566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 14642798e883SHong Zhang } 1465a7420bb7SBarry Smith } else if (flag & SOR_EISENSTAT) { 1466a7420bb7SBarry Smith Vec xx1; 1467a7420bb7SBarry Smith 14689566063dSJacob Faibussowitsch PetscCall(VecDuplicate(bb, &xx1)); 14699566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1470a7420bb7SBarry Smith 14719566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mat->Mvctx, 
xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 14729566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1473a7420bb7SBarry Smith if (!mat->diag) { 14749566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 14759566063dSJacob Faibussowitsch PetscCall(MatGetDiagonal(matin, mat->diag)); 1476a7420bb7SBarry Smith } 14779566063dSJacob Faibussowitsch PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1478bd0c2dcbSBarry Smith if (hasop) { 14799566063dSJacob Faibussowitsch PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1480bd0c2dcbSBarry Smith } else { 14819566063dSJacob Faibussowitsch PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1482bd0c2dcbSBarry Smith } 14839566063dSJacob Faibussowitsch PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1484887ee2caSBarry Smith 14859566063dSJacob Faibussowitsch PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1486a7420bb7SBarry Smith 1487a7420bb7SBarry Smith /* local sweep */ 14889566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 14899566063dSJacob Faibussowitsch PetscCall(VecAXPY(xx, 1.0, xx1)); 14909566063dSJacob Faibussowitsch PetscCall(VecDestroy(&xx1)); 1491ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1492c14dc6b6SHong Zhang 14939566063dSJacob Faibussowitsch PetscCall(VecDestroy(&bb1)); 1494a0808db4SHong Zhang 14957b6c816cSBarry Smith matin->factorerrortype = mat->A->factorerrortype; 14963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 14978a729477SBarry Smith } 1498a66be287SLois Curfman McInnes 1499ba38deedSJacob Faibussowitsch static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1500d71ae5a4SJacob Faibussowitsch { 150172e6a0cfSJed Brown Mat aA, aB, Aperm; 150272e6a0cfSJed Brown const 
PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 150372e6a0cfSJed Brown PetscScalar *aa, *ba; 150472e6a0cfSJed Brown PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 150572e6a0cfSJed Brown PetscSF rowsf, sf; 15060298fd71SBarry Smith IS parcolp = NULL; 150772e6a0cfSJed Brown PetscBool done; 150842e855d1Svictor 150942e855d1Svictor PetscFunctionBegin; 15109566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &m, &n)); 15119566063dSJacob Faibussowitsch PetscCall(ISGetIndices(rowp, &rwant)); 15129566063dSJacob Faibussowitsch PetscCall(ISGetIndices(colp, &cwant)); 15139566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 151472e6a0cfSJed Brown 151572e6a0cfSJed Brown /* Invert row permutation to find out where my rows should go */ 15169566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 15179566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 15189566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(rowsf)); 151972e6a0cfSJed Brown for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i; 15209566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 15219566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 152272e6a0cfSJed Brown 152372e6a0cfSJed Brown /* Invert column permutation to find out where my columns should go */ 15249566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 15259566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 15269566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(sf)); 152772e6a0cfSJed Brown for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 15289566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(sf, MPIU_INT, 
work, cdest, MPI_REPLACE)); 15299566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 15309566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&sf)); 153172e6a0cfSJed Brown 15329566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(rowp, &rwant)); 15339566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(colp, &cwant)); 15349566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 153572e6a0cfSJed Brown 153672e6a0cfSJed Brown /* Find out where my gcols should go */ 15379566063dSJacob Faibussowitsch PetscCall(MatGetSize(aB, NULL, &ng)); 15389566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ng, &gcdest)); 15399566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 15409566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 15419566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(sf)); 15429566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 15439566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 15449566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&sf)); 154572e6a0cfSJed Brown 15469566063dSJacob Faibussowitsch PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 15479566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 15489566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 154972e6a0cfSJed Brown for (i = 0; i < m; i++) { 1550131c27b5Sprj- PetscInt row = rdest[i]; 1551131c27b5Sprj- PetscMPIInt rowner; 15529566063dSJacob Faibussowitsch PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 155372e6a0cfSJed Brown for (j = ai[i]; j < ai[i + 1]; j++) { 1554131c27b5Sprj- PetscInt col = cdest[aj[j]]; 1555131c27b5Sprj- PetscMPIInt cowner; 15569566063dSJacob 
Faibussowitsch PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 155772e6a0cfSJed Brown if (rowner == cowner) dnnz[i]++; 155872e6a0cfSJed Brown else onnz[i]++; 155972e6a0cfSJed Brown } 156072e6a0cfSJed Brown for (j = bi[i]; j < bi[i + 1]; j++) { 1561131c27b5Sprj- PetscInt col = gcdest[bj[j]]; 1562131c27b5Sprj- PetscMPIInt cowner; 15639566063dSJacob Faibussowitsch PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 156472e6a0cfSJed Brown if (rowner == cowner) dnnz[i]++; 156572e6a0cfSJed Brown else onnz[i]++; 156672e6a0cfSJed Brown } 156772e6a0cfSJed Brown } 15689566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 15699566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 15709566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 15719566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 15729566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&rowsf)); 157372e6a0cfSJed Brown 15749566063dSJacob Faibussowitsch PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 15759566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(aA, &aa)); 15769566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(aB, &ba)); 157772e6a0cfSJed Brown for (i = 0; i < m; i++) { 157872e6a0cfSJed Brown PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1579970468b0SJed Brown PetscInt j0, rowlen; 158072e6a0cfSJed Brown rowlen = ai[i + 1] - ai[i]; 1581970468b0SJed Brown for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1582970468b0SJed Brown for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 15839566063dSJacob Faibussowitsch 
PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1584970468b0SJed Brown } 158572e6a0cfSJed Brown rowlen = bi[i + 1] - bi[i]; 1586970468b0SJed Brown for (j0 = j = 0; j < rowlen; j0 = j) { 1587970468b0SJed Brown for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 15889566063dSJacob Faibussowitsch PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1589970468b0SJed Brown } 159072e6a0cfSJed Brown } 15919566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 15929566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 15939566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 15949566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 15959566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 15969566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 15979566063dSJacob Faibussowitsch PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 15989566063dSJacob Faibussowitsch PetscCall(PetscFree3(work, rdest, cdest)); 15999566063dSJacob Faibussowitsch PetscCall(PetscFree(gcdest)); 16009566063dSJacob Faibussowitsch if (parcolp) PetscCall(ISDestroy(&colp)); 160172e6a0cfSJed Brown *B = Aperm; 16023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 160342e855d1Svictor } 160442e855d1Svictor 1605ba38deedSJacob Faibussowitsch static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1606d71ae5a4SJacob Faibussowitsch { 1607c5e4d11fSDmitry Karpeev Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1608c5e4d11fSDmitry Karpeev 1609c5e4d11fSDmitry Karpeev PetscFunctionBegin; 16109566063dSJacob Faibussowitsch PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1611c5e4d11fSDmitry Karpeev if (ghosts) *ghosts = aij->garray; 
16123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1613c5e4d11fSDmitry Karpeev } 1614c5e4d11fSDmitry Karpeev 1615ba38deedSJacob Faibussowitsch static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1616d71ae5a4SJacob Faibussowitsch { 1617a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1618a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 16193966268fSBarry Smith PetscLogDouble isend[5], irecv[5]; 1620a66be287SLois Curfman McInnes 16213a40ed3dSBarry Smith PetscFunctionBegin; 16224e220ebcSLois Curfman McInnes info->block_size = 1.0; 16239566063dSJacob Faibussowitsch PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 16242205254eSKarl Rupp 16259371c9d4SSatish Balay isend[0] = info->nz_used; 16269371c9d4SSatish Balay isend[1] = info->nz_allocated; 16279371c9d4SSatish Balay isend[2] = info->nz_unneeded; 16289371c9d4SSatish Balay isend[3] = info->memory; 16299371c9d4SSatish Balay isend[4] = info->mallocs; 16302205254eSKarl Rupp 16319566063dSJacob Faibussowitsch PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 16322205254eSKarl Rupp 16339371c9d4SSatish Balay isend[0] += info->nz_used; 16349371c9d4SSatish Balay isend[1] += info->nz_allocated; 16359371c9d4SSatish Balay isend[2] += info->nz_unneeded; 16369371c9d4SSatish Balay isend[3] += info->memory; 16379371c9d4SSatish Balay isend[4] += info->mallocs; 1638a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 16394e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 16404e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 16414e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 16424e220ebcSLois Curfman McInnes info->memory = isend[3]; 16434e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 1644a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 1645462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 16462205254eSKarl Rupp 
16474e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 16484e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 16494e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 16504e220ebcSLois Curfman McInnes info->memory = irecv[3]; 16514e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1652a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 1653462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 16542205254eSKarl Rupp 16554e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 16564e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 16574e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 16584e220ebcSLois Curfman McInnes info->memory = irecv[3]; 16594e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1660a66be287SLois Curfman McInnes } 16614e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 16624e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 16634e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 16643ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1665a66be287SLois Curfman McInnes } 1666a66be287SLois Curfman McInnes 1667d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1668d71ae5a4SJacob Faibussowitsch { 1669c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1670c74985f6SBarry Smith 16713a40ed3dSBarry Smith PetscFunctionBegin; 167212c028f9SKris Buschelman switch (op) { 1673512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 167412c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 167528b2fa4aSMatthew Knepley case MAT_UNUSED_NONZERO_LOCATION_ERR: 1676a9817697SBarry Smith case MAT_KEEP_NONZERO_PATTERN: 167712c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 16780ad02fcaSStefano Zampini case MAT_USE_INODES: 167912c028f9SKris Buschelman case MAT_IGNORE_ZERO_ENTRIES: 
16801a2c6b5cSJunchao Zhang case MAT_FORM_EXPLICIT_TRANSPOSE: 1681fa1f0d2cSMatthew G Knepley MatCheckPreallocated(A, 1); 16829566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 16839566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->B, op, flg)); 168412c028f9SKris Buschelman break; 168512c028f9SKris Buschelman case MAT_ROW_ORIENTED: 168643674050SBarry Smith MatCheckPreallocated(A, 1); 16874e0d8c25SBarry Smith a->roworiented = flg; 16882205254eSKarl Rupp 16899566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 16909566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->B, op, flg)); 169112c028f9SKris Buschelman break; 16928c78258cSHong Zhang case MAT_FORCE_DIAGONAL_ENTRIES: 1693d71ae5a4SJacob Faibussowitsch case MAT_SORTED_FULL: 1694d71ae5a4SJacob Faibussowitsch PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1695d71ae5a4SJacob Faibussowitsch break; 1696d71ae5a4SJacob Faibussowitsch case MAT_IGNORE_OFF_PROC_ENTRIES: 1697d71ae5a4SJacob Faibussowitsch a->donotstash = flg; 1698d71ae5a4SJacob Faibussowitsch break; 1699c8ca1fbcSVaclav Hapla /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1700ffa07934SHong Zhang case MAT_SPD: 170177e54ba9SKris Buschelman case MAT_SYMMETRIC: 170277e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 1703bf108f30SBarry Smith case MAT_HERMITIAN: 1704bf108f30SBarry Smith case MAT_SYMMETRY_ETERNAL: 1705b94d7dedSBarry Smith case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1706b94d7dedSBarry Smith case MAT_SPD_ETERNAL: 1707b94d7dedSBarry Smith /* if the diagonal matrix is square it inherits some of the properties above */ 170877e54ba9SKris Buschelman break; 1709d71ae5a4SJacob Faibussowitsch case MAT_SUBMAT_SINGLEIS: 1710d71ae5a4SJacob Faibussowitsch A->submat_singleis = flg; 1711d71ae5a4SJacob Faibussowitsch break; 1712957cac9fSHong Zhang case MAT_STRUCTURE_ONLY: 1713957cac9fSHong Zhang /* The option is handled directly by MatSetOption() */ 
1714957cac9fSHong Zhang break; 1715d71ae5a4SJacob Faibussowitsch default: 1716d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 17173a40ed3dSBarry Smith } 17183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1719c74985f6SBarry Smith } 1720c74985f6SBarry Smith 1721d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1722d71ae5a4SJacob Faibussowitsch { 1723154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 172487828ca2SBarry Smith PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1725d0f46423SBarry Smith PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1726d0f46423SBarry Smith PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1727b1d57f15SBarry Smith PetscInt *cmap, *idx_p; 172839e00950SLois Curfman McInnes 17293a40ed3dSBarry Smith PetscFunctionBegin; 173028b400f6SJacob Faibussowitsch PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 17317a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 17327a0afa10SBarry Smith 173370f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 17347a0afa10SBarry Smith /* 17357a0afa10SBarry Smith allocate enough space to hold information from the longest row. 
17367a0afa10SBarry Smith */ 17377a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1738b1d57f15SBarry Smith PetscInt max = 1, tmp; 1739d0f46423SBarry Smith for (i = 0; i < matin->rmap->n; i++) { 17407a0afa10SBarry Smith tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 17412205254eSKarl Rupp if (max < tmp) max = tmp; 17427a0afa10SBarry Smith } 17439566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 17447a0afa10SBarry Smith } 17457a0afa10SBarry Smith 1746aed4548fSBarry Smith PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1747abc0e9e4SLois Curfman McInnes lrow = row - rstart; 174839e00950SLois Curfman McInnes 17499371c9d4SSatish Balay pvA = &vworkA; 17509371c9d4SSatish Balay pcA = &cworkA; 17519371c9d4SSatish Balay pvB = &vworkB; 17529371c9d4SSatish Balay pcB = &cworkB; 17539371c9d4SSatish Balay if (!v) { 17549371c9d4SSatish Balay pvA = NULL; 17559371c9d4SSatish Balay pvB = NULL; 17569371c9d4SSatish Balay } 17579371c9d4SSatish Balay if (!idx) { 17589371c9d4SSatish Balay pcA = NULL; 17599371c9d4SSatish Balay if (!v) pcB = NULL; 17609371c9d4SSatish Balay } 17619566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 17629566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1763154123eaSLois Curfman McInnes nztot = nzA + nzB; 1764154123eaSLois Curfman McInnes 176570f0671dSBarry Smith cmap = mat->garray; 1766154123eaSLois Curfman McInnes if (v || idx) { 1767154123eaSLois Curfman McInnes if (nztot) { 1768154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 1769b1d57f15SBarry Smith PetscInt imark = -1; 1770154123eaSLois Curfman McInnes if (v) { 177170f0671dSBarry Smith *v = v_p = mat->rowvalues; 177239e00950SLois Curfman McInnes for (i = 0; i < nzB; i++) { 177370f0671dSBarry Smith if 
(cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1774154123eaSLois Curfman McInnes else break; 1775154123eaSLois Curfman McInnes } 1776154123eaSLois Curfman McInnes imark = i; 177770f0671dSBarry Smith for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 177870f0671dSBarry Smith for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1779154123eaSLois Curfman McInnes } 1780154123eaSLois Curfman McInnes if (idx) { 178170f0671dSBarry Smith *idx = idx_p = mat->rowindices; 178270f0671dSBarry Smith if (imark > -1) { 1783ad540459SPierre Jolivet for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 178470f0671dSBarry Smith } else { 1785154123eaSLois Curfman McInnes for (i = 0; i < nzB; i++) { 178670f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1787154123eaSLois Curfman McInnes else break; 1788154123eaSLois Curfman McInnes } 1789154123eaSLois Curfman McInnes imark = i; 179070f0671dSBarry Smith } 179170f0671dSBarry Smith for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 179270f0671dSBarry Smith for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 179339e00950SLois Curfman McInnes } 17943f97c4b0SBarry Smith } else { 1795f4259b30SLisandro Dalcin if (idx) *idx = NULL; 1796f4259b30SLisandro Dalcin if (v) *v = NULL; 17971ca473b0SSatish Balay } 1798154123eaSLois Curfman McInnes } 179939e00950SLois Curfman McInnes *nz = nztot; 18009566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 18019566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 18023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 180339e00950SLois Curfman McInnes } 180439e00950SLois Curfman McInnes 1805d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1806d71ae5a4SJacob Faibussowitsch { 18077a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 18083a40ed3dSBarry 
Smith 18093a40ed3dSBarry Smith PetscFunctionBegin; 181028b400f6SJacob Faibussowitsch PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 18117a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 18123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 181339e00950SLois Curfman McInnes } 181439e00950SLois Curfman McInnes 1815ba38deedSJacob Faibussowitsch static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1816d71ae5a4SJacob Faibussowitsch { 1817855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1818ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1819d0f46423SBarry Smith PetscInt i, j, cstart = mat->cmap->rstart; 1820329f5518SBarry Smith PetscReal sum = 0.0; 1821fff043a9SJunchao Zhang const MatScalar *v, *amata, *bmata; 18226497c311SBarry Smith PetscMPIInt iN; 182304ca555eSLois Curfman McInnes 18243a40ed3dSBarry Smith PetscFunctionBegin; 182517699dbbSLois Curfman McInnes if (aij->size == 1) { 18269566063dSJacob Faibussowitsch PetscCall(MatNorm(aij->A, type, norm)); 182737fa93a5SLois Curfman McInnes } else { 18289566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 18299566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 183004ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 1831fff043a9SJunchao Zhang v = amata; 183204ca555eSLois Curfman McInnes for (i = 0; i < amat->nz; i++) { 18339371c9d4SSatish Balay sum += PetscRealPart(PetscConj(*v) * (*v)); 18349371c9d4SSatish Balay v++; 183504ca555eSLois Curfman McInnes } 1836fff043a9SJunchao Zhang v = bmata; 183704ca555eSLois Curfman McInnes for (i = 0; i < bmat->nz; i++) { 18389371c9d4SSatish Balay sum += PetscRealPart(PetscConj(*v) * (*v)); 18399371c9d4SSatish Balay v++; 184004ca555eSLois Curfman McInnes } 1841462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, 
MPIU_SUM, PetscObjectComm((PetscObject)mat))); 18428f1a2a5eSBarry Smith *norm = PetscSqrtReal(*norm); 18439566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 18443a40ed3dSBarry Smith } else if (type == NORM_1) { /* max column norm */ 1845329f5518SBarry Smith PetscReal *tmp, *tmp2; 1846b1d57f15SBarry Smith PetscInt *jj, *garray = aij->garray; 18479566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 18489566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 184904ca555eSLois Curfman McInnes *norm = 0.0; 18509371c9d4SSatish Balay v = amata; 18519371c9d4SSatish Balay jj = amat->j; 185204ca555eSLois Curfman McInnes for (j = 0; j < amat->nz; j++) { 18539371c9d4SSatish Balay tmp[cstart + *jj++] += PetscAbsScalar(*v); 18549371c9d4SSatish Balay v++; 185504ca555eSLois Curfman McInnes } 18569371c9d4SSatish Balay v = bmata; 18579371c9d4SSatish Balay jj = bmat->j; 185804ca555eSLois Curfman McInnes for (j = 0; j < bmat->nz; j++) { 18599371c9d4SSatish Balay tmp[garray[*jj++]] += PetscAbsScalar(*v); 18609371c9d4SSatish Balay v++; 186104ca555eSLois Curfman McInnes } 18626497c311SBarry Smith PetscCall(PetscMPIIntCast(mat->cmap->N, &iN)); 1863462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1864d0f46423SBarry Smith for (j = 0; j < mat->cmap->N; j++) { 186504ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 186604ca555eSLois Curfman McInnes } 18679566063dSJacob Faibussowitsch PetscCall(PetscFree(tmp)); 18689566063dSJacob Faibussowitsch PetscCall(PetscFree(tmp2)); 18699566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 18703a40ed3dSBarry Smith } else if (type == NORM_INFINITY) { /* max row norm */ 1871329f5518SBarry Smith PetscReal ntemp = 0.0; 1872d0f46423SBarry Smith for (j = 0; j < aij->A->rmap->n; j++) { 18738e3a54c0SPierre Jolivet v = 
PetscSafePointerPlusOffset(amata, amat->i[j]); 187404ca555eSLois Curfman McInnes sum = 0.0; 187504ca555eSLois Curfman McInnes for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 18769371c9d4SSatish Balay sum += PetscAbsScalar(*v); 18779371c9d4SSatish Balay v++; 187804ca555eSLois Curfman McInnes } 18798e3a54c0SPierre Jolivet v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 188004ca555eSLois Curfman McInnes for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 18819371c9d4SSatish Balay sum += PetscAbsScalar(*v); 18829371c9d4SSatish Balay v++; 188304ca555eSLois Curfman McInnes } 1884515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 188504ca555eSLois Curfman McInnes } 1886462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 18879566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1888ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 18899566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 18909566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 189137fa93a5SLois Curfman McInnes } 18923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1893855ac2c5SLois Curfman McInnes } 1894855ac2c5SLois Curfman McInnes 1895ba38deedSJacob Faibussowitsch static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1896d71ae5a4SJacob Faibussowitsch { 1897a8661f62Sandi selinger Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1898a8661f62Sandi selinger Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1899071fcb05SBarry Smith PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1900071fcb05SBarry Smith const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1901a8661f62Sandi selinger Mat B, A_diag, *B_diag; 1902ce496241SStefano 
Zampini const MatScalar *pbv, *bv; 1903b7c46309SBarry Smith 19043a40ed3dSBarry Smith PetscFunctionBegin; 19057fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 19069371c9d4SSatish Balay ma = A->rmap->n; 19079371c9d4SSatish Balay na = A->cmap->n; 19089371c9d4SSatish Balay mb = a->B->rmap->n; 19099371c9d4SSatish Balay nb = a->B->cmap->n; 19109371c9d4SSatish Balay ai = Aloc->i; 19119371c9d4SSatish Balay aj = Aloc->j; 19129371c9d4SSatish Balay bi = Bloc->i; 19139371c9d4SSatish Balay bj = Bloc->j; 1914fc73b1b3SBarry Smith if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 191580bcc5a1SJed Brown PetscInt *d_nnz, *g_nnz, *o_nnz; 191680bcc5a1SJed Brown PetscSFNode *oloc; 1917713c93b4SJed Brown PETSC_UNUSED PetscSF sf; 191880bcc5a1SJed Brown 19199566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 192080bcc5a1SJed Brown /* compute d_nnz for preallocation */ 19219566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(d_nnz, na)); 1922cbc6b225SStefano Zampini for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 192380bcc5a1SJed Brown /* compute local off-diagonal contributions */ 19249566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(g_nnz, nb)); 192580bcc5a1SJed Brown for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 192680bcc5a1SJed Brown /* map those to global */ 19279566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 19289566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 19299566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(sf)); 19309566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(o_nnz, na)); 193157168dbeSPierre Jolivet PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 193257168dbeSPierre Jolivet PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 19339566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&sf)); 
1934d4bb536fSBarry Smith 19359566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 19369566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 19379566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 19389566063dSJacob Faibussowitsch PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 19399566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 19409566063dSJacob Faibussowitsch PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1941fc4dec0aSBarry Smith } else { 1942fc4dec0aSBarry Smith B = *matout; 19439566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1944fc4dec0aSBarry Smith } 1945b7c46309SBarry Smith 1946f79cb1a0Sandi selinger b = (Mat_MPIAIJ *)B->data; 1947a8661f62Sandi selinger A_diag = a->A; 1948a8661f62Sandi selinger B_diag = &b->A; 1949a8661f62Sandi selinger sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1950a8661f62Sandi selinger A_diag_ncol = A_diag->cmap->N; 1951a8661f62Sandi selinger B_diag_ilen = sub_B_diag->ilen; 1952a8661f62Sandi selinger B_diag_i = sub_B_diag->i; 1953f79cb1a0Sandi selinger 1954f79cb1a0Sandi selinger /* Set ilen for diagonal of B */ 1955ad540459SPierre Jolivet for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1956f79cb1a0Sandi selinger 19574cf0e950SBarry Smith /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1958a8661f62Sandi selinger very quickly (=without using MatSetValues), because all writes are local. 
*/ 19597fb60732SBarry Smith PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 19609566063dSJacob Faibussowitsch PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1961f79cb1a0Sandi selinger 1962b7c46309SBarry Smith /* copy over the B part */ 19639566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[mb], &cols)); 19649566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1965ce496241SStefano Zampini pbv = bv; 1966d0f46423SBarry Smith row = A->rmap->rstart; 19672205254eSKarl Rupp for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 196861a2fbbaSHong Zhang cols_tmp = cols; 1969da668accSHong Zhang for (i = 0; i < mb; i++) { 1970da668accSHong Zhang ncol = bi[i + 1] - bi[i]; 19719566063dSJacob Faibussowitsch PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 19722205254eSKarl Rupp row++; 1973720a2405SPierre Jolivet if (pbv) pbv += ncol; 1974720a2405SPierre Jolivet if (cols_tmp) cols_tmp += ncol; 1975b7c46309SBarry Smith } 19769566063dSJacob Faibussowitsch PetscCall(PetscFree(cols)); 19779566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1978fc73b1b3SBarry Smith 19799566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 19809566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1981cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 19820de55854SLois Curfman McInnes *matout = B; 19830de55854SLois Curfman McInnes } else { 19849566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A, &B)); 19850de55854SLois Curfman McInnes } 19863ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1987b7c46309SBarry Smith } 1988b7c46309SBarry Smith 1989ba38deedSJacob Faibussowitsch static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1990d71ae5a4SJacob Faibussowitsch { 19914b967eb1SSatish Balay Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 19924b967eb1SSatish Balay Mat a = aij->A, b = aij->B; 
1993b1d57f15SBarry Smith PetscInt s1, s2, s3; 1994a008b906SSatish Balay 19953a40ed3dSBarry Smith PetscFunctionBegin; 19969566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(mat, &s2, &s3)); 19974b967eb1SSatish Balay if (rr) { 19989566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(rr, &s1)); 199908401ef6SPierre Jolivet PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 20004b967eb1SSatish Balay /* Overlap communication with computation. */ 20019566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2002a008b906SSatish Balay } 20034b967eb1SSatish Balay if (ll) { 20049566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(ll, &s1)); 200508401ef6SPierre Jolivet PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 2006dbbe0bcdSBarry Smith PetscUseTypeMethod(b, diagonalscale, ll, NULL); 20074b967eb1SSatish Balay } 20084b967eb1SSatish Balay /* scale the diagonal block */ 2009dbbe0bcdSBarry Smith PetscUseTypeMethod(a, diagonalscale, ll, rr); 20104b967eb1SSatish Balay 20114b967eb1SSatish Balay if (rr) { 20124b967eb1SSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 20139566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2014dbbe0bcdSBarry Smith PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 20154b967eb1SSatish Balay } 20163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2017a008b906SSatish Balay } 2018a008b906SSatish Balay 2019ba38deedSJacob Faibussowitsch static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2020d71ae5a4SJacob Faibussowitsch { 2021bb5a7306SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 20223a40ed3dSBarry Smith 20233a40ed3dSBarry Smith PetscFunctionBegin; 20249566063dSJacob Faibussowitsch PetscCall(MatSetUnfactored(a->A)); 20253ba16761SJacob Faibussowitsch 
PetscFunctionReturn(PETSC_SUCCESS); 2026bb5a7306SBarry Smith } 2027bb5a7306SBarry Smith 2028ba38deedSJacob Faibussowitsch static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2029d71ae5a4SJacob Faibussowitsch { 2030d4bb536fSBarry Smith Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2031d4bb536fSBarry Smith Mat a, b, c, d; 2032ace3abfcSBarry Smith PetscBool flg; 2033d4bb536fSBarry Smith 20343a40ed3dSBarry Smith PetscFunctionBegin; 20359371c9d4SSatish Balay a = matA->A; 20369371c9d4SSatish Balay b = matA->B; 20379371c9d4SSatish Balay c = matB->A; 20389371c9d4SSatish Balay d = matB->B; 2039d4bb536fSBarry Smith 20409566063dSJacob Faibussowitsch PetscCall(MatEqual(a, c, &flg)); 204148a46eb9SPierre Jolivet if (flg) PetscCall(MatEqual(b, d, &flg)); 2042462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 20433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2044d4bb536fSBarry Smith } 2045d4bb536fSBarry Smith 2046ba38deedSJacob Faibussowitsch static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2047d71ae5a4SJacob Faibussowitsch { 2048cb5b572fSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2049cb5b572fSBarry Smith Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2050cb5b572fSBarry Smith 2051cb5b572fSBarry Smith PetscFunctionBegin; 205233f4a19fSKris Buschelman /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 205333f4a19fSKris Buschelman if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2054cb5b572fSBarry Smith /* because of the column compression in the off-processor part of the matrix a->B, 2055cb5b572fSBarry Smith the number of columns in a->B and b->B may be different, hence we cannot call 2056cb5b572fSBarry Smith the MatCopy() directly on the two parts. 
If need be, we can provide a more 2057cb5b572fSBarry Smith efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2058cb5b572fSBarry Smith then copying the submatrices */ 20599566063dSJacob Faibussowitsch PetscCall(MatCopy_Basic(A, B, str)); 2060cb5b572fSBarry Smith } else { 20619566063dSJacob Faibussowitsch PetscCall(MatCopy(a->A, b->A, str)); 20629566063dSJacob Faibussowitsch PetscCall(MatCopy(a->B, b->B, str)); 2063cb5b572fSBarry Smith } 20649566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 20653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2066cb5b572fSBarry Smith } 2067cb5b572fSBarry Smith 2068001ddc4fSHong Zhang /* 2069001ddc4fSHong Zhang Computes the number of nonzeros per row needed for preallocation when X and Y 2070001ddc4fSHong Zhang have different nonzero structure. 2071001ddc4fSHong Zhang */ 2072d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2073d71ae5a4SJacob Faibussowitsch { 2074001ddc4fSHong Zhang PetscInt i, j, k, nzx, nzy; 207595b7e79eSJed Brown 207695b7e79eSJed Brown PetscFunctionBegin; 207795b7e79eSJed Brown /* Set the number of nonzeros in the new matrix */ 207895b7e79eSJed Brown for (i = 0; i < m; i++) { 20798e3a54c0SPierre Jolivet const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2080001ddc4fSHong Zhang nzx = xi[i + 1] - xi[i]; 2081001ddc4fSHong Zhang nzy = yi[i + 1] - yi[i]; 208295b7e79eSJed Brown nnz[i] = 0; 208395b7e79eSJed Brown for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2084001ddc4fSHong Zhang for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2085001ddc4fSHong Zhang if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 208695b7e79eSJed Brown nnz[i]++; 
208795b7e79eSJed Brown } 208895b7e79eSJed Brown for (; k < nzy; k++) nnz[i]++; 208995b7e79eSJed Brown } 20903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 209195b7e79eSJed Brown } 209295b7e79eSJed Brown 2093001ddc4fSHong Zhang /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2094d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2095d71ae5a4SJacob Faibussowitsch { 2096001ddc4fSHong Zhang PetscInt m = Y->rmap->N; 2097001ddc4fSHong Zhang Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2098001ddc4fSHong Zhang Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2099001ddc4fSHong Zhang 2100001ddc4fSHong Zhang PetscFunctionBegin; 21019566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 21023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2103001ddc4fSHong Zhang } 2104001ddc4fSHong Zhang 2105ba38deedSJacob Faibussowitsch static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2106d71ae5a4SJacob Faibussowitsch { 2107ac90fabeSBarry Smith Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2108ac90fabeSBarry Smith 2109ac90fabeSBarry Smith PetscFunctionBegin; 2110ac90fabeSBarry Smith if (str == SAME_NONZERO_PATTERN) { 21119566063dSJacob Faibussowitsch PetscCall(MatAXPY(yy->A, a, xx->A, str)); 21129566063dSJacob Faibussowitsch PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2113ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 21149566063dSJacob Faibussowitsch PetscCall(MatAXPY_Basic(Y, a, X, str)); 2115ac90fabeSBarry Smith } else { 21169f5f6813SShri Abhyankar Mat B; 21179f5f6813SShri Abhyankar PetscInt *nnz_d, *nnz_o; 2118d9d719b4SStefano Zampini 21199566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 
21209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 21219566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 21229566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 21239566063dSJacob Faibussowitsch PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 21249566063dSJacob Faibussowitsch PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 21259566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 21269566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 21279566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 21289566063dSJacob Faibussowitsch PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 21299566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(Y, &B)); 21309566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz_d)); 21319566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz_o)); 2132ac90fabeSBarry Smith } 21333ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2134ac90fabeSBarry Smith } 2135ac90fabeSBarry Smith 21362726fb6dSPierre Jolivet PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2137354c94deSBarry Smith 2138ba38deedSJacob Faibussowitsch static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2139d71ae5a4SJacob Faibussowitsch { 21405f80ce2aSJacob Faibussowitsch PetscFunctionBegin; 21415f80ce2aSJacob Faibussowitsch if (PetscDefined(USE_COMPLEX)) { 2142354c94deSBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2143354c94deSBarry Smith 21449566063dSJacob Faibussowitsch PetscCall(MatConjugate_SeqAIJ(aij->A)); 21459566063dSJacob Faibussowitsch PetscCall(MatConjugate_SeqAIJ(aij->B)); 21465f80ce2aSJacob Faibussowitsch } 21473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2148354c94deSBarry Smith } 2149354c94deSBarry Smith 2150ba38deedSJacob Faibussowitsch static 
PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2151d71ae5a4SJacob Faibussowitsch { 215299cafbc1SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 215399cafbc1SBarry Smith 215499cafbc1SBarry Smith PetscFunctionBegin; 21559566063dSJacob Faibussowitsch PetscCall(MatRealPart(a->A)); 21569566063dSJacob Faibussowitsch PetscCall(MatRealPart(a->B)); 21573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 215899cafbc1SBarry Smith } 215999cafbc1SBarry Smith 2160ba38deedSJacob Faibussowitsch static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2161d71ae5a4SJacob Faibussowitsch { 216299cafbc1SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 216399cafbc1SBarry Smith 216499cafbc1SBarry Smith PetscFunctionBegin; 21659566063dSJacob Faibussowitsch PetscCall(MatImaginaryPart(a->A)); 21669566063dSJacob Faibussowitsch PetscCall(MatImaginaryPart(a->B)); 21673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 216899cafbc1SBarry Smith } 216999cafbc1SBarry Smith 2170ba38deedSJacob Faibussowitsch static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2171d71ae5a4SJacob Faibussowitsch { 2172c91732d9SHong Zhang Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2173475b8b61SHong Zhang PetscInt i, *idxb = NULL, m = A->rmap->n; 2174475b8b61SHong Zhang PetscScalar *va, *vv; 2175475b8b61SHong Zhang Vec vB, vA; 2176475b8b61SHong Zhang const PetscScalar *vb; 2177c91732d9SHong Zhang 2178c91732d9SHong Zhang PetscFunctionBegin; 2179c7b600bfSPierre Jolivet PetscCall(MatCreateVecs(a->A, NULL, &vA)); 21809566063dSJacob Faibussowitsch PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2181475b8b61SHong Zhang 21829566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(vA, &va)); 2183c91732d9SHong Zhang if (idx) { 2184475b8b61SHong Zhang for (i = 0; i < m; i++) { 2185d0f46423SBarry Smith if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2186c91732d9SHong Zhang } 2187c91732d9SHong Zhang } 2188c91732d9SHong Zhang 2189c7b600bfSPierre Jolivet PetscCall(MatCreateVecs(a->B, NULL, 
&vB)); 21909566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &idxb)); 21919566063dSJacob Faibussowitsch PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2192c91732d9SHong Zhang 21939566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v, &vv)); 21949566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(vB, &vb)); 2195475b8b61SHong Zhang for (i = 0; i < m; i++) { 2196c91732d9SHong Zhang if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2197475b8b61SHong Zhang vv[i] = vb[i]; 2198c91732d9SHong Zhang if (idx) idx[i] = a->garray[idxb[i]]; 2199475b8b61SHong Zhang } else { 2200475b8b61SHong Zhang vv[i] = va[i]; 22019371c9d4SSatish Balay if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2202c91732d9SHong Zhang } 2203c91732d9SHong Zhang } 22049566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(vA, &vv)); 22059566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(vA, &va)); 22069566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(vB, &vb)); 22079566063dSJacob Faibussowitsch PetscCall(PetscFree(idxb)); 22089566063dSJacob Faibussowitsch PetscCall(VecDestroy(&vA)); 22099566063dSJacob Faibussowitsch PetscCall(VecDestroy(&vB)); 22103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2211c91732d9SHong Zhang } 2212c91732d9SHong Zhang 2213eede4a3fSMark Adams static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2214eede4a3fSMark Adams { 2215eede4a3fSMark Adams Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2216eede4a3fSMark Adams Vec vB, vA; 2217eede4a3fSMark Adams 2218eede4a3fSMark Adams PetscFunctionBegin; 2219c7b600bfSPierre Jolivet PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2220eede4a3fSMark Adams PetscCall(MatGetRowSumAbs(a->A, vA)); 2221c7b600bfSPierre Jolivet PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2222eede4a3fSMark Adams PetscCall(MatGetRowSumAbs(a->B, vB)); 2223eede4a3fSMark Adams PetscCall(VecAXPY(vA, 1.0, vB)); 2224eede4a3fSMark Adams 
PetscCall(VecDestroy(&vB)); 2225eede4a3fSMark Adams PetscCall(VecCopy(vA, v)); 2226eede4a3fSMark Adams PetscCall(VecDestroy(&vA)); 2227eede4a3fSMark Adams PetscFunctionReturn(PETSC_SUCCESS); 2228eede4a3fSMark Adams } 2229eede4a3fSMark Adams 2230ba38deedSJacob Faibussowitsch static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2231d71ae5a4SJacob Faibussowitsch { 2232f07e67edSHong Zhang Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2233f07e67edSHong Zhang PetscInt m = A->rmap->n, n = A->cmap->n; 2234f07e67edSHong Zhang PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2235f07e67edSHong Zhang PetscInt *cmap = mat->garray; 2236f07e67edSHong Zhang PetscInt *diagIdx, *offdiagIdx; 2237f07e67edSHong Zhang Vec diagV, offdiagV; 2238ce496241SStefano Zampini PetscScalar *a, *diagA, *offdiagA; 2239ce496241SStefano Zampini const PetscScalar *ba, *bav; 2240f07e67edSHong Zhang PetscInt r, j, col, ncols, *bi, *bj; 2241f07e67edSHong Zhang Mat B = mat->B; 2242f07e67edSHong Zhang Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2243c87e5d42SMatthew Knepley 2244c87e5d42SMatthew Knepley PetscFunctionBegin; 2245f07e67edSHong Zhang /* When a process holds entire A and other processes have no entry */ 2246f07e67edSHong Zhang if (A->cmap->N == n) { 22479566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v, &diagA)); 22489566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 22499566063dSJacob Faibussowitsch PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 22509566063dSJacob Faibussowitsch PetscCall(VecDestroy(&diagV)); 22519566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(v, &diagA)); 22523ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2253f07e67edSHong Zhang } else if (n == 0) { 2254f07e67edSHong Zhang if (m) { 22559566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v, &a)); 22569371c9d4SSatish Balay for (r = 0; r < m; r++) { 22579371c9d4SSatish Balay a[r] = 0.0; 22589371c9d4SSatish Balay if 
(idx) idx[r] = -1; 22599371c9d4SSatish Balay } 22609566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(v, &a)); 2261f07e67edSHong Zhang } 22623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2263f07e67edSHong Zhang } 2264f07e67edSHong Zhang 22659566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 22669566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 22679566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 22689566063dSJacob Faibussowitsch PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2269f07e67edSHong Zhang 2270f07e67edSHong Zhang /* Get offdiagIdx[] for implicit 0.0 */ 22719566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2272ce496241SStefano Zampini ba = bav; 2273f07e67edSHong Zhang bi = b->i; 2274f07e67edSHong Zhang bj = b->j; 22759566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2276f07e67edSHong Zhang for (r = 0; r < m; r++) { 2277f07e67edSHong Zhang ncols = bi[r + 1] - bi[r]; 2278f07e67edSHong Zhang if (ncols == A->cmap->N - n) { /* Brow is dense */ 22799371c9d4SSatish Balay offdiagA[r] = *ba; 22809371c9d4SSatish Balay offdiagIdx[r] = cmap[0]; 2281f07e67edSHong Zhang } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2282f07e67edSHong Zhang offdiagA[r] = 0.0; 2283f07e67edSHong Zhang 2284f07e67edSHong Zhang /* Find first hole in the cmap */ 2285f07e67edSHong Zhang for (j = 0; j < ncols; j++) { 2286f07e67edSHong Zhang col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2287f07e67edSHong Zhang if (col > j && j < cstart) { 2288f07e67edSHong Zhang offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2289f07e67edSHong Zhang break; 2290f07e67edSHong Zhang } else if (col > j + n && j >= cstart) { 2291f07e67edSHong Zhang offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2292f07e67edSHong Zhang break; 
2293f07e67edSHong Zhang } 2294f07e67edSHong Zhang } 22954e879edeSHong Zhang if (j == ncols && ncols < A->cmap->N - n) { 2296f07e67edSHong Zhang /* a hole is outside compressed Bcols */ 2297f07e67edSHong Zhang if (ncols == 0) { 2298f07e67edSHong Zhang if (cstart) { 2299f07e67edSHong Zhang offdiagIdx[r] = 0; 2300f07e67edSHong Zhang } else offdiagIdx[r] = cend; 2301f07e67edSHong Zhang } else { /* ncols > 0 */ 2302f07e67edSHong Zhang offdiagIdx[r] = cmap[ncols - 1] + 1; 2303f07e67edSHong Zhang if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2304f07e67edSHong Zhang } 2305f07e67edSHong Zhang } 2306f07e67edSHong Zhang } 2307f07e67edSHong Zhang 2308f07e67edSHong Zhang for (j = 0; j < ncols; j++) { 23099371c9d4SSatish Balay if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 23109371c9d4SSatish Balay offdiagA[r] = *ba; 23119371c9d4SSatish Balay offdiagIdx[r] = cmap[*bj]; 23129371c9d4SSatish Balay } 23139371c9d4SSatish Balay ba++; 23149371c9d4SSatish Balay bj++; 2315f07e67edSHong Zhang } 2316f07e67edSHong Zhang } 2317f07e67edSHong Zhang 23189566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v, &a)); 23199566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2320f07e67edSHong Zhang for (r = 0; r < m; ++r) { 2321f07e67edSHong Zhang if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2322f07e67edSHong Zhang a[r] = diagA[r]; 2323f07e67edSHong Zhang if (idx) idx[r] = cstart + diagIdx[r]; 2324f07e67edSHong Zhang } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2325f07e67edSHong Zhang a[r] = diagA[r]; 2326c87e5d42SMatthew Knepley if (idx) { 2327f07e67edSHong Zhang if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2328f07e67edSHong Zhang idx[r] = cstart + diagIdx[r]; 2329f07e67edSHong Zhang } else idx[r] = offdiagIdx[r]; 2330f07e67edSHong Zhang } 2331f07e67edSHong Zhang } else { 2332f07e67edSHong Zhang a[r] = offdiagA[r]; 2333f07e67edSHong Zhang if (idx) idx[r] = offdiagIdx[r]; 2334c87e5d42SMatthew Knepley 
} 2335c87e5d42SMatthew Knepley } 23369566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 23379566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(v, &a)); 23389566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 23399566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 23409566063dSJacob Faibussowitsch PetscCall(VecDestroy(&diagV)); 23419566063dSJacob Faibussowitsch PetscCall(VecDestroy(&offdiagV)); 23429566063dSJacob Faibussowitsch PetscCall(PetscFree2(diagIdx, offdiagIdx)); 23433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2344c87e5d42SMatthew Knepley } 2345c87e5d42SMatthew Knepley 2346ba38deedSJacob Faibussowitsch static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2347d71ae5a4SJacob Faibussowitsch { 234803bc72f1SMatthew Knepley Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2349fa213d2fSHong Zhang PetscInt m = A->rmap->n, n = A->cmap->n; 2350fa213d2fSHong Zhang PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 235103bc72f1SMatthew Knepley PetscInt *cmap = mat->garray; 235203bc72f1SMatthew Knepley PetscInt *diagIdx, *offdiagIdx; 235303bc72f1SMatthew Knepley Vec diagV, offdiagV; 2354ce496241SStefano Zampini PetscScalar *a, *diagA, *offdiagA; 2355ce496241SStefano Zampini const PetscScalar *ba, *bav; 2356fa213d2fSHong Zhang PetscInt r, j, col, ncols, *bi, *bj; 2357fa213d2fSHong Zhang Mat B = mat->B; 2358fa213d2fSHong Zhang Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 235903bc72f1SMatthew Knepley 236003bc72f1SMatthew Knepley PetscFunctionBegin; 2361fa213d2fSHong Zhang /* When a process holds entire A and other processes have no entry */ 2362fa213d2fSHong Zhang if (A->cmap->N == n) { 23639566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v, &diagA)); 23649566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 23659566063dSJacob Faibussowitsch PetscCall(MatGetRowMin(mat->A, 
diagV, idx)); 23669566063dSJacob Faibussowitsch PetscCall(VecDestroy(&diagV)); 23679566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(v, &diagA)); 23683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2369fa213d2fSHong Zhang } else if (n == 0) { 2370fa213d2fSHong Zhang if (m) { 23719566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v, &a)); 23729371c9d4SSatish Balay for (r = 0; r < m; r++) { 23739371c9d4SSatish Balay a[r] = PETSC_MAX_REAL; 23749371c9d4SSatish Balay if (idx) idx[r] = -1; 23759371c9d4SSatish Balay } 23769566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(v, &a)); 2377fa213d2fSHong Zhang } 23783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2379fa213d2fSHong Zhang } 2380fa213d2fSHong Zhang 23819566063dSJacob Faibussowitsch PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 23829566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 23839566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 23849566063dSJacob Faibussowitsch PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2385fa213d2fSHong Zhang 2386fa213d2fSHong Zhang /* Get offdiagIdx[] for implicit 0.0 */ 23879566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2388ce496241SStefano Zampini ba = bav; 2389fa213d2fSHong Zhang bi = b->i; 2390fa213d2fSHong Zhang bj = b->j; 23919566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2392fa213d2fSHong Zhang for (r = 0; r < m; r++) { 2393fa213d2fSHong Zhang ncols = bi[r + 1] - bi[r]; 2394fa213d2fSHong Zhang if (ncols == A->cmap->N - n) { /* Brow is dense */ 23959371c9d4SSatish Balay offdiagA[r] = *ba; 23969371c9d4SSatish Balay offdiagIdx[r] = cmap[0]; 2397fa213d2fSHong Zhang } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2398fa213d2fSHong Zhang offdiagA[r] = 0.0; 2399fa213d2fSHong Zhang 2400fa213d2fSHong Zhang /* Find first hole in the cmap */ 2401fa213d2fSHong Zhang for 
(j = 0; j < ncols; j++) { 2402fa213d2fSHong Zhang col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2403fa213d2fSHong Zhang if (col > j && j < cstart) { 2404fa213d2fSHong Zhang offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2405fa213d2fSHong Zhang break; 2406fa213d2fSHong Zhang } else if (col > j + n && j >= cstart) { 2407fa213d2fSHong Zhang offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2408fa213d2fSHong Zhang break; 2409fa213d2fSHong Zhang } 2410fa213d2fSHong Zhang } 24114e879edeSHong Zhang if (j == ncols && ncols < A->cmap->N - n) { 2412fa213d2fSHong Zhang /* a hole is outside compressed Bcols */ 2413fa213d2fSHong Zhang if (ncols == 0) { 2414fa213d2fSHong Zhang if (cstart) { 2415fa213d2fSHong Zhang offdiagIdx[r] = 0; 2416fa213d2fSHong Zhang } else offdiagIdx[r] = cend; 2417fa213d2fSHong Zhang } else { /* ncols > 0 */ 2418fa213d2fSHong Zhang offdiagIdx[r] = cmap[ncols - 1] + 1; 2419fa213d2fSHong Zhang if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2420fa213d2fSHong Zhang } 2421fa213d2fSHong Zhang } 2422fa213d2fSHong Zhang } 2423fa213d2fSHong Zhang 2424fa213d2fSHong Zhang for (j = 0; j < ncols; j++) { 24259371c9d4SSatish Balay if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 24269371c9d4SSatish Balay offdiagA[r] = *ba; 24279371c9d4SSatish Balay offdiagIdx[r] = cmap[*bj]; 24289371c9d4SSatish Balay } 24299371c9d4SSatish Balay ba++; 24309371c9d4SSatish Balay bj++; 2431fa213d2fSHong Zhang } 2432fa213d2fSHong Zhang } 2433fa213d2fSHong Zhang 24349566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v, &a)); 24359566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2436fa213d2fSHong Zhang for (r = 0; r < m; ++r) { 2437fa213d2fSHong Zhang if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 243803bc72f1SMatthew Knepley a[r] = diagA[r]; 2439fa213d2fSHong Zhang if (idx) idx[r] = cstart + diagIdx[r]; 2440fa213d2fSHong Zhang } else if 
(PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2441fa213d2fSHong Zhang a[r] = diagA[r]; 2442fa213d2fSHong Zhang if (idx) { 2443fa213d2fSHong Zhang if (cstart + diagIdx[r] <= offdiagIdx[r]) { 244403bc72f1SMatthew Knepley idx[r] = cstart + diagIdx[r]; 2445fa213d2fSHong Zhang } else idx[r] = offdiagIdx[r]; 2446fa213d2fSHong Zhang } 244703bc72f1SMatthew Knepley } else { 244803bc72f1SMatthew Knepley a[r] = offdiagA[r]; 2449fa213d2fSHong Zhang if (idx) idx[r] = offdiagIdx[r]; 245003bc72f1SMatthew Knepley } 245103bc72f1SMatthew Knepley } 24529566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 24539566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(v, &a)); 24549566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 24559566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 24569566063dSJacob Faibussowitsch PetscCall(VecDestroy(&diagV)); 24579566063dSJacob Faibussowitsch PetscCall(VecDestroy(&offdiagV)); 24589566063dSJacob Faibussowitsch PetscCall(PetscFree2(diagIdx, offdiagIdx)); 24593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 246003bc72f1SMatthew Knepley } 246103bc72f1SMatthew Knepley 2462ba38deedSJacob Faibussowitsch static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2463d71ae5a4SJacob Faibussowitsch { 2464c87e5d42SMatthew Knepley Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 24651a254869SHong Zhang PetscInt m = A->rmap->n, n = A->cmap->n; 24661a254869SHong Zhang PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2467c87e5d42SMatthew Knepley PetscInt *cmap = mat->garray; 2468c87e5d42SMatthew Knepley PetscInt *diagIdx, *offdiagIdx; 2469c87e5d42SMatthew Knepley Vec diagV, offdiagV; 2470ce496241SStefano Zampini PetscScalar *a, *diagA, *offdiagA; 2471ce496241SStefano Zampini const PetscScalar *ba, *bav; 24721a254869SHong Zhang PetscInt r, j, col, ncols, *bi, *bj; 24731a254869SHong Zhang Mat B = mat->B; 
24741a254869SHong Zhang Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2475c87e5d42SMatthew Knepley 2476c87e5d42SMatthew Knepley PetscFunctionBegin; 24771a254869SHong Zhang /* When a process holds entire A and other processes have no entry */ 24781a254869SHong Zhang if (A->cmap->N == n) { 24799566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v, &diagA)); 24809566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 24819566063dSJacob Faibussowitsch PetscCall(MatGetRowMax(mat->A, diagV, idx)); 24829566063dSJacob Faibussowitsch PetscCall(VecDestroy(&diagV)); 24839566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(v, &diagA)); 24843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 24851a254869SHong Zhang } else if (n == 0) { 24861a254869SHong Zhang if (m) { 24879566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v, &a)); 24889371c9d4SSatish Balay for (r = 0; r < m; r++) { 24899371c9d4SSatish Balay a[r] = PETSC_MIN_REAL; 24909371c9d4SSatish Balay if (idx) idx[r] = -1; 24919371c9d4SSatish Balay } 24929566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(v, &a)); 24931a254869SHong Zhang } 24943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 24951a254869SHong Zhang } 24961a254869SHong Zhang 24979566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 24989566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 24999566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 25009566063dSJacob Faibussowitsch PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 25011a254869SHong Zhang 25021a254869SHong Zhang /* Get offdiagIdx[] for implicit 0.0 */ 25039566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2504ce496241SStefano Zampini ba = bav; 25051a254869SHong Zhang bi = b->i; 25061a254869SHong Zhang bj = b->j; 25079566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(offdiagV, 
&offdiagA)); 25081a254869SHong Zhang for (r = 0; r < m; r++) { 25091a254869SHong Zhang ncols = bi[r + 1] - bi[r]; 25101a254869SHong Zhang if (ncols == A->cmap->N - n) { /* Brow is dense */ 25119371c9d4SSatish Balay offdiagA[r] = *ba; 25129371c9d4SSatish Balay offdiagIdx[r] = cmap[0]; 25131a254869SHong Zhang } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 25141a254869SHong Zhang offdiagA[r] = 0.0; 25151a254869SHong Zhang 25161a254869SHong Zhang /* Find first hole in the cmap */ 25171a254869SHong Zhang for (j = 0; j < ncols; j++) { 25181a254869SHong Zhang col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 25191a254869SHong Zhang if (col > j && j < cstart) { 25201a254869SHong Zhang offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 25211a254869SHong Zhang break; 25221a254869SHong Zhang } else if (col > j + n && j >= cstart) { 25231a254869SHong Zhang offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 25241a254869SHong Zhang break; 25251a254869SHong Zhang } 25261a254869SHong Zhang } 25274e879edeSHong Zhang if (j == ncols && ncols < A->cmap->N - n) { 25281a254869SHong Zhang /* a hole is outside compressed Bcols */ 25291a254869SHong Zhang if (ncols == 0) { 25301a254869SHong Zhang if (cstart) { 25311a254869SHong Zhang offdiagIdx[r] = 0; 25321a254869SHong Zhang } else offdiagIdx[r] = cend; 25331a254869SHong Zhang } else { /* ncols > 0 */ 25341a254869SHong Zhang offdiagIdx[r] = cmap[ncols - 1] + 1; 25351a254869SHong Zhang if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 25361a254869SHong Zhang } 25371a254869SHong Zhang } 25381a254869SHong Zhang } 25391a254869SHong Zhang 25401a254869SHong Zhang for (j = 0; j < ncols; j++) { 25419371c9d4SSatish Balay if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 25429371c9d4SSatish Balay offdiagA[r] = *ba; 25439371c9d4SSatish Balay offdiagIdx[r] = cmap[*bj]; 25449371c9d4SSatish Balay } 25459371c9d4SSatish Balay ba++; 25469371c9d4SSatish Balay bj++; 
25471a254869SHong Zhang } 25481a254869SHong Zhang } 25491a254869SHong Zhang 25509566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v, &a)); 25519566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 25521a254869SHong Zhang for (r = 0; r < m; ++r) { 25531a254869SHong Zhang if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2554c87e5d42SMatthew Knepley a[r] = diagA[r]; 25551a254869SHong Zhang if (idx) idx[r] = cstart + diagIdx[r]; 25561a254869SHong Zhang } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 25571a254869SHong Zhang a[r] = diagA[r]; 25581a254869SHong Zhang if (idx) { 25591a254869SHong Zhang if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2560c87e5d42SMatthew Knepley idx[r] = cstart + diagIdx[r]; 25611a254869SHong Zhang } else idx[r] = offdiagIdx[r]; 25621a254869SHong Zhang } 2563c87e5d42SMatthew Knepley } else { 2564c87e5d42SMatthew Knepley a[r] = offdiagA[r]; 25651a254869SHong Zhang if (idx) idx[r] = offdiagIdx[r]; 2566c87e5d42SMatthew Knepley } 2567c87e5d42SMatthew Knepley } 25689566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 25699566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(v, &a)); 25709566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 25719566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 25729566063dSJacob Faibussowitsch PetscCall(VecDestroy(&diagV)); 25739566063dSJacob Faibussowitsch PetscCall(VecDestroy(&offdiagV)); 25749566063dSJacob Faibussowitsch PetscCall(PetscFree2(diagIdx, offdiagIdx)); 25753ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2576c87e5d42SMatthew Knepley } 2577c87e5d42SMatthew Knepley 2578d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2579d71ae5a4SJacob Faibussowitsch { 2580f6d58c54SBarry Smith Mat *dummy; 25815494a064SHong Zhang 25825494a064SHong Zhang 
PetscFunctionBegin; 25839566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2584f6d58c54SBarry Smith *newmat = *dummy; 25859566063dSJacob Faibussowitsch PetscCall(PetscFree(dummy)); 25863ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25875494a064SHong Zhang } 25885494a064SHong Zhang 2589ba38deedSJacob Faibussowitsch static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2590d71ae5a4SJacob Faibussowitsch { 2591bbead8a2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2592bbead8a2SBarry Smith 2593bbead8a2SBarry Smith PetscFunctionBegin; 25949566063dSJacob Faibussowitsch PetscCall(MatInvertBlockDiagonal(a->A, values)); 25957b6c816cSBarry Smith A->factorerrortype = a->A->factorerrortype; 25963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2597bbead8a2SBarry Smith } 2598bbead8a2SBarry Smith 2599d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2600d71ae5a4SJacob Faibussowitsch { 260173a71a0fSBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 260273a71a0fSBarry Smith 260373a71a0fSBarry Smith PetscFunctionBegin; 260408401ef6SPierre Jolivet PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 26059566063dSJacob Faibussowitsch PetscCall(MatSetRandom(aij->A, rctx)); 2606679944adSJunchao Zhang if (x->assembled) { 26079566063dSJacob Faibussowitsch PetscCall(MatSetRandom(aij->B, rctx)); 2608679944adSJunchao Zhang } else { 26099566063dSJacob Faibussowitsch PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2610679944adSJunchao Zhang } 26119566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 26129566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 
26133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 261473a71a0fSBarry Smith } 2615bbead8a2SBarry Smith 2616ba38deedSJacob Faibussowitsch static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2617d71ae5a4SJacob Faibussowitsch { 2618b1b1104fSBarry Smith PetscFunctionBegin; 2619b1b1104fSBarry Smith if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2620b1b1104fSBarry Smith else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 26213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2622b1b1104fSBarry Smith } 2623b1b1104fSBarry Smith 2624b1b1104fSBarry Smith /*@ 2625f2afee66SBarry Smith MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2626f2afee66SBarry Smith 26272ef1f0ffSBarry Smith Not Collective 2628f2afee66SBarry Smith 2629f2afee66SBarry Smith Input Parameter: 2630f2afee66SBarry Smith . A - the matrix 2631f2afee66SBarry Smith 2632f2afee66SBarry Smith Output Parameter: 2633f2afee66SBarry Smith . 
nz - the number of nonzeros 2634f2afee66SBarry Smith 2635f2afee66SBarry Smith Level: advanced 2636f2afee66SBarry Smith 2637fe59aa6dSJacob Faibussowitsch .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2638f2afee66SBarry Smith @*/ 2639d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2640d71ae5a4SJacob Faibussowitsch { 2641f2afee66SBarry Smith Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2642f2afee66SBarry Smith Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2643dfebb78cSStefano Zampini PetscBool isaij; 2644f2afee66SBarry Smith 2645f2afee66SBarry Smith PetscFunctionBegin; 2646dfebb78cSStefano Zampini PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2647dfebb78cSStefano Zampini PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2648f2afee66SBarry Smith *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 26493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2650f2afee66SBarry Smith } 2651f2afee66SBarry Smith 2652f2afee66SBarry Smith /*@ 2653b1b1104fSBarry Smith MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2654b1b1104fSBarry Smith 2655c3339decSBarry Smith Collective 2656b1b1104fSBarry Smith 2657b1b1104fSBarry Smith Input Parameters: 2658b1b1104fSBarry Smith + A - the matrix 265911a5261eSBarry Smith - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2660b1b1104fSBarry Smith 266196a0c994SBarry Smith Level: advanced 266296a0c994SBarry Smith 2663fe59aa6dSJacob Faibussowitsch .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2664b1b1104fSBarry Smith @*/ 2665d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2666d71ae5a4SJacob Faibussowitsch { 2667b1b1104fSBarry Smith PetscFunctionBegin; 2668cac4c232SBarry Smith PetscTryMethod(A, 
"MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 26693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2670b1b1104fSBarry Smith } 2671b1b1104fSBarry Smith 2672d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2673d71ae5a4SJacob Faibussowitsch { 2674b1b1104fSBarry Smith PetscBool sc = PETSC_FALSE, flg; 2675b1b1104fSBarry Smith 2676b1b1104fSBarry Smith PetscFunctionBegin; 2677d0609cedSBarry Smith PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2678b1b1104fSBarry Smith if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 26799566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 26801baa6e33SBarry Smith if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2681d0609cedSBarry Smith PetscOptionsHeadEnd(); 26823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2683b1b1104fSBarry Smith } 2684b1b1104fSBarry Smith 2685ba38deedSJacob Faibussowitsch static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2686d71ae5a4SJacob Faibussowitsch { 26877d68702bSBarry Smith Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2688c5e4d11fSDmitry Karpeev Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 26897d68702bSBarry Smith 26907d68702bSBarry Smith PetscFunctionBegin; 2691c5e4d11fSDmitry Karpeev if (!Y->preallocated) { 26929566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 26935519a089SJose E. Roman } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
*/ 2694b83222d8SBarry Smith PetscInt nonew = aij->nonew; 26959566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2696b83222d8SBarry Smith aij->nonew = nonew; 26977d68702bSBarry Smith } 26989566063dSJacob Faibussowitsch PetscCall(MatShift_Basic(Y, a)); 26993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 27007d68702bSBarry Smith } 27017d68702bSBarry Smith 2702ba38deedSJacob Faibussowitsch static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2703d71ae5a4SJacob Faibussowitsch { 27043b49f96aSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 27053b49f96aSBarry Smith 27063b49f96aSBarry Smith PetscFunctionBegin; 270708401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 27089566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(a->A, missing, d)); 27093b49f96aSBarry Smith if (d) { 27103b49f96aSBarry Smith PetscInt rstart; 27119566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 27123b49f96aSBarry Smith *d += rstart; 27133b49f96aSBarry Smith } 27143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 27153b49f96aSBarry Smith } 27163b49f96aSBarry Smith 2717ba38deedSJacob Faibussowitsch static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2718d71ae5a4SJacob Faibussowitsch { 2719a8ee9fb5SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2720a8ee9fb5SBarry Smith 2721a8ee9fb5SBarry Smith PetscFunctionBegin; 27229566063dSJacob Faibussowitsch PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 27233ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2724a8ee9fb5SBarry Smith } 27253b49f96aSBarry Smith 272658c11ad4SPierre Jolivet static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2727dec0b466SHong Zhang { 2728dec0b466SHong Zhang Mat_MPIAIJ *a = (Mat_MPIAIJ 
*)A->data; 2729dec0b466SHong Zhang 2730dec0b466SHong Zhang PetscFunctionBegin; 273158c11ad4SPierre Jolivet PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 273258c11ad4SPierre Jolivet PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 27333ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2734dec0b466SHong Zhang } 2735dec0b466SHong Zhang 2736cda55fadSBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2737cda55fadSBarry Smith MatGetRow_MPIAIJ, 2738cda55fadSBarry Smith MatRestoreRow_MPIAIJ, 2739cda55fadSBarry Smith MatMult_MPIAIJ, 274097304618SKris Buschelman /* 4*/ MatMultAdd_MPIAIJ, 27417c922b88SBarry Smith MatMultTranspose_MPIAIJ, 27427c922b88SBarry Smith MatMultTransposeAdd_MPIAIJ, 2743f4259b30SLisandro Dalcin NULL, 2744f4259b30SLisandro Dalcin NULL, 2745f4259b30SLisandro Dalcin NULL, 2746f4259b30SLisandro Dalcin /*10*/ NULL, 2747f4259b30SLisandro Dalcin NULL, 2748f4259b30SLisandro Dalcin NULL, 274941f059aeSBarry Smith MatSOR_MPIAIJ, 2750b7c46309SBarry Smith MatTranspose_MPIAIJ, 275197304618SKris Buschelman /*15*/ MatGetInfo_MPIAIJ, 2752cda55fadSBarry Smith MatEqual_MPIAIJ, 2753cda55fadSBarry Smith MatGetDiagonal_MPIAIJ, 2754cda55fadSBarry Smith MatDiagonalScale_MPIAIJ, 2755cda55fadSBarry Smith MatNorm_MPIAIJ, 275697304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIAIJ, 2757cda55fadSBarry Smith MatAssemblyEnd_MPIAIJ, 2758cda55fadSBarry Smith MatSetOption_MPIAIJ, 2759cda55fadSBarry Smith MatZeroEntries_MPIAIJ, 2760d519adbfSMatthew Knepley /*24*/ MatZeroRows_MPIAIJ, 2761f4259b30SLisandro Dalcin NULL, 2762f4259b30SLisandro Dalcin NULL, 2763f4259b30SLisandro Dalcin NULL, 2764f4259b30SLisandro Dalcin NULL, 276526cec326SBarry Smith /*29*/ MatSetUp_MPI_Hash, 2766f4259b30SLisandro Dalcin NULL, 2767f4259b30SLisandro Dalcin NULL, 2768a5b7ff6bSBarry Smith MatGetDiagonalBlock_MPIAIJ, 2769f4259b30SLisandro Dalcin NULL, 2770d519adbfSMatthew Knepley 
/*34*/ MatDuplicate_MPIAIJ, 2771f4259b30SLisandro Dalcin NULL, 2772f4259b30SLisandro Dalcin NULL, 2773f4259b30SLisandro Dalcin NULL, 2774f4259b30SLisandro Dalcin NULL, 2775d519adbfSMatthew Knepley /*39*/ MatAXPY_MPIAIJ, 27767dae84e0SHong Zhang MatCreateSubMatrices_MPIAIJ, 2777cda55fadSBarry Smith MatIncreaseOverlap_MPIAIJ, 2778cda55fadSBarry Smith MatGetValues_MPIAIJ, 2779cb5b572fSBarry Smith MatCopy_MPIAIJ, 2780d519adbfSMatthew Knepley /*44*/ MatGetRowMax_MPIAIJ, 2781cda55fadSBarry Smith MatScale_MPIAIJ, 27827d68702bSBarry Smith MatShift_MPIAIJ, 278399e65526SBarry Smith MatDiagonalSet_MPIAIJ, 2784564f14d6SBarry Smith MatZeroRowsColumns_MPIAIJ, 278573a71a0fSBarry Smith /*49*/ MatSetRandom_MPIAIJ, 27868a9c020eSBarry Smith MatGetRowIJ_MPIAIJ, 27878a9c020eSBarry Smith MatRestoreRowIJ_MPIAIJ, 2788f4259b30SLisandro Dalcin NULL, 2789f4259b30SLisandro Dalcin NULL, 279093dfae19SHong Zhang /*54*/ MatFDColoringCreate_MPIXAIJ, 2791f4259b30SLisandro Dalcin NULL, 2792cda55fadSBarry Smith MatSetUnfactored_MPIAIJ, 279372e6a0cfSJed Brown MatPermute_MPIAIJ, 2794f4259b30SLisandro Dalcin NULL, 27957dae84e0SHong Zhang /*59*/ MatCreateSubMatrix_MPIAIJ, 2796e03a110bSBarry Smith MatDestroy_MPIAIJ, 2797e03a110bSBarry Smith MatView_MPIAIJ, 2798f4259b30SLisandro Dalcin NULL, 2799f4259b30SLisandro Dalcin NULL, 2800f4259b30SLisandro Dalcin /*64*/ NULL, 2801f996eeb8SHong Zhang MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2802f4259b30SLisandro Dalcin NULL, 2803f4259b30SLisandro Dalcin NULL, 2804f4259b30SLisandro Dalcin NULL, 2805d519adbfSMatthew Knepley /*69*/ MatGetRowMaxAbs_MPIAIJ, 2806c87e5d42SMatthew Knepley MatGetRowMinAbs_MPIAIJ, 2807f4259b30SLisandro Dalcin NULL, 2808f4259b30SLisandro Dalcin NULL, 2809f4259b30SLisandro Dalcin NULL, 2810f4259b30SLisandro Dalcin NULL, 28113acb8795SBarry Smith /*75*/ MatFDColoringApply_AIJ, 2812b1b1104fSBarry Smith MatSetFromOptions_MPIAIJ, 2813f4259b30SLisandro Dalcin NULL, 2814f4259b30SLisandro Dalcin NULL, 2815f1f41ecbSJed Brown 
MatFindZeroDiagonals_MPIAIJ, 2816f4259b30SLisandro Dalcin /*80*/ NULL, 2817f4259b30SLisandro Dalcin NULL, 2818f4259b30SLisandro Dalcin NULL, 28195bba2384SShri Abhyankar /*83*/ MatLoad_MPIAIJ, 28206cff0a6bSPierre Jolivet NULL, 2821f4259b30SLisandro Dalcin NULL, 2822f4259b30SLisandro Dalcin NULL, 2823f4259b30SLisandro Dalcin NULL, 2824f4259b30SLisandro Dalcin NULL, 2825f4259b30SLisandro Dalcin /*89*/ NULL, 2826f4259b30SLisandro Dalcin NULL, 282726be0446SHong Zhang MatMatMultNumeric_MPIAIJ_MPIAIJ, 2828f4259b30SLisandro Dalcin NULL, 2829f4259b30SLisandro Dalcin NULL, 2830cf3ca8ceSHong Zhang /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2831f4259b30SLisandro Dalcin NULL, 2832f4259b30SLisandro Dalcin NULL, 2833f4259b30SLisandro Dalcin NULL, 2834b470e4b4SRichard Tran Mills MatBindToCPU_MPIAIJ, 28354222ddf1SHong Zhang /*99*/ MatProductSetFromOptions_MPIAIJ, 2836f4259b30SLisandro Dalcin NULL, 2837f4259b30SLisandro Dalcin NULL, 28382fd7e33dSBarry Smith MatConjugate_MPIAIJ, 2839f4259b30SLisandro Dalcin NULL, 2840d519adbfSMatthew Knepley /*104*/ MatSetValuesRow_MPIAIJ, 284199cafbc1SBarry Smith MatRealPart_MPIAIJ, 284269db28dcSHong Zhang MatImaginaryPart_MPIAIJ, 2843f4259b30SLisandro Dalcin NULL, 2844f4259b30SLisandro Dalcin NULL, 2845f4259b30SLisandro Dalcin /*109*/ NULL, 2846f4259b30SLisandro Dalcin NULL, 28475494a064SHong Zhang MatGetRowMin_MPIAIJ, 2848f4259b30SLisandro Dalcin NULL, 28493b49f96aSBarry Smith MatMissingDiagonal_MPIAIJ, 2850d1adec66SJed Brown /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2851f4259b30SLisandro Dalcin NULL, 2852c5e4d11fSDmitry Karpeev MatGetGhosts_MPIAIJ, 2853f4259b30SLisandro Dalcin NULL, 2854f4259b30SLisandro Dalcin NULL, 2855b215bc84SStefano Zampini /*119*/ MatMultDiagonalBlock_MPIAIJ, 2856f4259b30SLisandro Dalcin NULL, 2857f4259b30SLisandro Dalcin NULL, 2858f4259b30SLisandro Dalcin NULL, 2859b9614d88SDmitry Karpeev MatGetMultiProcBlock_MPIAIJ, 2860f2c98031SJed Brown /*124*/ MatFindNonzeroRows_MPIAIJ, 2861a873a8cdSSam Reynolds 
MatGetColumnReductions_MPIAIJ, 2862bbead8a2SBarry Smith MatInvertBlockDiagonal_MPIAIJ, 2863a8ee9fb5SBarry Smith MatInvertVariableBlockDiagonal_MPIAIJ, 28647dae84e0SHong Zhang MatCreateSubMatricesMPI_MPIAIJ, 2865f4259b30SLisandro Dalcin /*129*/ NULL, 2866f4259b30SLisandro Dalcin NULL, 2867f4259b30SLisandro Dalcin NULL, 2868187b3c17SHong Zhang MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2869f4259b30SLisandro Dalcin NULL, 2870f4259b30SLisandro Dalcin /*134*/ NULL, 2871f4259b30SLisandro Dalcin NULL, 2872f4259b30SLisandro Dalcin NULL, 2873f4259b30SLisandro Dalcin NULL, 2874f4259b30SLisandro Dalcin NULL, 287546533700Sstefano_zampini /*139*/ MatSetBlockSizes_MPIAIJ, 2876f4259b30SLisandro Dalcin NULL, 2877f4259b30SLisandro Dalcin NULL, 28789c8f2541SHong Zhang MatFDColoringSetUp_MPIXAIJ, 2879a0b6529bSBarry Smith MatFindOffBlockDiagonalEntries_MPIAIJ, 28804222ddf1SHong Zhang MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2881f4259b30SLisandro Dalcin /*145*/ NULL, 2882f4259b30SLisandro Dalcin NULL, 288372833a62Smarkadams4 NULL, 288472833a62Smarkadams4 MatCreateGraph_Simple_AIJ, 28852d776b49SBarry Smith NULL, 2886dec0b466SHong Zhang /*150*/ NULL, 2887eede4a3fSMark Adams MatEliminateZeros_MPIAIJ, 28884cc2b5b5SPierre Jolivet MatGetRowSumAbs_MPIAIJ, 288942ce410bSJunchao Zhang NULL, 289042ce410bSJunchao Zhang NULL, 2891fe1fc275SAlexander /*155*/ NULL, 2892fe1fc275SAlexander MatCopyHashToXAIJ_MPI_Hash}; 289336ce4990SBarry Smith 2894ba38deedSJacob Faibussowitsch static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2895d71ae5a4SJacob Faibussowitsch { 28962e8a6d31SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 28972e8a6d31SBarry Smith 28982e8a6d31SBarry Smith PetscFunctionBegin; 28999566063dSJacob Faibussowitsch PetscCall(MatStoreValues(aij->A)); 29009566063dSJacob Faibussowitsch PetscCall(MatStoreValues(aij->B)); 29013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 29022e8a6d31SBarry Smith } 29032e8a6d31SBarry Smith 2904ba38deedSJacob Faibussowitsch static 
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2905d71ae5a4SJacob Faibussowitsch { 29062e8a6d31SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 29072e8a6d31SBarry Smith 29082e8a6d31SBarry Smith PetscFunctionBegin; 29099566063dSJacob Faibussowitsch PetscCall(MatRetrieveValues(aij->A)); 29109566063dSJacob Faibussowitsch PetscCall(MatRetrieveValues(aij->B)); 29113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 29122e8a6d31SBarry Smith } 29138a729477SBarry Smith 2914d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2915d71ae5a4SJacob Faibussowitsch { 2916ad79cf63SBarry Smith Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 29175d2a9ed1SStefano Zampini PetscMPIInt size; 2918a23d5eceSKris Buschelman 2919a23d5eceSKris Buschelman PetscFunctionBegin; 2920ad79cf63SBarry Smith if (B->hash_active) { 2921aea10558SJacob Faibussowitsch B->ops[0] = b->cops; 2922ad79cf63SBarry Smith B->hash_active = PETSC_FALSE; 2923ad79cf63SBarry Smith } 29249566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 29259566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 2926899cda47SBarry Smith 2927cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE) 2928eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&b->colmap)); 2929cb7b82ddSBarry Smith #else 29309566063dSJacob Faibussowitsch PetscCall(PetscFree(b->colmap)); 2931cb7b82ddSBarry Smith #endif 29329566063dSJacob Faibussowitsch PetscCall(PetscFree(b->garray)); 29339566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->lvec)); 29349566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->Mvctx)); 2935cb7b82ddSBarry Smith 29369566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2937c508b908SBarry Smith 2938c508b908SBarry Smith MatSeqXAIJGetOptions_Private(b->B); 29399566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->B)); 
29409566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 29419566063dSJacob Faibussowitsch PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 29429566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 29439566063dSJacob Faibussowitsch PetscCall(MatSetType(b->B, MATSEQAIJ)); 2944c508b908SBarry Smith MatSeqXAIJRestoreOptions_Private(b->B); 2945cb7b82ddSBarry Smith 2946c508b908SBarry Smith MatSeqXAIJGetOptions_Private(b->A); 2947ad79cf63SBarry Smith PetscCall(MatDestroy(&b->A)); 29489566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 29499566063dSJacob Faibussowitsch PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 29509566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 29519566063dSJacob Faibussowitsch PetscCall(MatSetType(b->A, MATSEQAIJ)); 2952c508b908SBarry Smith MatSeqXAIJRestoreOptions_Private(b->A); 2953899cda47SBarry Smith 29549566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 29559566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2956526dfc15SBarry Smith B->preallocated = PETSC_TRUE; 2957cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 295815001458SStefano Zampini B->assembled = PETSC_FALSE; 29593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2960a23d5eceSKris Buschelman } 2961a23d5eceSKris Buschelman 2962ba38deedSJacob Faibussowitsch static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2963d71ae5a4SJacob Faibussowitsch { 2964ad79cf63SBarry Smith Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2965846b4da1SFande Kong 2966846b4da1SFande Kong PetscFunctionBegin; 2967846b4da1SFande Kong PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 29689566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 29699566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 2970846b4da1SFande 
Kong 2971846b4da1SFande Kong #if defined(PETSC_USE_CTABLE) 2972eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&b->colmap)); 2973846b4da1SFande Kong #else 29749566063dSJacob Faibussowitsch PetscCall(PetscFree(b->colmap)); 2975846b4da1SFande Kong #endif 29769566063dSJacob Faibussowitsch PetscCall(PetscFree(b->garray)); 29779566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->lvec)); 29789566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->Mvctx)); 2979846b4da1SFande Kong 29809566063dSJacob Faibussowitsch PetscCall(MatResetPreallocation(b->A)); 29819566063dSJacob Faibussowitsch PetscCall(MatResetPreallocation(b->B)); 2982846b4da1SFande Kong B->preallocated = PETSC_TRUE; 2983846b4da1SFande Kong B->was_assembled = PETSC_FALSE; 2984846b4da1SFande Kong B->assembled = PETSC_FALSE; 29853ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2986846b4da1SFande Kong } 2987846b4da1SFande Kong 2988d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2989d71ae5a4SJacob Faibussowitsch { 2990d6dfbf8fSBarry Smith Mat mat; 2991416022c9SBarry Smith Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2992d6dfbf8fSBarry Smith 29933a40ed3dSBarry Smith PetscFunctionBegin; 2994f4259b30SLisandro Dalcin *newmat = NULL; 29959566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 29969566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 29979566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 29989566063dSJacob Faibussowitsch PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2999273d9f13SBarry Smith a = (Mat_MPIAIJ *)mat->data; 3000e1b6402fSHong Zhang 3001d5f3da31SBarry Smith mat->factortype = matin->factortype; 3002501880eeSStefano Zampini mat->assembled = matin->assembled; 3003e7641de0SSatish Balay mat->insertmode = NOT_SET_VALUES; 
3004d6dfbf8fSBarry Smith 300517699dbbSLois Curfman McInnes a->size = oldmat->size; 300617699dbbSLois Curfman McInnes a->rank = oldmat->rank; 3007e7641de0SSatish Balay a->donotstash = oldmat->donotstash; 3008e7641de0SSatish Balay a->roworiented = oldmat->roworiented; 3009501880eeSStefano Zampini a->rowindices = NULL; 3010501880eeSStefano Zampini a->rowvalues = NULL; 3011bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 3012d6dfbf8fSBarry Smith 30139566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 30149566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3015420957c1SBarry Smith if (matin->hash_active) { 3016420957c1SBarry Smith PetscCall(MatSetUp(mat)); 3017420957c1SBarry Smith } else { 3018420957c1SBarry Smith mat->preallocated = matin->preallocated; 30192ee70a88SLois Curfman McInnes if (oldmat->colmap) { 3020aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 3021eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3022b1fc9764SSatish Balay #else 30239566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 30249566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3025b1fc9764SSatish Balay #endif 3026501880eeSStefano Zampini } else a->colmap = NULL; 30273f41c07dSBarry Smith if (oldmat->garray) { 3028b1d57f15SBarry Smith PetscInt len; 3029d0f46423SBarry Smith len = oldmat->B->cmap->n; 30309566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(len + 1, &a->garray)); 30319566063dSJacob Faibussowitsch if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3032501880eeSStefano Zampini } else a->garray = NULL; 3033d6dfbf8fSBarry Smith 30340de76c62SStefano Zampini /* It may happen MatDuplicate is called with a non-assembled matrix 30350de76c62SStefano Zampini In fact, MatDuplicate only requires the matrix to be preallocated 30360de76c62SStefano Zampini This may happen inside a 
DMCreateMatrix_Shell */ 3037aa624791SPierre Jolivet if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3038cff58d65SJunchao Zhang if (oldmat->Mvctx) { 3039cff58d65SJunchao Zhang a->Mvctx = oldmat->Mvctx; 3040cff58d65SJunchao Zhang PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx)); 3041cff58d65SJunchao Zhang } 30429566063dSJacob Faibussowitsch PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 30439566063dSJacob Faibussowitsch PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3044420957c1SBarry Smith } 30459566063dSJacob Faibussowitsch PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 30468a729477SBarry Smith *newmat = mat; 30473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 30488a729477SBarry Smith } 3049416022c9SBarry Smith 3050d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3051d71ae5a4SJacob Faibussowitsch { 305252f91c60SVaclav Hapla PetscBool isbinary, ishdf5; 305352f91c60SVaclav Hapla 305452f91c60SVaclav Hapla PetscFunctionBegin; 305552f91c60SVaclav Hapla PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 305652f91c60SVaclav Hapla PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3057c27b3999SVaclav Hapla /* force binary viewer to load .info file if it has not yet done so */ 30589566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 30599566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 30609566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 306152f91c60SVaclav Hapla if (isbinary) { 30629566063dSJacob Faibussowitsch PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 306352f91c60SVaclav Hapla } else if (ishdf5) { 306452f91c60SVaclav Hapla #if defined(PETSC_HAVE_HDF5) 30659566063dSJacob Faibussowitsch PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 306652f91c60SVaclav Hapla #else 
306752f91c60SVaclav Hapla SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 306852f91c60SVaclav Hapla #endif 306952f91c60SVaclav Hapla } else { 307098921bdaSJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 307152f91c60SVaclav Hapla } 30723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 307352f91c60SVaclav Hapla } 307452f91c60SVaclav Hapla 3075d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3076d71ae5a4SJacob Faibussowitsch { 30773ea6fe3dSLisandro Dalcin PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 30783ea6fe3dSLisandro Dalcin PetscInt *rowidxs, *colidxs; 30793ea6fe3dSLisandro Dalcin PetscScalar *matvals; 30808fb81238SShri Abhyankar 30818fb81238SShri Abhyankar PetscFunctionBegin; 30829566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 30838fb81238SShri Abhyankar 30843ea6fe3dSLisandro Dalcin /* read in matrix header */ 30859566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 308608401ef6SPierre Jolivet PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 30879371c9d4SSatish Balay M = header[1]; 30889371c9d4SSatish Balay N = header[2]; 30899371c9d4SSatish Balay nz = header[3]; 309008401ef6SPierre Jolivet PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 309108401ef6SPierre Jolivet PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 309208401ef6SPierre Jolivet PetscCheck(nz >= 0, PETSC_COMM_SELF, 
PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 309308ea439dSMark F. Adams 30943ea6fe3dSLisandro Dalcin /* set block sizes from the viewer's .info file */ 30959566063dSJacob Faibussowitsch PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 30963ea6fe3dSLisandro Dalcin /* set global sizes if not set already */ 30973ea6fe3dSLisandro Dalcin if (mat->rmap->N < 0) mat->rmap->N = M; 30983ea6fe3dSLisandro Dalcin if (mat->cmap->N < 0) mat->cmap->N = N; 30999566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->rmap)); 31009566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->cmap)); 31018fb81238SShri Abhyankar 31023ea6fe3dSLisandro Dalcin /* check if the matrix sizes are correct */ 31039566063dSJacob Faibussowitsch PetscCall(MatGetSize(mat, &rows, &cols)); 3104aed4548fSBarry Smith PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 31058fb81238SShri Abhyankar 31063ea6fe3dSLisandro Dalcin /* read in row lengths and build row indices */ 31079566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(mat, &m, NULL)); 31089566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &rowidxs)); 31099566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 31109371c9d4SSatish Balay rowidxs[0] = 0; 31119371c9d4SSatish Balay for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 31121690c2aeSBarry Smith if (nz != PETSC_INT_MAX) { 3113462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 311408401ef6SPierre Jolivet PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 
311538b83642SBarry Smith } 311638b83642SBarry Smith 31173ea6fe3dSLisandro Dalcin /* read in column indices and matrix values */ 31189566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 31199566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 31209566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 31213ea6fe3dSLisandro Dalcin /* store matrix indices and values */ 31229566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 31239566063dSJacob Faibussowitsch PetscCall(PetscFree(rowidxs)); 31249566063dSJacob Faibussowitsch PetscCall(PetscFree2(colidxs, matvals)); 31253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 31268fb81238SShri Abhyankar } 31278fb81238SShri Abhyankar 31283782ecc7SHong Zhang /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3129ba38deedSJacob Faibussowitsch static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3130d71ae5a4SJacob Faibussowitsch { 31314aa3045dSJed Brown IS iscol_local; 3132c5e4d11fSDmitry Karpeev PetscBool isstride; 3133c5e4d11fSDmitry Karpeev PetscMPIInt lisstride = 0, gisstride; 31343782ecc7SHong Zhang 31353782ecc7SHong Zhang PetscFunctionBegin; 31363782ecc7SHong Zhang /* check if we are grabbing all columns*/ 31379566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 31383782ecc7SHong Zhang 3139c5e4d11fSDmitry Karpeev if (isstride) { 3140c5e4d11fSDmitry Karpeev PetscInt start, len, mstart, mlen; 31419566063dSJacob Faibussowitsch PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 31429566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol, &len)); 31439566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3144c5e4d11fSDmitry Karpeev if (mstart == start && mlen - mstart == len) lisstride = 1; 3145c5e4d11fSDmitry Karpeev } 31463782ecc7SHong Zhang 3147462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3148c5e4d11fSDmitry Karpeev if (gisstride) { 3149c5e4d11fSDmitry Karpeev PetscInt N; 31509566063dSJacob Faibussowitsch PetscCall(MatGetSize(mat, NULL, &N)); 31519566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 31529566063dSJacob Faibussowitsch PetscCall(ISSetIdentity(iscol_local)); 31539566063dSJacob Faibussowitsch PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3154c5e4d11fSDmitry Karpeev } else { 3155c5bfad50SMark F. 
Adams PetscInt cbs; 31569566063dSJacob Faibussowitsch PetscCall(ISGetBlockSize(iscol, &cbs)); 31579566063dSJacob Faibussowitsch PetscCall(ISAllGather(iscol, &iscol_local)); 31589566063dSJacob Faibussowitsch PetscCall(ISSetBlockSize(iscol_local, cbs)); 3159b79d0421SJed Brown } 31603782ecc7SHong Zhang 31613782ecc7SHong Zhang *isseq = iscol_local; 31623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3163c5e4d11fSDmitry Karpeev } 31648d2139bdSHong Zhang 3165ddfdf956SHong Zhang /* 31669c988bcaSHong Zhang Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 31679c988bcaSHong Zhang (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3168ddfdf956SHong Zhang 3169ddfdf956SHong Zhang Input Parameters: 317027430b45SBarry Smith + mat - matrix 317127430b45SBarry Smith . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 31729c988bcaSHong Zhang i.e., mat->rstart <= isrow[i] < mat->rend 317327430b45SBarry Smith - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3174ddfdf956SHong Zhang i.e., mat->cstart <= iscol[i] < mat->cend 317527430b45SBarry Smith 317627430b45SBarry Smith Output Parameters: 317727430b45SBarry Smith + isrow_d - sequential row index set for retrieving mat->A 317827430b45SBarry Smith . iscol_d - sequential column index set for retrieving mat->A 317927430b45SBarry Smith . 
iscol_o - sequential column index set for retrieving mat->B 318027430b45SBarry Smith - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3181ddfdf956SHong Zhang */ 3182835f2295SStefano Zampini static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[]) 3183d71ae5a4SJacob Faibussowitsch { 3184040216a4SHong Zhang Vec x, cmap; 3185040216a4SHong Zhang const PetscInt *is_idx; 3186040216a4SHong Zhang PetscScalar *xarray, *cmaparray; 31879c988bcaSHong Zhang PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3188040216a4SHong Zhang Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3189040216a4SHong Zhang Mat B = a->B; 3190040216a4SHong Zhang Vec lvec = a->lvec, lcmap; 3191a31a438cSHong Zhang PetscInt i, cstart, cend, Bn = B->cmap->N; 31928b3fa1f7SHong Zhang MPI_Comm comm; 31933a8d973cSHong Zhang VecScatter Mvctx = a->Mvctx; 31943782ecc7SHong Zhang 31953782ecc7SHong Zhang PetscFunctionBegin; 31969566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 31979566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol, &ncols)); 31988b3fa1f7SHong Zhang 3199ddfdf956SHong Zhang /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 32009566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(mat, &x, NULL)); 32019566063dSJacob Faibussowitsch PetscCall(VecSet(x, -1.0)); 32029566063dSJacob Faibussowitsch PetscCall(VecDuplicate(x, &cmap)); 32039566063dSJacob Faibussowitsch PetscCall(VecSet(cmap, -1.0)); 32040a351717SHong Zhang 32059c988bcaSHong Zhang /* Get start indices */ 32069566063dSJacob Faibussowitsch PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3207ddfdf956SHong Zhang isstart -= ncols; 32089566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3209040216a4SHong Zhang 32109566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iscol, &is_idx)); 32119566063dSJacob Faibussowitsch PetscCall(VecGetArray(x, &xarray)); 32129566063dSJacob Faibussowitsch PetscCall(VecGetArray(cmap, &cmaparray)); 32139566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ncols, &idx)); 3214ddfdf956SHong Zhang for (i = 0; i < ncols; i++) { 32158b3fa1f7SHong Zhang xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3216ddfdf956SHong Zhang cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 32179c988bcaSHong Zhang idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 32188b3fa1f7SHong Zhang } 32199566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(x, &xarray)); 32209566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(cmap, &cmaparray)); 32219566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &is_idx)); 32228b3fa1f7SHong Zhang 32239c988bcaSHong Zhang /* Get iscol_d */ 32249566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 32259566063dSJacob Faibussowitsch PetscCall(ISGetBlockSize(iscol, &i)); 32269566063dSJacob Faibussowitsch PetscCall(ISSetBlockSize(*iscol_d, i)); 3227feb78a15SHong Zhang 32289c988bcaSHong Zhang /* Get isrow_d */ 32299566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(isrow, &m)); 3230feb78a15SHong Zhang rstart 
= mat->rmap->rstart; 32319566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &idx)); 32329566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &is_idx)); 32339c988bcaSHong Zhang for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 32349566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &is_idx)); 3235feb78a15SHong Zhang 32369566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 32379566063dSJacob Faibussowitsch PetscCall(ISGetBlockSize(isrow, &i)); 32389566063dSJacob Faibussowitsch PetscCall(ISSetBlockSize(*isrow_d, i)); 3239feb78a15SHong Zhang 32409c988bcaSHong Zhang /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 32419566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 32429566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3243ddfdf956SHong Zhang 32449566063dSJacob Faibussowitsch PetscCall(VecDuplicate(lvec, &lcmap)); 324507250d77SHong Zhang 32469566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 32479566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 324864efcef9SHong Zhang 32499c988bcaSHong Zhang /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3250ddfdf956SHong Zhang /* off-process column indices */ 32519c988bcaSHong Zhang count = 0; 32529566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Bn, &idx)); 32539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Bn, &cmap1)); 3254feb78a15SHong Zhang 32559566063dSJacob Faibussowitsch PetscCall(VecGetArray(lvec, &xarray)); 32569566063dSJacob Faibussowitsch PetscCall(VecGetArray(lcmap, &cmaparray)); 32578b3fa1f7SHong Zhang for (i = 0; i < Bn; i++) { 3258f73421bfSHong Zhang if (PetscRealPart(xarray[i]) > -1.0) { 32599c988bcaSHong Zhang idx[count] = i; /* 
local column index in off-diagonal part B */ 32601c645242SHong Zhang cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 32611c645242SHong Zhang count++; 32628b3fa1f7SHong Zhang } 32638b3fa1f7SHong Zhang } 32649566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(lvec, &xarray)); 32659566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(lcmap, &cmaparray)); 326607250d77SHong Zhang 32679566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3268b6d9b4e0SHong Zhang /* cannot ensure iscol_o has same blocksize as iscol! */ 3269b6d9b4e0SHong Zhang 32709566063dSJacob Faibussowitsch PetscCall(PetscFree(idx)); 32719c988bcaSHong Zhang *garray = cmap1; 32729c988bcaSHong Zhang 32739566063dSJacob Faibussowitsch PetscCall(VecDestroy(&x)); 32749566063dSJacob Faibussowitsch PetscCall(VecDestroy(&cmap)); 32759566063dSJacob Faibussowitsch PetscCall(VecDestroy(&lcmap)); 32763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3277040216a4SHong Zhang } 3278040216a4SHong Zhang 3279b20e2604SHong Zhang /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3280d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3281d71ae5a4SJacob Faibussowitsch { 3282b20e2604SHong Zhang Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 32831fd43edeSHong Zhang Mat M = NULL; 32843b00a383SHong Zhang MPI_Comm comm; 3285b20e2604SHong Zhang IS iscol_d, isrow_d, iscol_o; 32863b00a383SHong Zhang Mat Asub = NULL, Bsub = NULL; 3287b20e2604SHong Zhang PetscInt n; 32883b00a383SHong Zhang 32893b00a383SHong Zhang PetscFunctionBegin; 32909566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 32913b00a383SHong Zhang 32923b00a383SHong Zhang if (call == MAT_REUSE_MATRIX) { 3293b20e2604SHong Zhang /* Retrieve isrow_d, iscol_d and iscol_o from submat 
*/ 32949566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 329528b400f6SJacob Faibussowitsch PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 32963b00a383SHong Zhang 32979566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 329828b400f6SJacob Faibussowitsch PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 32993b00a383SHong Zhang 33009566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 330128b400f6SJacob Faibussowitsch PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 33023b00a383SHong Zhang 3303b20e2604SHong Zhang /* Update diagonal and off-diagonal portions of submat */ 3304b20e2604SHong Zhang asub = (Mat_MPIAIJ *)(*submat)->data; 33059566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 33069566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol_o, &n)); 330748a46eb9SPierre Jolivet if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 33089566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 33099566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 33103b00a383SHong Zhang 33113b00a383SHong Zhang } else { /* call == MAT_INITIAL_MATRIX) */ 3312835f2295SStefano Zampini PetscInt *garray; 3313b20e2604SHong Zhang PetscInt BsubN; 33143b00a383SHong Zhang 3315b20e2604SHong Zhang /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 33169566063dSJacob Faibussowitsch PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 33173b00a383SHong Zhang 3318b20e2604SHong Zhang /* Create local submatrices Asub and Bsub */ 33199566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 33209566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 33213b00a383SHong Zhang 33229c988bcaSHong Zhang /* Create submatrix M */ 33239566063dSJacob Faibussowitsch PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 33243b00a383SHong Zhang 3325b20e2604SHong Zhang /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3326b20e2604SHong Zhang asub = (Mat_MPIAIJ *)M->data; 33277cfce09cSHong Zhang 33289566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3329b20e2604SHong Zhang n = asub->B->cmap->N; 3330b20e2604SHong Zhang if (BsubN > n) { 3331c4762a1bSJed Brown /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 33327cfce09cSHong Zhang const PetscInt *idx; 33339c988bcaSHong Zhang PetscInt i, j, *idx_new, *subgarray = asub->garray; 33349566063dSJacob Faibussowitsch PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 33357cfce09cSHong Zhang 33369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &idx_new)); 33377cfce09cSHong Zhang j = 0; 33389566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iscol_o, &idx)); 3339b20e2604SHong Zhang for (i = 0; i < n; i++) { 33407cfce09cSHong Zhang if (j >= BsubN) break; 33419c988bcaSHong Zhang while (subgarray[i] > garray[j]) j++; 33427cfce09cSHong Zhang 33439c988bcaSHong Zhang if (subgarray[i] == garray[j]) { 33447cfce09cSHong Zhang idx_new[i] = idx[j++]; 334598921bdaSJacob Faibussowitsch } else 
SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 33467cfce09cSHong Zhang } 33479566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol_o, &idx)); 33487cfce09cSHong Zhang 33499566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_o)); 33509566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 33517cfce09cSHong Zhang 3352b20e2604SHong Zhang } else if (BsubN < n) { 335398921bdaSJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3354b20e2604SHong Zhang } 33557cfce09cSHong Zhang 33569566063dSJacob Faibussowitsch PetscCall(PetscFree(garray)); 3357b20e2604SHong Zhang *submat = M; 33583b00a383SHong Zhang 3359e489de8fSHong Zhang /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 33609566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 33619566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isrow_d)); 33623b00a383SHong Zhang 33639566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 33649566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_d)); 33653b00a383SHong Zhang 33669566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 33679566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_o)); 33683b00a383SHong Zhang } 33693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 33703b00a383SHong Zhang } 33713b00a383SHong Zhang 3372d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3373d71ae5a4SJacob Faibussowitsch { 33741358a193SHong Zhang IS iscol_local = NULL, isrow_d; 
33753782ecc7SHong Zhang PetscInt csize; 337618e627e3SHong Zhang PetscInt n, i, j, start, end; 33774a3daf6eSHong Zhang PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 33783782ecc7SHong Zhang MPI_Comm comm; 33793782ecc7SHong Zhang 33803782ecc7SHong Zhang PetscFunctionBegin; 3381bcae8d28SHong Zhang /* If isrow has same processor distribution as mat, 3382a31a438cSHong Zhang call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 33838f69fa7bSHong Zhang if (call == MAT_REUSE_MATRIX) { 33849566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3385d5761cdaSHong Zhang if (isrow_d) { 3386d5761cdaSHong Zhang sameRowDist = PETSC_TRUE; 3387d5761cdaSHong Zhang tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3388d5761cdaSHong Zhang } else { 33899566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3390d5761cdaSHong Zhang if (iscol_local) { 3391d5761cdaSHong Zhang sameRowDist = PETSC_TRUE; 3392d5761cdaSHong Zhang tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3393d5761cdaSHong Zhang } 3394d5761cdaSHong Zhang } 33958f69fa7bSHong Zhang } else { 3396e489de8fSHong Zhang /* Check if isrow has same processor distribution as mat */ 339718e627e3SHong Zhang sameDist[0] = PETSC_FALSE; 33989566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(isrow, &n)); 33993782ecc7SHong Zhang if (!n) { 340018e627e3SHong Zhang sameDist[0] = PETSC_TRUE; 34013782ecc7SHong Zhang } else { 34029566063dSJacob Faibussowitsch PetscCall(ISGetMinMax(isrow, &i, &j)); 34039566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3404ad540459SPierre Jolivet if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 34058f69fa7bSHong Zhang } 34063782ecc7SHong Zhang 3407e489de8fSHong Zhang /* Check if iscol has same processor distribution as mat */ 340818e627e3SHong Zhang sameDist[1] = PETSC_FALSE; 
34099566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol, &n)); 341018e627e3SHong Zhang if (!n) { 341118e627e3SHong Zhang sameDist[1] = PETSC_TRUE; 341218e627e3SHong Zhang } else { 34139566063dSJacob Faibussowitsch PetscCall(ISGetMinMax(iscol, &i, &j)); 34149566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 341518e627e3SHong Zhang if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 341618e627e3SHong Zhang } 341718e627e3SHong Zhang 34189566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3419462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 342018e627e3SHong Zhang sameRowDist = tsameDist[0]; 342118e627e3SHong Zhang } 342218e627e3SHong Zhang 342318e627e3SHong Zhang if (sameRowDist) { 3424b20e2604SHong Zhang if (tsameDist[1]) { /* sameRowDist & sameColDist */ 34253b00a383SHong Zhang /* isrow and iscol have same processor distribution as mat */ 34269566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 34273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3428b20e2604SHong Zhang } else { /* sameRowDist */ 34293b00a383SHong Zhang /* isrow has same processor distribution as mat */ 34301358a193SHong Zhang if (call == MAT_INITIAL_MATRIX) { 34311358a193SHong Zhang PetscBool sorted; 34329566063dSJacob Faibussowitsch PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 34339566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 34349566063dSJacob Faibussowitsch PetscCall(ISGetSize(iscol, &i)); 343508401ef6SPierre Jolivet PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 34361358a193SHong Zhang 34379566063dSJacob Faibussowitsch PetscCall(ISSorted(iscol_local, &sorted)); 34381358a193SHong Zhang if (sorted) { 34391358a193SHong 
Zhang /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 34409566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 34413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 34423782ecc7SHong Zhang } 34431358a193SHong Zhang } else { /* call == MAT_REUSE_MATRIX */ 344448c0d076SHong Zhang IS iscol_sub; 34459566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 344648c0d076SHong Zhang if (iscol_sub) { 34479566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 34483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 344948c0d076SHong Zhang } 34501358a193SHong Zhang } 34511358a193SHong Zhang } 34521358a193SHong Zhang } 34533782ecc7SHong Zhang 3454bcae8d28SHong Zhang /* General case: iscol -> iscol_local which has global size of iscol */ 34553782ecc7SHong Zhang if (call == MAT_REUSE_MATRIX) { 34569566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 345728b400f6SJacob Faibussowitsch PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 34583782ecc7SHong Zhang } else { 345948a46eb9SPierre Jolivet if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 34601358a193SHong Zhang } 34613782ecc7SHong Zhang 34629566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol, &csize)); 34639566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 34648f69fa7bSHong Zhang 3465b79d0421SJed Brown if (call == MAT_INITIAL_MATRIX) { 34669566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 
34679566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_local)); 3468b79d0421SJed Brown } 34693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 34704aa3045dSJed Brown } 34714aa3045dSJed Brown 3472feb78a15SHong Zhang /*@C 347311a5261eSBarry Smith MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3474feb78a15SHong Zhang and "off-diagonal" part of the matrix in CSR format. 3475feb78a15SHong Zhang 3476d083f849SBarry Smith Collective 3477feb78a15SHong Zhang 3478feb78a15SHong Zhang Input Parameters: 3479feb78a15SHong Zhang + comm - MPI communicator 3480feb78a15SHong Zhang . A - "diagonal" portion of matrix 3481b20e2604SHong Zhang . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 34822ef1f0ffSBarry Smith - garray - global index of `B` columns 3483feb78a15SHong Zhang 3484feb78a15SHong Zhang Output Parameter: 34852ef1f0ffSBarry Smith . mat - the matrix, with input `A` as its local diagonal matrix 348627430b45SBarry Smith 3487feb78a15SHong Zhang Level: advanced 3488feb78a15SHong Zhang 3489feb78a15SHong Zhang Notes: 349011a5261eSBarry Smith See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 349111a5261eSBarry Smith 34922ef1f0ffSBarry Smith `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 
3493feb78a15SHong Zhang 34941cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3495feb78a15SHong Zhang @*/ 3496d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3497d71ae5a4SJacob Faibussowitsch { 3498feb78a15SHong Zhang Mat_MPIAIJ *maij; 3499e489de8fSHong Zhang Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3500a5348796SHong Zhang PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3501ce496241SStefano Zampini const PetscScalar *oa; 3502e489de8fSHong Zhang Mat Bnew; 3503feb78a15SHong Zhang PetscInt m, n, N; 35044ab4d6f4SRichard Tran Mills MatType mpi_mat_type; 3505feb78a15SHong Zhang 3506feb78a15SHong Zhang PetscFunctionBegin; 35079566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, mat)); 35089566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &m, &n)); 350908401ef6SPierre Jolivet PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 351037a5e0faSPierre Jolivet PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3511b6d9b4e0SHong Zhang /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 351208401ef6SPierre Jolivet /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3513feb78a15SHong Zhang 3514e489de8fSHong Zhang /* Get global columns of mat */ 3515462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3516feb78a15SHong Zhang 35179566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 35184ab4d6f4SRichard Tran Mills /* Determine the type of MPI matrix that should 
be created from the type of matrix A, which holds the "diagonal" portion. */ 35194ab4d6f4SRichard Tran Mills PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 35204ab4d6f4SRichard Tran Mills PetscCall(MatSetType(*mat, mpi_mat_type)); 35214ab4d6f4SRichard Tran Mills 352237a5e0faSPierre Jolivet if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3523feb78a15SHong Zhang maij = (Mat_MPIAIJ *)(*mat)->data; 3524feb78a15SHong Zhang 3525feb78a15SHong Zhang (*mat)->preallocated = PETSC_TRUE; 3526feb78a15SHong Zhang 35279566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp((*mat)->rmap)); 35289566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3529feb78a15SHong Zhang 3530e489de8fSHong Zhang /* Set A as diagonal portion of *mat */ 3531feb78a15SHong Zhang maij->A = A; 3532feb78a15SHong Zhang 3533a5348796SHong Zhang nz = oi[m]; 3534a5348796SHong Zhang for (i = 0; i < nz; i++) { 3535a5348796SHong Zhang col = oj[i]; 3536a5348796SHong Zhang oj[i] = garray[col]; 3537feb78a15SHong Zhang } 3538feb78a15SHong Zhang 3539e489de8fSHong Zhang /* Set Bnew as off-diagonal portion of *mat */ 35409566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 35419566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 35429566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3543e489de8fSHong Zhang bnew = (Mat_SeqAIJ *)Bnew->data; 3544e489de8fSHong Zhang bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3545e489de8fSHong Zhang maij->B = Bnew; 3546d5761cdaSHong Zhang 354708401ef6SPierre Jolivet PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3548d5761cdaSHong Zhang 3549d5761cdaSHong Zhang b->free_a = PETSC_FALSE; 3550d5761cdaSHong Zhang b->free_ij = PETSC_FALSE; 35519566063dSJacob Faibussowitsch 
PetscCall(MatDestroy(&B)); 3552d5761cdaSHong Zhang 3553e489de8fSHong Zhang bnew->free_a = PETSC_TRUE; 3554e489de8fSHong Zhang bnew->free_ij = PETSC_TRUE; 3555feb78a15SHong Zhang 3556a5348796SHong Zhang /* condense columns of maij->B */ 35579566063dSJacob Faibussowitsch PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 35589566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 35599566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 35609566063dSJacob Faibussowitsch PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 35619566063dSJacob Faibussowitsch PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 35623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3563feb78a15SHong Zhang } 3564feb78a15SHong Zhang 3565ef514586SHong Zhang extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 35664aa3045dSJed Brown 3567d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3568d71ae5a4SJacob Faibussowitsch { 356998b658c4SHong Zhang PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 357085f27616SHong Zhang PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 357198b658c4SHong Zhang Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 35721fd43edeSHong Zhang Mat M, Msub, B = a->B; 357398b658c4SHong Zhang MatScalar *aa; 357400e6dbe6SBarry Smith Mat_SeqAIJ *aij; 3575a31a438cSHong Zhang PetscInt *garray = a->garray, *colsub, Ncols; 357698b658c4SHong Zhang PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 357798b658c4SHong Zhang IS iscol_sub, iscmap; 357898b658c4SHong Zhang const PetscInt *is_idx, *cmap; 357918e627e3SHong Zhang PetscBool allcolumns = PETSC_FALSE; 3580a31a438cSHong Zhang MPI_Comm comm; 35817e2c5f70SBarry Smith 
3582a0ff6018SBarry Smith PetscFunctionBegin; 35839566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3584d5761cdaSHong Zhang if (call == MAT_REUSE_MATRIX) { 35859566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 358628b400f6SJacob Faibussowitsch PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 35879566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol_sub, &count)); 3588d5761cdaSHong Zhang 35899566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 359028b400f6SJacob Faibussowitsch PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3591d5761cdaSHong Zhang 35929566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 359328b400f6SJacob Faibussowitsch PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3594d5761cdaSHong Zhang 35959566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3596d5761cdaSHong Zhang 3597d5761cdaSHong Zhang } else { /* call == MAT_INITIAL_MATRIX) */ 35983b00a383SHong Zhang PetscBool flg; 35993b00a383SHong Zhang 36009566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol, &n)); 36019566063dSJacob Faibussowitsch PetscCall(ISGetSize(iscol, &Ncols)); 3602bcae8d28SHong Zhang 36033b00a383SHong Zhang /* (1) iscol -> nonscalable iscol_local */ 3604366a327dSHong Zhang /* Check for special case: each processor gets entire matrix columns */ 36059566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol_local, &flg)); 3606366a327dSHong Zhang if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3607462c564dSBarry Smith 
PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3608366a327dSHong Zhang if (allcolumns) { 3609366a327dSHong Zhang iscol_sub = iscol_local; 36109566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol_local)); 36119566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3612366a327dSHong Zhang 36133b00a383SHong Zhang } else { 36141358a193SHong Zhang /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3615244c7f15SHong Zhang PetscInt *idx, *cmap1, k; 36169566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Ncols, &idx)); 36179566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Ncols, &cmap1)); 36189566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iscol_local, &is_idx)); 36198d2139bdSHong Zhang count = 0; 3620a31a438cSHong Zhang k = 0; 3621a31a438cSHong Zhang for (i = 0; i < Ncols; i++) { 3622a31a438cSHong Zhang j = is_idx[i]; 3623a31a438cSHong Zhang if (j >= cstart && j < cend) { 3624a31a438cSHong Zhang /* diagonal part of mat */ 36258d2139bdSHong Zhang idx[count] = j; 3626366a327dSHong Zhang cmap1[count++] = i; /* column index in submat */ 36274a3daf6eSHong Zhang } else if (Bn) { 3628a31a438cSHong Zhang /* off-diagonal part of mat */ 3629a31a438cSHong Zhang if (j == garray[k]) { 36308d2139bdSHong Zhang idx[count] = j; 3631a31a438cSHong Zhang cmap1[count++] = i; /* column index in submat */ 3632a31a438cSHong Zhang } else if (j > garray[k]) { 3633a31a438cSHong Zhang while (j > garray[k] && k < Bn - 1) k++; 3634a31a438cSHong Zhang if (j == garray[k]) { 3635a31a438cSHong Zhang idx[count] = j; 3636a31a438cSHong Zhang cmap1[count++] = i; /* column index in submat */ 36378d2139bdSHong Zhang } 36388d2139bdSHong Zhang } 36398d2139bdSHong Zhang } 36408d2139bdSHong Zhang } 36419566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 36428d2139bdSHong 
Zhang 36439566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 36449566063dSJacob Faibussowitsch PetscCall(ISGetBlockSize(iscol, &cbs)); 36459566063dSJacob Faibussowitsch PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3646b6d9b4e0SHong Zhang 36479566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3648a31a438cSHong Zhang } 36498b3fa1f7SHong Zhang 36503b00a383SHong Zhang /* (3) Create sequential Msub */ 36519566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3652d5761cdaSHong Zhang } 36538d2139bdSHong Zhang 36549566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol_sub, &count)); 365557508eceSPierre Jolivet aij = (Mat_SeqAIJ *)Msub->data; 365698b658c4SHong Zhang ii = aij->i; 36579566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iscmap, &cmap)); 3658a0ff6018SBarry Smith 3659a0ff6018SBarry Smith /* 3660a0ff6018SBarry Smith m - number of local rows 3661a31a438cSHong Zhang Ncols - number of columns (same on all processors) 3662a0ff6018SBarry Smith rstart - first row in new global matrix generated 3663a0ff6018SBarry Smith */ 36649566063dSJacob Faibussowitsch PetscCall(MatGetSize(Msub, &m, NULL)); 366598b658c4SHong Zhang 36663b00a383SHong Zhang if (call == MAT_INITIAL_MATRIX) { 36673b00a383SHong Zhang /* (4) Create parallel newmat */ 366898b658c4SHong Zhang PetscMPIInt rank, size; 3669bcae8d28SHong Zhang PetscInt csize; 367098b658c4SHong Zhang 36719566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 36729566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 367300e6dbe6SBarry Smith 3674a0ff6018SBarry Smith /* 367500e6dbe6SBarry Smith Determine the number of non-zeros in the diagonal and off-diagonal 367600e6dbe6SBarry Smith portions of the matrix in order to do correct 
preallocation 3677a0ff6018SBarry Smith */ 367800e6dbe6SBarry Smith 367900e6dbe6SBarry Smith /* first get start and end of "diagonal" columns */ 36809566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol, &csize)); 36816a6a5d1dSBarry Smith if (csize == PETSC_DECIDE) { 36829566063dSJacob Faibussowitsch PetscCall(ISGetSize(isrow, &mglobal)); 3683a31a438cSHong Zhang if (mglobal == Ncols) { /* square matrix */ 3684e2c4fddaSBarry Smith nlocal = m; 36856a6a5d1dSBarry Smith } else { 3686a31a438cSHong Zhang nlocal = Ncols / size + ((Ncols % size) > rank); 3687ab50ec6bSBarry Smith } 3688ab50ec6bSBarry Smith } else { 36896a6a5d1dSBarry Smith nlocal = csize; 36906a6a5d1dSBarry Smith } 36919566063dSJacob Faibussowitsch PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 369200e6dbe6SBarry Smith rstart = rend - nlocal; 3693aed4548fSBarry Smith PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 369400e6dbe6SBarry Smith 369500e6dbe6SBarry Smith /* next, compute all the lengths */ 369698b658c4SHong Zhang jj = aij->j; 36979566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 369800e6dbe6SBarry Smith olens = dlens + m; 369900e6dbe6SBarry Smith for (i = 0; i < m; i++) { 370000e6dbe6SBarry Smith jend = ii[i + 1] - ii[i]; 370100e6dbe6SBarry Smith olen = 0; 370200e6dbe6SBarry Smith dlen = 0; 370300e6dbe6SBarry Smith for (j = 0; j < jend; j++) { 370415b2185cSHong Zhang if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 370500e6dbe6SBarry Smith else dlen++; 370600e6dbe6SBarry Smith jj++; 370700e6dbe6SBarry Smith } 370800e6dbe6SBarry Smith olens[i] = olen; 370900e6dbe6SBarry Smith dlens[i] = dlen; 371000e6dbe6SBarry Smith } 3711b6d9b4e0SHong Zhang 37129566063dSJacob Faibussowitsch PetscCall(ISGetBlockSize(isrow, &bs)); 37139566063dSJacob Faibussowitsch PetscCall(ISGetBlockSize(iscol, &cbs)); 371498b658c4SHong 
Zhang 37159566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &M)); 37169566063dSJacob Faibussowitsch PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 37179566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(M, bs, cbs)); 37189566063dSJacob Faibussowitsch PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 37199566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 37209566063dSJacob Faibussowitsch PetscCall(PetscFree(dlens)); 3721d5761cdaSHong Zhang 3722d5761cdaSHong Zhang } else { /* call == MAT_REUSE_MATRIX */ 3723a0ff6018SBarry Smith M = *newmat; 37249566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(M, &i, NULL)); 372508401ef6SPierre Jolivet PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 37269566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(M)); 3727c48de900SBarry Smith /* 3728c48de900SBarry Smith The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3729c48de900SBarry Smith rather than the slower MatSetValues(). 
3730c48de900SBarry Smith */ 3731c48de900SBarry Smith M->was_assembled = PETSC_TRUE; 3732c48de900SBarry Smith M->assembled = PETSC_FALSE; 3733a0ff6018SBarry Smith } 3734548ecf4dSHong Zhang 37353b00a383SHong Zhang /* (5) Set values of Msub to *newmat */ 37369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(count, &colsub)); 37379566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 373898b658c4SHong Zhang 373998b658c4SHong Zhang jj = aij->j; 37409566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3741a0ff6018SBarry Smith for (i = 0; i < m; i++) { 3742a0ff6018SBarry Smith row = rstart + i; 374300e6dbe6SBarry Smith nz = ii[i + 1] - ii[i]; 374415b2185cSHong Zhang for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 37459566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 37469371c9d4SSatish Balay jj += nz; 37479371c9d4SSatish Balay aa += nz; 3748a0ff6018SBarry Smith } 37499566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 37509566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscmap, &cmap)); 3751a0ff6018SBarry Smith 37529566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 37539566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3754fee21e36SBarry Smith 37559566063dSJacob Faibussowitsch PetscCall(PetscFree(colsub)); 375698b658c4SHong Zhang 375798b658c4SHong Zhang /* save Msub, iscol_sub and iscmap used in processor for next request */ 3758fee21e36SBarry Smith if (call == MAT_INITIAL_MATRIX) { 37593b00a383SHong Zhang *newmat = M; 3760f4f49eeaSPierre Jolivet PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 37619566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Msub)); 376298b658c4SHong Zhang 3763f4f49eeaSPierre Jolivet PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", 
(PetscObject)iscol_sub)); 37649566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_sub)); 376598b658c4SHong Zhang 3766f4f49eeaSPierre Jolivet PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 37679566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscmap)); 3768bcae8d28SHong Zhang 3769bcae8d28SHong Zhang if (iscol_local) { 3770f4f49eeaSPierre Jolivet PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 37719566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_local)); 3772bcae8d28SHong Zhang } 377398b658c4SHong Zhang } 37743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3775a0ff6018SBarry Smith } 3776273d9f13SBarry Smith 3777df40acb1SHong Zhang /* 3778df40acb1SHong Zhang Not great since it makes two copies of the submatrix, first an SeqAIJ 3779df40acb1SHong Zhang in local and then by concatenating the local matrices the end result. 3780df40acb1SHong Zhang Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3781df40acb1SHong Zhang 378211a5261eSBarry Smith This requires a sequential iscol with all indices. 
3783df40acb1SHong Zhang */ 3784d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3785d71ae5a4SJacob Faibussowitsch { 3786df40acb1SHong Zhang PetscMPIInt rank, size; 3787df40acb1SHong Zhang PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3788df40acb1SHong Zhang PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3789df40acb1SHong Zhang Mat M, Mreuse; 379098b658c4SHong Zhang MatScalar *aa, *vwork; 3791df40acb1SHong Zhang MPI_Comm comm; 3792df40acb1SHong Zhang Mat_SeqAIJ *aij; 37930b27a90eSHong Zhang PetscBool colflag, allcolumns = PETSC_FALSE; 3794df40acb1SHong Zhang 3795df40acb1SHong Zhang PetscFunctionBegin; 37969566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 37979566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 37989566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 3799df40acb1SHong Zhang 38000b27a90eSHong Zhang /* Check for special case: each processor gets entire matrix columns */ 38019566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol, &colflag)); 38029566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol, &n)); 38030b27a90eSHong Zhang if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3804462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 38050b27a90eSHong Zhang 3806df40acb1SHong Zhang if (call == MAT_REUSE_MATRIX) { 38079566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 380828b400f6SJacob Faibussowitsch PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 38099566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 
3810df40acb1SHong Zhang } else { 38119566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3812df40acb1SHong Zhang } 3813df40acb1SHong Zhang 3814df40acb1SHong Zhang /* 3815df40acb1SHong Zhang m - number of local rows 3816df40acb1SHong Zhang n - number of columns (same on all processors) 3817df40acb1SHong Zhang rstart - first row in new global matrix generated 3818df40acb1SHong Zhang */ 38199566063dSJacob Faibussowitsch PetscCall(MatGetSize(Mreuse, &m, &n)); 38209566063dSJacob Faibussowitsch PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3821df40acb1SHong Zhang if (call == MAT_INITIAL_MATRIX) { 382257508eceSPierre Jolivet aij = (Mat_SeqAIJ *)Mreuse->data; 3823df40acb1SHong Zhang ii = aij->i; 3824df40acb1SHong Zhang jj = aij->j; 3825df40acb1SHong Zhang 3826df40acb1SHong Zhang /* 3827df40acb1SHong Zhang Determine the number of non-zeros in the diagonal and off-diagonal 3828df40acb1SHong Zhang portions of the matrix in order to do correct preallocation 3829df40acb1SHong Zhang */ 3830df40acb1SHong Zhang 3831df40acb1SHong Zhang /* first get start and end of "diagonal" columns */ 3832df40acb1SHong Zhang if (csize == PETSC_DECIDE) { 38339566063dSJacob Faibussowitsch PetscCall(ISGetSize(isrow, &mglobal)); 3834df40acb1SHong Zhang if (mglobal == n) { /* square matrix */ 3835df40acb1SHong Zhang nlocal = m; 3836df40acb1SHong Zhang } else { 3837df40acb1SHong Zhang nlocal = n / size + ((n % size) > rank); 3838df40acb1SHong Zhang } 3839df40acb1SHong Zhang } else { 3840df40acb1SHong Zhang nlocal = csize; 3841df40acb1SHong Zhang } 38429566063dSJacob Faibussowitsch PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3843df40acb1SHong Zhang rstart = rend - nlocal; 3844aed4548fSBarry Smith PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 
3845df40acb1SHong Zhang 3846df40acb1SHong Zhang /* next, compute all the lengths */ 38479566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3848df40acb1SHong Zhang olens = dlens + m; 3849df40acb1SHong Zhang for (i = 0; i < m; i++) { 3850df40acb1SHong Zhang jend = ii[i + 1] - ii[i]; 3851df40acb1SHong Zhang olen = 0; 3852df40acb1SHong Zhang dlen = 0; 3853df40acb1SHong Zhang for (j = 0; j < jend; j++) { 3854df40acb1SHong Zhang if (*jj < rstart || *jj >= rend) olen++; 3855df40acb1SHong Zhang else dlen++; 3856df40acb1SHong Zhang jj++; 3857df40acb1SHong Zhang } 3858df40acb1SHong Zhang olens[i] = olen; 3859df40acb1SHong Zhang dlens[i] = dlen; 3860df40acb1SHong Zhang } 38619566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &M)); 38629566063dSJacob Faibussowitsch PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 38639566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(M, bs, cbs)); 38649566063dSJacob Faibussowitsch PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 38659566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 38669566063dSJacob Faibussowitsch PetscCall(PetscFree(dlens)); 3867df40acb1SHong Zhang } else { 3868df40acb1SHong Zhang PetscInt ml, nl; 3869df40acb1SHong Zhang 3870df40acb1SHong Zhang M = *newmat; 38719566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(M, &ml, &nl)); 387208401ef6SPierre Jolivet PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 38739566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(M)); 3874df40acb1SHong Zhang /* 3875df40acb1SHong Zhang The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3876df40acb1SHong Zhang rather than the slower MatSetValues(). 
3877df40acb1SHong Zhang */ 3878df40acb1SHong Zhang M->was_assembled = PETSC_TRUE; 3879df40acb1SHong Zhang M->assembled = PETSC_FALSE; 3880df40acb1SHong Zhang } 38819566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 388257508eceSPierre Jolivet aij = (Mat_SeqAIJ *)Mreuse->data; 3883df40acb1SHong Zhang ii = aij->i; 3884df40acb1SHong Zhang jj = aij->j; 38852e5835c6SStefano Zampini 38862e5835c6SStefano Zampini /* trigger copy to CPU if needed */ 38879566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3888df40acb1SHong Zhang for (i = 0; i < m; i++) { 3889df40acb1SHong Zhang row = rstart + i; 3890df40acb1SHong Zhang nz = ii[i + 1] - ii[i]; 38919371c9d4SSatish Balay cwork = jj; 38928e3a54c0SPierre Jolivet jj = PetscSafePointerPlusOffset(jj, nz); 38939371c9d4SSatish Balay vwork = aa; 38948e3a54c0SPierre Jolivet aa = PetscSafePointerPlusOffset(aa, nz); 38959566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3896df40acb1SHong Zhang } 38979566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3898df40acb1SHong Zhang 38999566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 39009566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3901df40acb1SHong Zhang *newmat = M; 3902df40acb1SHong Zhang 3903df40acb1SHong Zhang /* save submatrix used in processor for next request */ 3904df40acb1SHong Zhang if (call == MAT_INITIAL_MATRIX) { 39059566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 39069566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Mreuse)); 3907df40acb1SHong Zhang } 39083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3909df40acb1SHong Zhang } 3910df40acb1SHong Zhang 3911ba38deedSJacob Faibussowitsch static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, 
const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3912d71ae5a4SJacob Faibussowitsch { 39136a3d2595SBarry Smith PetscInt m, cstart, cend, j, nnz, i, d, *ld; 39149f0612e4SBarry Smith PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart; 3915ccd8e176SBarry Smith const PetscInt *JJ; 3916eeb24464SBarry Smith PetscBool nooffprocentries; 39176a3d2595SBarry Smith Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3918ccd8e176SBarry Smith 3919ccd8e176SBarry Smith PetscFunctionBegin; 39209566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 39219566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 3922d0f46423SBarry Smith m = B->rmap->n; 3923d0f46423SBarry Smith cstart = B->cmap->rstart; 3924d0f46423SBarry Smith cend = B->cmap->rend; 3925d0f46423SBarry Smith rstart = B->rmap->rstart; 39269f0612e4SBarry Smith irstart = Ii[0]; 3927899cda47SBarry Smith 39289566063dSJacob Faibussowitsch PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3929ccd8e176SBarry Smith 393076bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 39318f8f2f0dSBarry Smith for (i = 0; i < m; i++) { 3932ecc77c7aSBarry Smith nnz = Ii[i + 1] - Ii[i]; 39339f0612e4SBarry Smith JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 393408401ef6SPierre Jolivet PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 393508401ef6SPierre Jolivet PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 393608401ef6SPierre Jolivet PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3937ecc77c7aSBarry Smith } 393876bd3646SJed Brown } 3939ecc77c7aSBarry Smith 39408f8f2f0dSBarry Smith for (i = 0; i < m; i++) { 
3941b7940d39SSatish Balay nnz = Ii[i + 1] - Ii[i]; 39429f0612e4SBarry Smith JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3943ccd8e176SBarry Smith nnz_max = PetscMax(nnz_max, nnz); 3944ccd8e176SBarry Smith d = 0; 39450daa03b5SJed Brown for (j = 0; j < nnz; j++) { 39460daa03b5SJed Brown if (cstart <= JJ[j] && JJ[j] < cend) d++; 3947ccd8e176SBarry Smith } 3948ccd8e176SBarry Smith d_nnz[i] = d; 3949ccd8e176SBarry Smith o_nnz[i] = nnz - d; 3950ccd8e176SBarry Smith } 39519566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 39529566063dSJacob Faibussowitsch PetscCall(PetscFree2(d_nnz, o_nnz)); 3953ccd8e176SBarry Smith 39548f8f2f0dSBarry Smith for (i = 0; i < m; i++) { 3955ccd8e176SBarry Smith ii = i + rstart; 39569f0612e4SBarry Smith PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES)); 3957ccd8e176SBarry Smith } 3958eeb24464SBarry Smith nooffprocentries = B->nooffprocentries; 3959eeb24464SBarry Smith B->nooffprocentries = PETSC_TRUE; 39609566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 39619566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3962eeb24464SBarry Smith B->nooffprocentries = nooffprocentries; 3963ccd8e176SBarry Smith 39646a3d2595SBarry Smith /* count number of entries below block diagonal */ 39656a3d2595SBarry Smith PetscCall(PetscFree(Aij->ld)); 39666a3d2595SBarry Smith PetscCall(PetscCalloc1(m, &ld)); 39676a3d2595SBarry Smith Aij->ld = ld; 39686a3d2595SBarry Smith for (i = 0; i < m; i++) { 39696a3d2595SBarry Smith nnz = Ii[i + 1] - Ii[i]; 39706a3d2595SBarry Smith j = 0; 3971ad540459SPierre Jolivet while (j < nnz && J[j] < cstart) j++; 39726a3d2595SBarry Smith ld[i] = j; 3973720a2405SPierre Jolivet if (J) J += nnz; 39746a3d2595SBarry Smith } 39756a3d2595SBarry Smith 39769566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, 
MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 39773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3978ccd8e176SBarry Smith } 3979ccd8e176SBarry Smith 39801eea217eSSatish Balay /*@ 398111a5261eSBarry Smith MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3982ccd8e176SBarry Smith (the default parallel PETSc format). 3983ccd8e176SBarry Smith 3984d083f849SBarry Smith Collective 3985ccd8e176SBarry Smith 3986ccd8e176SBarry Smith Input Parameters: 3987a1661176SMatthew Knepley + B - the matrix 3988d8a51d2aSBarry Smith . i - the indices into `j` for the start of each local row (indices start with zero) 3989d8a51d2aSBarry Smith . j - the column indices for each local row (indices start with zero) 3990ccd8e176SBarry Smith - v - optional values in the matrix 3991ccd8e176SBarry Smith 3992ccd8e176SBarry Smith Level: developer 3993ccd8e176SBarry Smith 399412251496SSatish Balay Notes: 39952ef1f0ffSBarry Smith The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 39962ef1f0ffSBarry Smith thus you CANNOT change the matrix entries by changing the values of `v` after you have 399711a5261eSBarry Smith called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 399812251496SSatish Balay 39992ef1f0ffSBarry Smith The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 400012251496SSatish Balay 4001a4bd8bc0SBarry Smith A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 4002a4bd8bc0SBarry Smith 4003a4bd8bc0SBarry Smith You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 4004a4bd8bc0SBarry Smith 4005a4bd8bc0SBarry Smith If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. 
If you will use 4006a4bd8bc0SBarry Smith `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4007a4bd8bc0SBarry Smith 400812251496SSatish Balay The format which is used for the sparse matrix input, is equivalent to a 400912251496SSatish Balay row-major ordering.. i.e for the following matrix, the input data expected is 4010c5e4d11fSDmitry Karpeev as shown 401127430b45SBarry Smith .vb 401227430b45SBarry Smith 1 0 0 401327430b45SBarry Smith 2 0 3 P0 401427430b45SBarry Smith ------- 401527430b45SBarry Smith 4 5 6 P1 401627430b45SBarry Smith 401727430b45SBarry Smith Process0 [P0] rows_owned=[0,1] 401827430b45SBarry Smith i = {0,1,3} [size = nrow+1 = 2+1] 401927430b45SBarry Smith j = {0,0,2} [size = 3] 402027430b45SBarry Smith v = {1,2,3} [size = 3] 402127430b45SBarry Smith 402227430b45SBarry Smith Process1 [P1] rows_owned=[2] 402327430b45SBarry Smith i = {0,3} [size = nrow+1 = 1+1] 402427430b45SBarry Smith j = {0,1,2} [size = 3] 402527430b45SBarry Smith v = {4,5,6} [size = 3] 402627430b45SBarry Smith .ve 402712251496SSatish Balay 4028fe59aa6dSJacob Faibussowitsch .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4029a4bd8bc0SBarry Smith `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4030ccd8e176SBarry Smith @*/ 4031d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4032d71ae5a4SJacob Faibussowitsch { 4033ccd8e176SBarry Smith PetscFunctionBegin; 4034cac4c232SBarry Smith PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 40353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4036ccd8e176SBarry Smith } 4037ccd8e176SBarry Smith 40385d83a8b1SBarry Smith /*@ 403911a5261eSBarry 
Smith MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4040273d9f13SBarry Smith (the default parallel PETSc format). For good matrix assembly performance 4041273d9f13SBarry Smith the user should preallocate the matrix storage by setting the parameters 404220f4b53cSBarry Smith `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4043273d9f13SBarry Smith 4044d083f849SBarry Smith Collective 4045273d9f13SBarry Smith 4046273d9f13SBarry Smith Input Parameters: 40471c4f3114SJed Brown + B - the matrix 4048273d9f13SBarry Smith . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4049273d9f13SBarry Smith (same value is used for all local rows) 4050273d9f13SBarry Smith . d_nnz - array containing the number of nonzeros in the various rows of the 4051273d9f13SBarry Smith DIAGONAL portion of the local submatrix (possibly different for each row) 40522ef1f0ffSBarry Smith or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4053273d9f13SBarry Smith The size of this array is equal to the number of local rows, i.e 'm'. 40543287b5eaSJed Brown For matrices that will be factored, you must leave room for (and set) 40553287b5eaSJed Brown the diagonal entry even if it is zero. 4056273d9f13SBarry Smith . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4057273d9f13SBarry Smith submatrix (same value is used for all local rows). 4058273d9f13SBarry Smith - o_nnz - array containing the number of nonzeros in the various rows of the 4059273d9f13SBarry Smith OFF-DIAGONAL portion of the local submatrix (possibly different for 40602ef1f0ffSBarry Smith each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4061273d9f13SBarry Smith structure. The size of this array is equal to the number 4062273d9f13SBarry Smith of local rows, i.e 'm'. 
4063273d9f13SBarry Smith 40642920cce0SJacob Faibussowitsch Example Usage: 406527430b45SBarry Smith Consider the following 8x8 matrix with 34 non-zero values, that is 406627430b45SBarry Smith assembled across 3 processors. Lets assume that proc0 owns 3 rows, 406727430b45SBarry Smith proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 406827430b45SBarry Smith as follows 406927430b45SBarry Smith 407027430b45SBarry Smith .vb 407127430b45SBarry Smith 1 2 0 | 0 3 0 | 0 4 407227430b45SBarry Smith Proc0 0 5 6 | 7 0 0 | 8 0 407327430b45SBarry Smith 9 0 10 | 11 0 0 | 12 0 407427430b45SBarry Smith ------------------------------------- 407527430b45SBarry Smith 13 0 14 | 15 16 17 | 0 0 407627430b45SBarry Smith Proc1 0 18 0 | 19 20 21 | 0 0 407727430b45SBarry Smith 0 0 0 | 22 23 0 | 24 0 407827430b45SBarry Smith ------------------------------------- 407927430b45SBarry Smith Proc2 25 26 27 | 0 0 28 | 29 0 408027430b45SBarry Smith 30 0 0 | 31 32 33 | 0 34 408127430b45SBarry Smith .ve 408227430b45SBarry Smith 408327430b45SBarry Smith This can be represented as a collection of submatrices as 408427430b45SBarry Smith .vb 408527430b45SBarry Smith A B C 408627430b45SBarry Smith D E F 408727430b45SBarry Smith G H I 408827430b45SBarry Smith .ve 408927430b45SBarry Smith 409027430b45SBarry Smith Where the submatrices A,B,C are owned by proc0, D,E,F are 409127430b45SBarry Smith owned by proc1, G,H,I are owned by proc2. 409227430b45SBarry Smith 409327430b45SBarry Smith The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 409427430b45SBarry Smith The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 409527430b45SBarry Smith The 'M','N' parameters are 8,8, and have the same values on all procs. 409627430b45SBarry Smith 409727430b45SBarry Smith The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 409827430b45SBarry Smith submatrices [A], [E], [I] respectively. 
  The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all the above values, i.e., 34, and
  hence pre-allocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage. The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
  vector suitable for applying the matrix to. This is an mxn matrix.
  In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitutes the OFF-DIAGONAL portion.

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  You can call `MatGetInfo()` to get information on how effective the preallocation was,
  for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* argument 1 must be a valid Mat object whose type has already been set */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the implementation composed on B under "MatMPIAIJSetPreallocation_C";
     NOTE(review): PetscTryMethod presumably does nothing when no such function is composed - confirm */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in
standard 41758f8f2f0dSBarry Smith CSR format for the local rows. 41762fb0ec9aSBarry Smith 4177d083f849SBarry Smith Collective 41782fb0ec9aSBarry Smith 41792fb0ec9aSBarry Smith Input Parameters: 41802fb0ec9aSBarry Smith + comm - MPI communicator 418111a5261eSBarry Smith . m - number of local rows (Cannot be `PETSC_DECIDE`) 41822fb0ec9aSBarry Smith . n - This value should be the same as the local size used in creating the 4183d8a51d2aSBarry Smith x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4184d8a51d2aSBarry Smith calculated if `N` is given) For square matrices n is almost always `m`. 4185d8a51d2aSBarry Smith . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4186d8a51d2aSBarry Smith . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 41875693b835SJunchao Zhang . i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 41885693b835SJunchao Zhang . j - global column indices 4189f1f2ae84SBarry Smith - a - optional matrix values 41902fb0ec9aSBarry Smith 41912fb0ec9aSBarry Smith Output Parameter: 41922fb0ec9aSBarry Smith . mat - the matrix 419303bfb495SBarry Smith 41942fb0ec9aSBarry Smith Level: intermediate 41952fb0ec9aSBarry Smith 41962fb0ec9aSBarry Smith Notes: 41972ef1f0ffSBarry Smith The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4198d4c8cc39SPierre Jolivet thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4199d4c8cc39SPierre Jolivet called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 42002fb0ec9aSBarry Smith 42012ef1f0ffSBarry Smith The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 
420212251496SSatish Balay 42035693b835SJunchao Zhang Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4204a4bd8bc0SBarry Smith 42055693b835SJunchao Zhang If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4206a4bd8bc0SBarry Smith `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4207a4bd8bc0SBarry Smith 420812251496SSatish Balay The format which is used for the sparse matrix input, is equivalent to a 4209d4c8cc39SPierre Jolivet row-major ordering, i.e., for the following matrix, the input data expected is 4210c5e4d11fSDmitry Karpeev as shown 42112ef1f0ffSBarry Smith .vb 42122ef1f0ffSBarry Smith 1 0 0 42132ef1f0ffSBarry Smith 2 0 3 P0 42142ef1f0ffSBarry Smith ------- 42152ef1f0ffSBarry Smith 4 5 6 P1 42168f8f2f0dSBarry Smith 42172ef1f0ffSBarry Smith Process0 [P0] rows_owned=[0,1] 42182ef1f0ffSBarry Smith i = {0,1,3} [size = nrow+1 = 2+1] 42192ef1f0ffSBarry Smith j = {0,0,2} [size = 3] 42202ef1f0ffSBarry Smith v = {1,2,3} [size = 3] 42212fb0ec9aSBarry Smith 42222ef1f0ffSBarry Smith Process1 [P1] rows_owned=[2] 42232ef1f0ffSBarry Smith i = {0,3} [size = nrow+1 = 1+1] 42242ef1f0ffSBarry Smith j = {0,1,2} [size = 3] 42252ef1f0ffSBarry Smith v = {4,5,6} [size = 3] 42262ef1f0ffSBarry Smith .ve 42272ef1f0ffSBarry Smith 4228d4c8cc39SPierre Jolivet .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 42295693b835SJunchao Zhang `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 42302fb0ec9aSBarry Smith @*/ 4231d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4232d71ae5a4SJacob 
Faibussowitsch { 42332fb0ec9aSBarry Smith PetscFunctionBegin; 423408401ef6SPierre Jolivet PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 423508401ef6SPierre Jolivet PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 42369566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, mat)); 42379566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*mat, m, n, M, N)); 42389566063dSJacob Faibussowitsch /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 42399566063dSJacob Faibussowitsch PetscCall(MatSetType(*mat, MATMPIAIJ)); 42409566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 42413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 42422fb0ec9aSBarry Smith } 42432fb0ec9aSBarry Smith 42448f8f2f0dSBarry Smith /*@ 424511a5261eSBarry Smith MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 42462ef1f0ffSBarry Smith CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 42472ef1f0ffSBarry Smith from `MatCreateMPIAIJWithArrays()` 42486a3d2595SBarry Smith 42496a3d2595SBarry Smith Deprecated: Use `MatUpdateMPIAIJWithArray()` 42508f8f2f0dSBarry Smith 42518f8f2f0dSBarry Smith Collective 42528f8f2f0dSBarry Smith 42538f8f2f0dSBarry Smith Input Parameters: 42548f8f2f0dSBarry Smith + mat - the matrix 425511a5261eSBarry Smith . m - number of local rows (Cannot be `PETSC_DECIDE`) 42568f8f2f0dSBarry Smith . n - This value should be the same as the local size used in creating the 425711a5261eSBarry Smith x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 42588f8f2f0dSBarry Smith calculated if N is given) For square matrices n is almost always m. 425911a5261eSBarry Smith . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 426011a5261eSBarry Smith . 
N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 42618f8f2f0dSBarry Smith . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 42628f8f2f0dSBarry Smith . J - column indices 42638f8f2f0dSBarry Smith - v - matrix values 42648f8f2f0dSBarry Smith 42652ef1f0ffSBarry Smith Level: deprecated 42668f8f2f0dSBarry Smith 42671cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4268a4bd8bc0SBarry Smith `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 42698f8f2f0dSBarry Smith @*/ 4270d71ae5a4SJacob Faibussowitsch PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4271d71ae5a4SJacob Faibussowitsch { 42726a3d2595SBarry Smith PetscInt nnz, i; 42738f8f2f0dSBarry Smith PetscBool nooffprocentries; 42748f8f2f0dSBarry Smith Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4275fff043a9SJunchao Zhang Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4276fff043a9SJunchao Zhang PetscScalar *ad, *ao; 42778f8f2f0dSBarry Smith PetscInt ldi, Iii, md; 42786a3d2595SBarry Smith const PetscInt *Adi = Ad->i; 42796a3d2595SBarry Smith PetscInt *ld = Aij->ld; 42808f8f2f0dSBarry Smith 42818f8f2f0dSBarry Smith PetscFunctionBegin; 4282aed4548fSBarry Smith PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 428308401ef6SPierre Jolivet PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 428408401ef6SPierre Jolivet PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 428508401ef6SPierre 
Jolivet PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 42868f8f2f0dSBarry Smith 42879566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 42889566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 42898f8f2f0dSBarry Smith 42908f8f2f0dSBarry Smith for (i = 0; i < m; i++) { 42914c17f1ccSBarry Smith if (PetscDefined(USE_DEBUG)) { 42924c17f1ccSBarry Smith for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 42934c17f1ccSBarry Smith PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 42944c17f1ccSBarry Smith PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 42954c17f1ccSBarry Smith } 42964c17f1ccSBarry Smith } 42978f8f2f0dSBarry Smith nnz = Ii[i + 1] - Ii[i]; 42988f8f2f0dSBarry Smith Iii = Ii[i]; 42998f8f2f0dSBarry Smith ldi = ld[i]; 43008f8f2f0dSBarry Smith md = Adi[i + 1] - Adi[i]; 43019566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 43029566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 43039566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 43048f8f2f0dSBarry Smith ad += md; 43058f8f2f0dSBarry Smith ao += nnz - md; 43068f8f2f0dSBarry Smith } 43078f8f2f0dSBarry Smith nooffprocentries = mat->nooffprocentries; 43088f8f2f0dSBarry Smith mat->nooffprocentries = PETSC_TRUE; 43099566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 43109566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 43119566063dSJacob Faibussowitsch 
PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 43129566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 43139566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 43149566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 43159566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 43168f8f2f0dSBarry Smith mat->nooffprocentries = nooffprocentries; 43173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 43188f8f2f0dSBarry Smith } 43198f8f2f0dSBarry Smith 43206a3d2595SBarry Smith /*@ 432111a5261eSBarry Smith MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 43226a3d2595SBarry Smith 43236a3d2595SBarry Smith Collective 43246a3d2595SBarry Smith 43256a3d2595SBarry Smith Input Parameters: 43266a3d2595SBarry Smith + mat - the matrix 43276a3d2595SBarry Smith - v - matrix values, stored by row 43286a3d2595SBarry Smith 43296a3d2595SBarry Smith Level: intermediate 43306a3d2595SBarry Smith 4331a4bd8bc0SBarry Smith Notes: 43326a3d2595SBarry Smith The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 43336a3d2595SBarry Smith 4334a4bd8bc0SBarry Smith The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4335a4bd8bc0SBarry Smith 43361cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4337a4bd8bc0SBarry Smith `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 43386a3d2595SBarry Smith @*/ 4339d71ae5a4SJacob Faibussowitsch PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4340d71ae5a4SJacob Faibussowitsch 
{ 43416a3d2595SBarry Smith PetscInt nnz, i, m; 43426a3d2595SBarry Smith PetscBool nooffprocentries; 43436a3d2595SBarry Smith Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 43446a3d2595SBarry Smith Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 43456a3d2595SBarry Smith Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 43466a3d2595SBarry Smith PetscScalar *ad, *ao; 43476a3d2595SBarry Smith const PetscInt *Adi = Ad->i, *Adj = Ao->i; 43486a3d2595SBarry Smith PetscInt ldi, Iii, md; 43496a3d2595SBarry Smith PetscInt *ld = Aij->ld; 43506a3d2595SBarry Smith 43516a3d2595SBarry Smith PetscFunctionBegin; 43526a3d2595SBarry Smith m = mat->rmap->n; 43536a3d2595SBarry Smith 43546a3d2595SBarry Smith PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 43556a3d2595SBarry Smith PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 43566a3d2595SBarry Smith Iii = 0; 43576a3d2595SBarry Smith for (i = 0; i < m; i++) { 43586a3d2595SBarry Smith nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 43596a3d2595SBarry Smith ldi = ld[i]; 43606a3d2595SBarry Smith md = Adi[i + 1] - Adi[i]; 43616a3d2595SBarry Smith PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 43626a3d2595SBarry Smith ad += md; 4363810441c8SPierre Jolivet if (ao) { 4364810441c8SPierre Jolivet PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4365810441c8SPierre Jolivet PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 43666a3d2595SBarry Smith ao += nnz - md; 4367810441c8SPierre Jolivet } 43686a3d2595SBarry Smith Iii += nnz; 43696a3d2595SBarry Smith } 43706a3d2595SBarry Smith nooffprocentries = mat->nooffprocentries; 43716a3d2595SBarry Smith mat->nooffprocentries = PETSC_TRUE; 43726a3d2595SBarry Smith PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 43736a3d2595SBarry Smith PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 43746a3d2595SBarry Smith PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 43756a3d2595SBarry Smith PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 43766a3d2595SBarry Smith 
PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 43776a3d2595SBarry Smith PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 43786a3d2595SBarry Smith PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 43796a3d2595SBarry Smith mat->nooffprocentries = nooffprocentries; 43803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 43816a3d2595SBarry Smith } 43826a3d2595SBarry Smith 43835d83a8b1SBarry Smith /*@ 438411a5261eSBarry Smith MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4385273d9f13SBarry Smith (the default parallel PETSc format). For good matrix assembly performance 4386273d9f13SBarry Smith the user should preallocate the matrix storage by setting the parameters 43872ef1f0ffSBarry Smith `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4388273d9f13SBarry Smith 4389d083f849SBarry Smith Collective 4390273d9f13SBarry Smith 4391273d9f13SBarry Smith Input Parameters: 4392273d9f13SBarry Smith + comm - MPI communicator 439311a5261eSBarry Smith . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4394273d9f13SBarry Smith This value should be the same as the local size used in creating the 4395273d9f13SBarry Smith y vector for the matrix-vector product y = Ax. 4396273d9f13SBarry Smith . n - This value should be the same as the local size used in creating the 439745f401ebSJose E. Roman x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4398273d9f13SBarry Smith calculated if N is given) For square matrices n is almost always m. 439911a5261eSBarry Smith . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 440011a5261eSBarry Smith . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4401273d9f13SBarry Smith . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4402273d9f13SBarry Smith (same value is used for all local rows) 4403273d9f13SBarry Smith . 
d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL`, if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e. `m`.
. o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL`, if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e. `m`.

  Output Parameter:
. A - the matrix

  Options Database Keys:
+ -mat_no_inode                     - Do not use inodes
. -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
- -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
                                      See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
                                      to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.

  Level: intermediate

  Notes:
  It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
  processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
  storage requirements for this matrix.

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
  processor then it must be used on all processors that share the object for
  that argument.

  If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
  `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  The parallel matrix is partitioned across processors such that the
  first `m0` rows belong to process 0, the next `m1` rows belong to
  process 1, the next `m2` rows belong to process 2, etc., where
  `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
  values corresponding to [m x N] submatrix.

  The columns are logically partitioned with the n0 columns belonging
  to 0th partition, the next n1 columns belonging to the next
  partition, etc., where n0,n1,n2... are the input parameter 'n'.

  The DIAGONAL portion of the local submatrix on any given processor
  is the submatrix corresponding to the rows and columns m,n
  corresponding to the given processor, i.e., the diagonal matrix on
  process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
  etc. The remaining portion of the local submatrix [m x (N-n)]
  constitutes the OFF-DIAGONAL portion. The example below better
  illustrates this concept.

  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.

  When calling this routine with a single process communicator, a matrix of
  type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
  type of communicator, use the construction mechanism
.vb
  MatCreate(..., &A);
  MatSetType(A, MATMPIAIJ);
  MatSetSizes(A, m, n, M, N);
  MatMPIAIJSetPreallocation(A, ...);
.ve

  By default, this format uses inodes (identical nodes) when possible.
  We search for consecutive rows with the same nonzero structure, thereby
  reusing matrix information to achieve increased efficiency.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`,`o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`,`o_nnz` are
.vb
     proc0  d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1  d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2  d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values, i.e., 34, and
  hence pre-allocation is perfect.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
          `MatGetOwnershipRangesColumn()`, `PetscLayout`
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt nranks;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &nranks));
  if (nranks <= 1) {
    /* Single-process communicator: build a MATSEQAIJ; o_nz and o_nnz are not passed on */
    PetscCall(MatSetType(*A, MATSEQAIJ));
    PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* More than one process: build a MATMPIAIJ with both diagonal and off-diagonal preallocation */
  PetscCall(MatSetType(*A, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix

  Synopsis:
  MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)

  Not
Collective

  Input Parameter:
. A - the `MATMPIAIJ` matrix

  Output Parameters:
+ Ad     - the diagonal portion of the matrix
. Ao     - the off-diagonal portion of the matrix
. colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
- ierr   - error code

  Level: advanced

  Note:
  Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`

.seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
M*/

/*MC
  MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`

  Synopsis:
  MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)

  Not Collective

  Input Parameters:
+ A      - the `MATMPIAIJ` matrix
. Ad     - the diagonal portion of the matrix
. Ao     - the off-diagonal portion of the matrix
. colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
- ierr   - error code

  Level: advanced

.seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
M*/

/*@C
  MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix

  Not Collective

  Input Parameter:
. A - The `MATMPIAIJ` matrix

  Output Parameters:
+ Ad     - The local diagonal block as a `MATSEQAIJ` matrix
. Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
- colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix

  Level: intermediate

  Note:
  The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns of `Ao` are in [0, Nco), where Nco is
  the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array `colmap` maps these
  local column numbers to global column numbers in the original matrix.
4642127ca0efSMatthew Knepley 4643fe59aa6dSJacob Faibussowitsch Fortran Notes: 46440ab4885dSBarry Smith `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4645127ca0efSMatthew Knepley 4646fe59aa6dSJacob Faibussowitsch .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4647127ca0efSMatthew Knepley @*/ 4648d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4649d71ae5a4SJacob Faibussowitsch { 4650195d93cdSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 465104cf37c7SBarry Smith PetscBool flg; 4652b1d57f15SBarry Smith 4653195d93cdSBarry Smith PetscFunctionBegin; 46549566063dSJacob Faibussowitsch PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 465528b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 465621e72a00SBarry Smith if (Ad) *Ad = a->A; 465721e72a00SBarry Smith if (Ao) *Ao = a->B; 465821e72a00SBarry Smith if (colmap) *colmap = a->garray; 46593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4660195d93cdSBarry Smith } 4661a2243be0SBarry Smith 4662d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4663d71ae5a4SJacob Faibussowitsch { 4664110bb6e1SHong Zhang PetscInt m, N, i, rstart, nnz, Ii; 46659b8102ccSHong Zhang PetscInt *indx; 4666110bb6e1SHong Zhang PetscScalar *values; 4667421ddf4dSJunchao Zhang MatType rootType; 46689b8102ccSHong Zhang 46699b8102ccSHong Zhang PetscFunctionBegin; 46709566063dSJacob Faibussowitsch PetscCall(MatGetSize(inmat, &m, &N)); 4671110bb6e1SHong Zhang if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4672110bb6e1SHong Zhang PetscInt 
*dnz, *onz, sum, bs, cbs; 4673110bb6e1SHong Zhang 467448a46eb9SPierre Jolivet if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4675a22543b6SHong Zhang /* Check sum(n) = N */ 4676462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 467708401ef6SPierre Jolivet PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4678a22543b6SHong Zhang 46799566063dSJacob Faibussowitsch PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 46809b8102ccSHong Zhang rstart -= m; 46819b8102ccSHong Zhang 4682d0609cedSBarry Smith MatPreallocateBegin(comm, m, n, dnz, onz); 46839b8102ccSHong Zhang for (i = 0; i < m; i++) { 46849566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 46859566063dSJacob Faibussowitsch PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 46869566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 46879b8102ccSHong Zhang } 46889b8102ccSHong Zhang 46899566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, outmat)); 46909566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 46919566063dSJacob Faibussowitsch PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 46929566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 46939566063dSJacob Faibussowitsch PetscCall(MatGetRootType_Private(inmat, &rootType)); 46949566063dSJacob Faibussowitsch PetscCall(MatSetType(*outmat, rootType)); 46959566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 46969566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4697d0609cedSBarry Smith MatPreallocateEnd(dnz, onz); 46989566063dSJacob Faibussowitsch PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 46999b8102ccSHong Zhang } 47009b8102ccSHong Zhang 
4701110bb6e1SHong Zhang /* numeric phase */ 47029566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 47039b8102ccSHong Zhang for (i = 0; i < m; i++) { 47049566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 47059b8102ccSHong Zhang Ii = i + rstart; 47069566063dSJacob Faibussowitsch PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 47079566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 47089b8102ccSHong Zhang } 47099566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 47109566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 47113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4712c5d6d63eSBarry Smith } 4713c5d6d63eSBarry Smith 471449abdd8aSBarry Smith static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data) 4715d71ae5a4SJacob Faibussowitsch { 471649abdd8aSBarry Smith Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data; 471751a7d1a8SHong Zhang 471851a7d1a8SHong Zhang PetscFunctionBegin; 47193ba16761SJacob Faibussowitsch if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 47209566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->id_r)); 47219566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->len_s)); 47229566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->len_r)); 47239566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->bi)); 47249566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->bj)); 47259566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->buf_ri[0])); 47269566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->buf_ri)); 47279566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->buf_rj[0])); 47289566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->buf_rj)); 47299566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->coi)); 47309566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->coj)); 
47319566063dSJacob Faibussowitsch PetscCall(PetscFree(merge->owners_co)); 47329566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&merge->rowmap)); 47339566063dSJacob Faibussowitsch PetscCall(PetscFree(merge)); 47343ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 473551a7d1a8SHong Zhang } 473651a7d1a8SHong Zhang 4737c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 4738c6db04a5SJed Brown #include <petscbt.h> 47394ebed01fSBarry Smith 4740d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4741d71ae5a4SJacob Faibussowitsch { 4742ce94432eSBarry Smith MPI_Comm comm; 474355d1abb9SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4744b1d57f15SBarry Smith PetscMPIInt size, rank, taga, *len_s; 47456497c311SBarry Smith PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 47466497c311SBarry Smith PetscMPIInt proc, k; 4747b1d57f15SBarry Smith PetscInt **buf_ri, **buf_rj; 47486497c311SBarry Smith PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4749b1d57f15SBarry Smith PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 475055d1abb9SHong Zhang MPI_Request *s_waits, *r_waits; 475155d1abb9SHong Zhang MPI_Status *status; 4752fff043a9SJunchao Zhang const MatScalar *aa, *a_a; 4753dd6ea824SBarry Smith MatScalar **abuf_r, *ba_i; 475455d1abb9SHong Zhang Mat_Merge_SeqsToMPI *merge; 4755776b82aeSLisandro Dalcin PetscContainer container; 475655d1abb9SHong Zhang 475755d1abb9SHong Zhang PetscFunctionBegin; 47589566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 47599566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 47603c2c1871SHong Zhang 47619566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 47629566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 476355d1abb9SHong Zhang 47649566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", 
(PetscObject *)&container)); 476528b400f6SJacob Faibussowitsch PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 47669566063dSJacob Faibussowitsch PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 47679566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4768fff043a9SJunchao Zhang aa = a_a; 4769bf0cc555SLisandro Dalcin 477055d1abb9SHong Zhang bi = merge->bi; 477155d1abb9SHong Zhang bj = merge->bj; 477255d1abb9SHong Zhang buf_ri = merge->buf_ri; 477355d1abb9SHong Zhang buf_rj = merge->buf_rj; 477455d1abb9SHong Zhang 47759566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size, &status)); 47767a2fc3feSBarry Smith owners = merge->rowmap->range; 477755d1abb9SHong Zhang len_s = merge->len_s; 477855d1abb9SHong Zhang 477955d1abb9SHong Zhang /* send and recv matrix values */ 47809566063dSJacob Faibussowitsch PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 47819566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 478255d1abb9SHong Zhang 47839566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 478455d1abb9SHong Zhang for (proc = 0, k = 0; proc < size; proc++) { 478555d1abb9SHong Zhang if (!len_s[proc]) continue; 478655d1abb9SHong Zhang i = owners[proc]; 47876497c311SBarry Smith PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 478855d1abb9SHong Zhang k++; 478955d1abb9SHong Zhang } 479055d1abb9SHong Zhang 47919566063dSJacob Faibussowitsch if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 47929566063dSJacob Faibussowitsch if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 47939566063dSJacob Faibussowitsch PetscCall(PetscFree(status)); 479455d1abb9SHong Zhang 47959566063dSJacob Faibussowitsch PetscCall(PetscFree(s_waits)); 
47969566063dSJacob Faibussowitsch PetscCall(PetscFree(r_waits)); 479755d1abb9SHong Zhang 479855d1abb9SHong Zhang /* insert mat values of mpimat */ 47999566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(N, &ba_i)); 48009566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 480155d1abb9SHong Zhang 480255d1abb9SHong Zhang for (k = 0; k < merge->nrecv; k++) { 480355d1abb9SHong Zhang buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4804f4f49eeaSPierre Jolivet nrows = *buf_ri_k[k]; 480555d1abb9SHong Zhang nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4806a5b23f4aSJose E. Roman nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 480755d1abb9SHong Zhang } 480855d1abb9SHong Zhang 480955d1abb9SHong Zhang /* set values of ba */ 48107a2fc3feSBarry Smith m = merge->rowmap->n; 481155d1abb9SHong Zhang for (i = 0; i < m; i++) { 481255d1abb9SHong Zhang arow = owners[rank] + i; 481355d1abb9SHong Zhang bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 481455d1abb9SHong Zhang bnzi = bi[i + 1] - bi[i]; 48159566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ba_i, bnzi)); 481655d1abb9SHong Zhang 481755d1abb9SHong Zhang /* add local non-zero vals of this proc's seqmat into ba */ 481855d1abb9SHong Zhang anzi = ai[arow + 1] - ai[arow]; 481955d1abb9SHong Zhang aj = a->j + ai[arow]; 4820fff043a9SJunchao Zhang aa = a_a + ai[arow]; 482155d1abb9SHong Zhang nextaj = 0; 482255d1abb9SHong Zhang for (j = 0; nextaj < anzi; j++) { 482355d1abb9SHong Zhang if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 482455d1abb9SHong Zhang ba_i[j] += aa[nextaj++]; 482555d1abb9SHong Zhang } 482655d1abb9SHong Zhang } 482755d1abb9SHong Zhang 482855d1abb9SHong Zhang /* add received vals into ba */ 482955d1abb9SHong Zhang for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 483055d1abb9SHong Zhang /* i-th row */ 
483155d1abb9SHong Zhang if (i == *nextrow[k]) { 483255d1abb9SHong Zhang anzi = *(nextai[k] + 1) - *nextai[k]; 4833f4f49eeaSPierre Jolivet aj = buf_rj[k] + *nextai[k]; 4834f4f49eeaSPierre Jolivet aa = abuf_r[k] + *nextai[k]; 483555d1abb9SHong Zhang nextaj = 0; 483655d1abb9SHong Zhang for (j = 0; nextaj < anzi; j++) { 483755d1abb9SHong Zhang if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 483855d1abb9SHong Zhang ba_i[j] += aa[nextaj++]; 483955d1abb9SHong Zhang } 484055d1abb9SHong Zhang } 48419371c9d4SSatish Balay nextrow[k]++; 48429371c9d4SSatish Balay nextai[k]++; 484355d1abb9SHong Zhang } 484455d1abb9SHong Zhang } 48459566063dSJacob Faibussowitsch PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 484655d1abb9SHong Zhang } 48479566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 48489566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 48499566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 485055d1abb9SHong Zhang 48519566063dSJacob Faibussowitsch PetscCall(PetscFree(abuf_r[0])); 48529566063dSJacob Faibussowitsch PetscCall(PetscFree(abuf_r)); 48539566063dSJacob Faibussowitsch PetscCall(PetscFree(ba_i)); 48549566063dSJacob Faibussowitsch PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 48559566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 48563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 485755d1abb9SHong Zhang } 485838f152feSBarry Smith 4859d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4860d71ae5a4SJacob Faibussowitsch { 486155a3bba9SHong Zhang Mat B_mpi; 4862c2234fe3SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4863b1d57f15SBarry Smith PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4864b1d57f15SBarry Smith PetscInt **buf_rj, **buf_ri, **buf_ri_k; 
4865d0f46423SBarry Smith PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 48666497c311SBarry Smith PetscInt len, *dnz, *onz, bs, cbs; 4867c599c493SJunchao Zhang PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4868b1d57f15SBarry Smith PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 486955d1abb9SHong Zhang MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 487058cb9c82SHong Zhang MPI_Status *status; 48710298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 4872be0fcf8dSHong Zhang PetscBT lnkbt; 487351a7d1a8SHong Zhang Mat_Merge_SeqsToMPI *merge; 4874776b82aeSLisandro Dalcin PetscContainer container; 487502c68681SHong Zhang 4876e5f2cdd8SHong Zhang PetscFunctionBegin; 48779566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 48783c2c1871SHong Zhang 487938f152feSBarry Smith /* make sure it is a PETSc comm */ 48809566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 48819566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 48829566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 488355d1abb9SHong Zhang 48849566063dSJacob Faibussowitsch PetscCall(PetscNew(&merge)); 48859566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size, &status)); 4886e5f2cdd8SHong Zhang 48876abd8857SHong Zhang /* determine row ownership */ 48889566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 48899566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 48909566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 48919566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 48929566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(merge->rowmap)); 48939566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size, &len_si)); 48949566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size, &merge->len_s)); 
489555d1abb9SHong Zhang 48967a2fc3feSBarry Smith m = merge->rowmap->n; 48977a2fc3feSBarry Smith owners = merge->rowmap->range; 48986abd8857SHong Zhang 48996abd8857SHong Zhang /* determine the number of messages to send, their lengths */ 49003e06a4e6SHong Zhang len_s = merge->len_s; 490151a7d1a8SHong Zhang 49022257cef7SHong Zhang len = 0; /* length of buf_si[] */ 4903c2234fe3SHong Zhang merge->nsend = 0; 49046497c311SBarry Smith for (PetscMPIInt proc = 0; proc < size; proc++) { 49052257cef7SHong Zhang len_si[proc] = 0; 49063e06a4e6SHong Zhang if (proc == rank) { 49076abd8857SHong Zhang len_s[proc] = 0; 49083e06a4e6SHong Zhang } else { 49096497c311SBarry Smith PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 49106497c311SBarry Smith PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 49113e06a4e6SHong Zhang } 49123e06a4e6SHong Zhang if (len_s[proc]) { 4913c2234fe3SHong Zhang merge->nsend++; 49142257cef7SHong Zhang nrows = 0; 49152257cef7SHong Zhang for (i = owners[proc]; i < owners[proc + 1]; i++) { 49162257cef7SHong Zhang if (ai[i + 1] > ai[i]) nrows++; 49172257cef7SHong Zhang } 49186497c311SBarry Smith PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 49192257cef7SHong Zhang len += len_si[proc]; 4920409913e3SHong Zhang } 492158cb9c82SHong Zhang } 4922409913e3SHong Zhang 49232257cef7SHong Zhang /* determine the number and length of messages to receive for ij-structure */ 49249566063dSJacob Faibussowitsch PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 49259566063dSJacob Faibussowitsch PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4926671beff6SHong Zhang 49273e06a4e6SHong Zhang /* post the Irecv of j-structure */ 49289566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(comm, &tagj)); 49299566063dSJacob Faibussowitsch 
PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 493002c68681SHong Zhang 49313e06a4e6SHong Zhang /* post the Isend of j-structure */ 49329566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 49333e06a4e6SHong Zhang 49346497c311SBarry Smith for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4935409913e3SHong Zhang if (!len_s[proc]) continue; 493602c68681SHong Zhang i = owners[proc]; 49376497c311SBarry Smith PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 493851a7d1a8SHong Zhang k++; 493951a7d1a8SHong Zhang } 494051a7d1a8SHong Zhang 49413e06a4e6SHong Zhang /* receives and sends of j-structure are complete */ 49429566063dSJacob Faibussowitsch if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 49439566063dSJacob Faibussowitsch if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 494402c68681SHong Zhang 494502c68681SHong Zhang /* send and recv i-structure */ 49469566063dSJacob Faibussowitsch PetscCall(PetscCommGetNewTag(comm, &tagi)); 49479566063dSJacob Faibussowitsch PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 494802c68681SHong Zhang 49499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(len + 1, &buf_s)); 49503e06a4e6SHong Zhang buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 49516497c311SBarry Smith for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 495202c68681SHong Zhang if (!len_s[proc]) continue; 49533e06a4e6SHong Zhang /* form outgoing message for i-structure: 49543e06a4e6SHong Zhang buf_si[0]: nrows to be sent 49553e06a4e6SHong Zhang [1:nrows]: row index (global) 49563e06a4e6SHong Zhang [nrows+1:2*nrows+1]: i-structure index 49573e06a4e6SHong Zhang */ 49582257cef7SHong Zhang nrows = len_si[proc] / 2 - 1; 49593e06a4e6SHong Zhang buf_si_i = buf_si + nrows + 1; 49603e06a4e6SHong 
Zhang buf_si[0] = nrows; 49613e06a4e6SHong Zhang buf_si_i[0] = 0; 49623e06a4e6SHong Zhang nrows = 0; 49633e06a4e6SHong Zhang for (i = owners[proc]; i < owners[proc + 1]; i++) { 49643e06a4e6SHong Zhang anzi = ai[i + 1] - ai[i]; 49653e06a4e6SHong Zhang if (anzi) { 49663e06a4e6SHong Zhang buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 49673e06a4e6SHong Zhang buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 49683e06a4e6SHong Zhang nrows++; 49693e06a4e6SHong Zhang } 49703e06a4e6SHong Zhang } 49716497c311SBarry Smith PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 497202c68681SHong Zhang k++; 49732257cef7SHong Zhang buf_si += len_si[proc]; 497402c68681SHong Zhang } 49752257cef7SHong Zhang 49769566063dSJacob Faibussowitsch if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 49779566063dSJacob Faibussowitsch if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 497802c68681SHong Zhang 49799566063dSJacob Faibussowitsch PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 498048a46eb9SPierre Jolivet for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 49813e06a4e6SHong Zhang 49829566063dSJacob Faibussowitsch PetscCall(PetscFree(len_si)); 49839566063dSJacob Faibussowitsch PetscCall(PetscFree(len_ri)); 49849566063dSJacob Faibussowitsch PetscCall(PetscFree(rj_waits)); 49859566063dSJacob Faibussowitsch PetscCall(PetscFree2(si_waits, sj_waits)); 49869566063dSJacob Faibussowitsch PetscCall(PetscFree(ri_waits)); 49879566063dSJacob Faibussowitsch PetscCall(PetscFree(buf_s)); 49889566063dSJacob Faibussowitsch PetscCall(PetscFree(status)); 498958cb9c82SHong Zhang 4990bcc1bcd5SHong Zhang /* compute a local seq matrix in each processor */ 499158cb9c82SHong Zhang /* allocate bi array and free space for accumulating nonzero column info */ 
49929566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &bi)); 499358cb9c82SHong Zhang bi[0] = 0; 499458cb9c82SHong Zhang 4995be0fcf8dSHong Zhang /* create and initialize a linked list */ 4996be0fcf8dSHong Zhang nlnk = N + 1; 49979566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 499858cb9c82SHong Zhang 4999bcc1bcd5SHong Zhang /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 5000bcc1bcd5SHong Zhang len = ai[owners[rank + 1]] - ai[owners[rank]]; 50019566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 50022205254eSKarl Rupp 500358cb9c82SHong Zhang current_space = free_space; 500458cb9c82SHong Zhang 5005bcc1bcd5SHong Zhang /* determine symbolic info for each local row */ 50069566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 50071d79065fSBarry Smith 50083e06a4e6SHong Zhang for (k = 0; k < merge->nrecv; k++) { 50092257cef7SHong Zhang buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 50103e06a4e6SHong Zhang nrows = *buf_ri_k[k]; 50113e06a4e6SHong Zhang nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5012a5b23f4aSJose E. 
Roman nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 50133e06a4e6SHong Zhang } 50142257cef7SHong Zhang 5015d0609cedSBarry Smith MatPreallocateBegin(comm, m, n, dnz, onz); 5016bcc1bcd5SHong Zhang len = 0; 501758cb9c82SHong Zhang for (i = 0; i < m; i++) { 501858cb9c82SHong Zhang bnzi = 0; 501958cb9c82SHong Zhang /* add local non-zero cols of this proc's seqmat into lnk */ 502058cb9c82SHong Zhang arow = owners[rank] + i; 502158cb9c82SHong Zhang anzi = ai[arow + 1] - ai[arow]; 502258cb9c82SHong Zhang aj = a->j + ai[arow]; 50239566063dSJacob Faibussowitsch PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 502458cb9c82SHong Zhang bnzi += nlnk; 502558cb9c82SHong Zhang /* add received col data into lnk */ 502651a7d1a8SHong Zhang for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 502755d1abb9SHong Zhang if (i == *nextrow[k]) { /* i-th row */ 50283e06a4e6SHong Zhang anzi = *(nextai[k] + 1) - *nextai[k]; 50293e06a4e6SHong Zhang aj = buf_rj[k] + *nextai[k]; 50309566063dSJacob Faibussowitsch PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 50313e06a4e6SHong Zhang bnzi += nlnk; 50329371c9d4SSatish Balay nextrow[k]++; 50339371c9d4SSatish Balay nextai[k]++; 50343e06a4e6SHong Zhang } 503558cb9c82SHong Zhang } 5036bcc1bcd5SHong Zhang if (len < bnzi) len = bnzi; /* =max(bnzi) */ 503758cb9c82SHong Zhang 503858cb9c82SHong Zhang /* if free space is not available, make more free space */ 503948a46eb9SPierre Jolivet if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 504058cb9c82SHong Zhang /* copy data into free space, then initialize lnk */ 50419566063dSJacob Faibussowitsch PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 50429566063dSJacob Faibussowitsch PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5043bcc1bcd5SHong Zhang 504458cb9c82SHong 
Zhang current_space->array += bnzi; 504558cb9c82SHong Zhang current_space->local_used += bnzi; 504658cb9c82SHong Zhang current_space->local_remaining -= bnzi; 504758cb9c82SHong Zhang 504858cb9c82SHong Zhang bi[i + 1] = bi[i] + bnzi; 504958cb9c82SHong Zhang } 5050bcc1bcd5SHong Zhang 50519566063dSJacob Faibussowitsch PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5052bcc1bcd5SHong Zhang 50539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 50549566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 50559566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 5056409913e3SHong Zhang 5057bcc1bcd5SHong Zhang /* create symbolic parallel matrix B_mpi */ 50589566063dSJacob Faibussowitsch PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 50599566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &B_mpi)); 506054b84b50SHong Zhang if (n == PETSC_DECIDE) { 50619566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 506254b84b50SHong Zhang } else { 50639566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 506454b84b50SHong Zhang } 50659566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 50669566063dSJacob Faibussowitsch PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 50679566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5068d0609cedSBarry Smith MatPreallocateEnd(dnz, onz); 50699566063dSJacob Faibussowitsch PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 507058cb9c82SHong Zhang 507190431a8fSHong Zhang /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 50726abd8857SHong Zhang B_mpi->assembled = PETSC_FALSE; 5073affca5deSHong Zhang merge->bi = bi; 5074affca5deSHong Zhang merge->bj = bj; 507502c68681SHong Zhang merge->buf_ri = buf_ri; 507602c68681SHong Zhang merge->buf_rj = buf_rj; 50770298fd71SBarry Smith merge->coi 
= NULL; 50780298fd71SBarry Smith merge->coj = NULL; 50790298fd71SBarry Smith merge->owners_co = NULL; 5080affca5deSHong Zhang 50819566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&comm)); 5082bf0cc555SLisandro Dalcin 5083affca5deSHong Zhang /* attach the supporting struct to B_mpi for reuse */ 50849566063dSJacob Faibussowitsch PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 50859566063dSJacob Faibussowitsch PetscCall(PetscContainerSetPointer(container, merge)); 508649abdd8aSBarry Smith PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 50879566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 50889566063dSJacob Faibussowitsch PetscCall(PetscContainerDestroy(&container)); 5089affca5deSHong Zhang *mpimat = B_mpi; 509038f152feSBarry Smith 50919566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 50923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5093e5f2cdd8SHong Zhang } 509425616d81SHong Zhang 5095cc4c1da9SBarry Smith /*@ 509611a5261eSBarry Smith MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5097d4036a1aSHong Zhang matrices from each processor 5098d4036a1aSHong Zhang 5099d083f849SBarry Smith Collective 5100d4036a1aSHong Zhang 5101d4036a1aSHong Zhang Input Parameters: 5102d4036a1aSHong Zhang + comm - the communicators the parallel matrix will live on 5103d4036a1aSHong Zhang . seqmat - the input sequential matrices 510411a5261eSBarry Smith . m - number of local rows (or `PETSC_DECIDE`) 510511a5261eSBarry Smith . n - number of local columns (or `PETSC_DECIDE`) 510611a5261eSBarry Smith - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5107d4036a1aSHong Zhang 5108d4036a1aSHong Zhang Output Parameter: 5109d4036a1aSHong Zhang . 
mpimat - the parallel matrix generated 5110d4036a1aSHong Zhang 5111d4036a1aSHong Zhang Level: advanced 5112d4036a1aSHong Zhang 511311a5261eSBarry Smith Note: 5114d4036a1aSHong Zhang The dimensions of the sequential matrix in each processor MUST be the same. 5115d4036a1aSHong Zhang The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5116b6971eaeSBarry Smith destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 51172ef1f0ffSBarry Smith 51181cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5119d4036a1aSHong Zhang @*/ 5120d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5121d71ae5a4SJacob Faibussowitsch { 51227e63b356SHong Zhang PetscMPIInt size; 512355d1abb9SHong Zhang 512455d1abb9SHong Zhang PetscFunctionBegin; 51259566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 51267e63b356SHong Zhang if (size == 1) { 51279566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 51287e63b356SHong Zhang if (scall == MAT_INITIAL_MATRIX) { 51299566063dSJacob Faibussowitsch PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 51307e63b356SHong Zhang } else { 51319566063dSJacob Faibussowitsch PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 51327e63b356SHong Zhang } 51339566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 51343ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 51357e63b356SHong Zhang } 51369566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 513748a46eb9SPierre Jolivet if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 51389566063dSJacob Faibussowitsch PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 51399566063dSJacob Faibussowitsch 
PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 51403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 514155d1abb9SHong Zhang } 51424ebed01fSBarry Smith 5143bc08b0f1SBarry Smith /*@ 51442920cce0SJacob Faibussowitsch MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 51458a9c020eSBarry Smith 51468a9c020eSBarry Smith Not Collective 51478a9c020eSBarry Smith 51482fe279fdSBarry Smith Input Parameter: 514920f4b53cSBarry Smith . A - the matrix 51508a9c020eSBarry Smith 51518a9c020eSBarry Smith Output Parameter: 51528a9c020eSBarry Smith . A_loc - the local sequential matrix generated 51538a9c020eSBarry Smith 51548a9c020eSBarry Smith Level: developer 51558a9c020eSBarry Smith 51568a9c020eSBarry Smith Notes: 51572920cce0SJacob Faibussowitsch The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 51582920cce0SJacob Faibussowitsch with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 51592920cce0SJacob Faibussowitsch `n` is the global column count obtained with `MatGetSize()` 51602920cce0SJacob Faibussowitsch 516111a5261eSBarry Smith In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 51628a9c020eSBarry Smith 51632920cce0SJacob Faibussowitsch For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 
51642920cce0SJacob Faibussowitsch 516511a5261eSBarry Smith Destroy the matrix with `MatDestroy()` 51668a9c020eSBarry Smith 51671cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 51688a9c020eSBarry Smith @*/ 5169d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5170d71ae5a4SJacob Faibussowitsch { 51718a9c020eSBarry Smith PetscBool mpi; 51728a9c020eSBarry Smith 51738a9c020eSBarry Smith PetscFunctionBegin; 51748a9c020eSBarry Smith PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 51758a9c020eSBarry Smith if (mpi) { 51768a9c020eSBarry Smith PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 51778a9c020eSBarry Smith } else { 51788a9c020eSBarry Smith *A_loc = A; 51798a9c020eSBarry Smith PetscCall(PetscObjectReference((PetscObject)*A_loc)); 51808a9c020eSBarry Smith } 51813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 51828a9c020eSBarry Smith } 51838a9c020eSBarry Smith 51848a9c020eSBarry Smith /*@ 51852920cce0SJacob Faibussowitsch MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 518625616d81SHong Zhang 518732fba14fSHong Zhang Not Collective 518825616d81SHong Zhang 518925616d81SHong Zhang Input Parameters: 519025616d81SHong Zhang + A - the matrix 519111a5261eSBarry Smith - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 519225616d81SHong Zhang 519325616d81SHong Zhang Output Parameter: 519425616d81SHong Zhang . A_loc - the local sequential matrix generated 519525616d81SHong Zhang 519625616d81SHong Zhang Level: developer 519725616d81SHong Zhang 519877c65a98SStefano Zampini Notes: 51992920cce0SJacob Faibussowitsch The matrix is created by taking all `A`'s local rows and putting them into a sequential 52002920cce0SJacob Faibussowitsch matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 52012920cce0SJacob Faibussowitsch `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 
52022920cce0SJacob Faibussowitsch 520311a5261eSBarry Smith In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 52048a9c020eSBarry Smith 52052920cce0SJacob Faibussowitsch When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 52062920cce0SJacob Faibussowitsch with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 52072920cce0SJacob Faibussowitsch then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 52082920cce0SJacob Faibussowitsch and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 520977c65a98SStefano Zampini 52101cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 521125616d81SHong Zhang @*/ 5212d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5213d71ae5a4SJacob Faibussowitsch { 521401b7ae99SHong Zhang Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5215b78526a6SJose E. Roman Mat_SeqAIJ *mat, *a, *b; 5216b78526a6SJose E. 
Roman PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5217ce496241SStefano Zampini const PetscScalar *aa, *ba, *aav, *bav; 5218ce496241SStefano Zampini PetscScalar *ca, *cam; 521977c65a98SStefano Zampini PetscMPIInt size; 5220d0f46423SBarry Smith PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 52215a7d977cSHong Zhang PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 52228661ff28SBarry Smith PetscBool match; 522325616d81SHong Zhang 522425616d81SHong Zhang PetscFunctionBegin; 52259566063dSJacob Faibussowitsch PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 522628b400f6SJacob Faibussowitsch PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 52279566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 522877c65a98SStefano Zampini if (size == 1) { 522977c65a98SStefano Zampini if (scall == MAT_INITIAL_MATRIX) { 52309566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 523177c65a98SStefano Zampini *A_loc = mpimat->A; 523277c65a98SStefano Zampini } else if (scall == MAT_REUSE_MATRIX) { 52339566063dSJacob Faibussowitsch PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 523477c65a98SStefano Zampini } 52353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 523677c65a98SStefano Zampini } 523770a9ba44SHong Zhang 52389566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5239f4f49eeaSPierre Jolivet a = (Mat_SeqAIJ *)mpimat->A->data; 5240f4f49eeaSPierre Jolivet b = (Mat_SeqAIJ *)mpimat->B->data; 52419371c9d4SSatish Balay ai = a->i; 52429371c9d4SSatish Balay aj = a->j; 52439371c9d4SSatish Balay bi = b->i; 52449371c9d4SSatish Balay bj = b->j; 52459566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 52469566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5247ce496241SStefano Zampini aa = aav; 
5248ce496241SStefano Zampini ba = bav; 524901b7ae99SHong Zhang if (scall == MAT_INITIAL_MATRIX) { 52509566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(1 + am, &ci)); 5251dea91ad1SHong Zhang ci[0] = 0; 5252ad540459SPierre Jolivet for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 52539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(1 + ci[am], &cj)); 52549566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5255dea91ad1SHong Zhang k = 0; 525601b7ae99SHong Zhang for (i = 0; i < am; i++) { 52575a7d977cSHong Zhang ncols_o = bi[i + 1] - bi[i]; 52585a7d977cSHong Zhang ncols_d = ai[i + 1] - ai[i]; 525901b7ae99SHong Zhang /* off-diagonal portion of A */ 52605a7d977cSHong Zhang for (jo = 0; jo < ncols_o; jo++) { 52615a7d977cSHong Zhang col = cmap[*bj]; 52625a7d977cSHong Zhang if (col >= cstart) break; 52639371c9d4SSatish Balay cj[k] = col; 52649371c9d4SSatish Balay bj++; 52655a7d977cSHong Zhang ca[k++] = *ba++; 52665a7d977cSHong Zhang } 52675a7d977cSHong Zhang /* diagonal portion of A */ 52685a7d977cSHong Zhang for (j = 0; j < ncols_d; j++) { 52695a7d977cSHong Zhang cj[k] = cstart + *aj++; 52705a7d977cSHong Zhang ca[k++] = *aa++; 52715a7d977cSHong Zhang } 52725a7d977cSHong Zhang /* off-diagonal portion of A */ 52735a7d977cSHong Zhang for (j = jo; j < ncols_o; j++) { 52745a7d977cSHong Zhang cj[k] = cmap[*bj++]; 52755a7d977cSHong Zhang ca[k++] = *ba++; 52765a7d977cSHong Zhang } 527725616d81SHong Zhang } 5278dea91ad1SHong Zhang /* put together the new matrix */ 52799566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5280dea91ad1SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5281dea91ad1SHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5282dea91ad1SHong Zhang mat = (Mat_SeqAIJ *)(*A_loc)->data; 5283e6b907acSBarry Smith mat->free_a = PETSC_TRUE; 5284e6b907acSBarry Smith mat->free_ij = PETSC_TRUE; 5285dea91ad1SHong Zhang mat->nonew = 0; 52865a7d977cSHong Zhang } else if (scall == MAT_REUSE_MATRIX) { 52875a7d977cSHong Zhang mat = (Mat_SeqAIJ *)(*A_loc)->data; 5288fff043a9SJunchao Zhang ci = mat->i; 5289fff043a9SJunchao Zhang cj = mat->j; 52909566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 52915a7d977cSHong Zhang for (i = 0; i < am; i++) { 52925a7d977cSHong Zhang /* off-diagonal portion of A */ 52935a7d977cSHong Zhang ncols_o = bi[i + 1] - bi[i]; 52945a7d977cSHong Zhang for (jo = 0; jo < ncols_o; jo++) { 52955a7d977cSHong Zhang col = cmap[*bj]; 52965a7d977cSHong Zhang if (col >= cstart) break; 52979371c9d4SSatish Balay *cam++ = *ba++; 52989371c9d4SSatish Balay bj++; 52995a7d977cSHong Zhang } 53005a7d977cSHong Zhang /* diagonal portion of A */ 5301ecc9b87dSHong Zhang ncols_d = ai[i + 1] - ai[i]; 5302a77337e4SBarry Smith for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 53035a7d977cSHong Zhang /* off-diagonal portion of A */ 5304f33d1a9aSHong Zhang for (j = jo; j < ncols_o; j++) { 53059371c9d4SSatish Balay *cam++ = *ba++; 53069371c9d4SSatish Balay bj++; 5307f33d1a9aSHong Zhang } 53085a7d977cSHong Zhang } 53099566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 531098921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 53119566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 53129566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 53139566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 53143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 531525616d81SHong Zhang } 531625616d81SHong Zhang 5317ed502f03SStefano Zampini /*@ 531811a5261eSBarry Smith 
MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 53194cf0e950SBarry Smith mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5320ed502f03SStefano Zampini 5321ed502f03SStefano Zampini Not Collective 5322ed502f03SStefano Zampini 5323ed502f03SStefano Zampini Input Parameters: 5324ed502f03SStefano Zampini + A - the matrix 532511a5261eSBarry Smith - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5326ed502f03SStefano Zampini 5327d8d19677SJose E. Roman Output Parameters: 53282ef1f0ffSBarry Smith + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5329ed502f03SStefano Zampini - A_loc - the local sequential matrix generated 5330ed502f03SStefano Zampini 5331ed502f03SStefano Zampini Level: developer 5332ed502f03SStefano Zampini 533311a5261eSBarry Smith Note: 53342ef1f0ffSBarry Smith This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 53354cf0e950SBarry Smith part, then those associated with the off-diagonal part (in its local ordering) 5336ed502f03SStefano Zampini 53371cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5338ed502f03SStefano Zampini @*/ 5339d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5340d71ae5a4SJacob Faibussowitsch { 5341ed502f03SStefano Zampini Mat Ao, Ad; 5342ed502f03SStefano Zampini const PetscInt *cmap; 5343ed502f03SStefano Zampini PetscMPIInt size; 5344ed502f03SStefano Zampini PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5345ed502f03SStefano Zampini 5346ed502f03SStefano Zampini PetscFunctionBegin; 53479566063dSJacob Faibussowitsch 
PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 53489566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5349ed502f03SStefano Zampini if (size == 1) { 5350ed502f03SStefano Zampini if (scall == MAT_INITIAL_MATRIX) { 53519566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)Ad)); 5352ed502f03SStefano Zampini *A_loc = Ad; 5353ed502f03SStefano Zampini } else if (scall == MAT_REUSE_MATRIX) { 53549566063dSJacob Faibussowitsch PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5355ed502f03SStefano Zampini } 53569566063dSJacob Faibussowitsch if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 53573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5358ed502f03SStefano Zampini } 53599566063dSJacob Faibussowitsch PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 53609566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5361ed502f03SStefano Zampini if (f) { 53629566063dSJacob Faibussowitsch PetscCall((*f)(A, scall, glob, A_loc)); 5363ed502f03SStefano Zampini } else { 5364ed502f03SStefano Zampini Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5365ed502f03SStefano Zampini Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5366ed502f03SStefano Zampini Mat_SeqAIJ *c; 5367ed502f03SStefano Zampini PetscInt *ai = a->i, *aj = a->j; 5368ed502f03SStefano Zampini PetscInt *bi = b->i, *bj = b->j; 5369ed502f03SStefano Zampini PetscInt *ci, *cj; 5370ed502f03SStefano Zampini const PetscScalar *aa, *ba; 5371ed502f03SStefano Zampini PetscScalar *ca; 5372ed502f03SStefano Zampini PetscInt i, j, am, dn, on; 5373ed502f03SStefano Zampini 53749566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(Ad, &am, &dn)); 53759566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(Ao, NULL, &on)); 53769566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 53779566063dSJacob Faibussowitsch 
PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5378ed502f03SStefano Zampini if (scall == MAT_INITIAL_MATRIX) { 5379ed502f03SStefano Zampini PetscInt k; 53809566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(1 + am, &ci)); 53819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 53829566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5383ed502f03SStefano Zampini ci[0] = 0; 5384ed502f03SStefano Zampini for (i = 0, k = 0; i < am; i++) { 5385ed502f03SStefano Zampini const PetscInt ncols_o = bi[i + 1] - bi[i]; 5386ed502f03SStefano Zampini const PetscInt ncols_d = ai[i + 1] - ai[i]; 5387ed502f03SStefano Zampini ci[i + 1] = ci[i] + ncols_o + ncols_d; 5388ed502f03SStefano Zampini /* diagonal portion of A */ 5389ed502f03SStefano Zampini for (j = 0; j < ncols_d; j++, k++) { 5390ed502f03SStefano Zampini cj[k] = *aj++; 5391ed502f03SStefano Zampini ca[k] = *aa++; 5392ed502f03SStefano Zampini } 5393ed502f03SStefano Zampini /* off-diagonal portion of A */ 5394ed502f03SStefano Zampini for (j = 0; j < ncols_o; j++, k++) { 5395ed502f03SStefano Zampini cj[k] = dn + *bj++; 5396ed502f03SStefano Zampini ca[k] = *ba++; 5397ed502f03SStefano Zampini } 5398ed502f03SStefano Zampini } 5399ed502f03SStefano Zampini /* put together the new matrix */ 54009566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5401ed502f03SStefano Zampini /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5402ed502f03SStefano Zampini /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5403ed502f03SStefano Zampini c = (Mat_SeqAIJ *)(*A_loc)->data; 5404ed502f03SStefano Zampini c->free_a = PETSC_TRUE; 5405ed502f03SStefano Zampini c->free_ij = PETSC_TRUE; 5406ed502f03SStefano Zampini c->nonew = 0; 54079566063dSJacob Faibussowitsch PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5408ed502f03SStefano Zampini } else if (scall == MAT_REUSE_MATRIX) { 54099566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5410ed502f03SStefano Zampini for (i = 0; i < am; i++) { 5411ed502f03SStefano Zampini const PetscInt ncols_d = ai[i + 1] - ai[i]; 5412ed502f03SStefano Zampini const PetscInt ncols_o = bi[i + 1] - bi[i]; 5413ed502f03SStefano Zampini /* diagonal portion of A */ 5414ed502f03SStefano Zampini for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5415ed502f03SStefano Zampini /* off-diagonal portion of A */ 5416ed502f03SStefano Zampini for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5417ed502f03SStefano Zampini } 54189566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 541998921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 54209566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 54219566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5422ed502f03SStefano Zampini if (glob) { 5423ed502f03SStefano Zampini PetscInt cst, *gidx; 5424ed502f03SStefano Zampini 54259566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 54269566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(dn + on, &gidx)); 5427ed502f03SStefano Zampini for (i = 0; i < dn; i++) gidx[i] = cst + i; 5428ed502f03SStefano Zampini for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 54299566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5430ed502f03SStefano Zampini } 5431ed502f03SStefano Zampini } 54329566063dSJacob Faibussowitsch 
PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 54333ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5434ed502f03SStefano Zampini } 5435ed502f03SStefano Zampini 543632fba14fSHong Zhang /*@C 543711a5261eSBarry Smith MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 543832fba14fSHong Zhang 543932fba14fSHong Zhang Not Collective 544032fba14fSHong Zhang 544132fba14fSHong Zhang Input Parameters: 544232fba14fSHong Zhang + A - the matrix 544311a5261eSBarry Smith . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 544420f4b53cSBarry Smith . row - index set of rows to extract (or `NULL`) 544520f4b53cSBarry Smith - col - index set of columns to extract (or `NULL`) 544632fba14fSHong Zhang 544732fba14fSHong Zhang Output Parameter: 544832fba14fSHong Zhang . A_loc - the local sequential matrix generated 544932fba14fSHong Zhang 545032fba14fSHong Zhang Level: developer 545132fba14fSHong Zhang 54521cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 545332fba14fSHong Zhang @*/ 5454d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5455d71ae5a4SJacob Faibussowitsch { 545632fba14fSHong Zhang Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 545732fba14fSHong Zhang PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 545832fba14fSHong Zhang IS isrowa, iscola; 545932fba14fSHong Zhang Mat *aloc; 54604a2b5492SBarry Smith PetscBool match; 546132fba14fSHong Zhang 546232fba14fSHong Zhang PetscFunctionBegin; 54639566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 546428b400f6SJacob Faibussowitsch PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 54659566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, 
A, 0, 0, 0)); 546632fba14fSHong Zhang if (!row) { 54679371c9d4SSatish Balay start = A->rmap->rstart; 54689371c9d4SSatish Balay end = A->rmap->rend; 54699566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 547032fba14fSHong Zhang } else { 547132fba14fSHong Zhang isrowa = *row; 547232fba14fSHong Zhang } 547332fba14fSHong Zhang if (!col) { 5474d0f46423SBarry Smith start = A->cmap->rstart; 547532fba14fSHong Zhang cmap = a->garray; 5476d0f46423SBarry Smith nzA = a->A->cmap->n; 5477d0f46423SBarry Smith nzB = a->B->cmap->n; 54789566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nzA + nzB, &idx)); 547932fba14fSHong Zhang ncols = 0; 548032fba14fSHong Zhang for (i = 0; i < nzB; i++) { 548132fba14fSHong Zhang if (cmap[i] < start) idx[ncols++] = cmap[i]; 548232fba14fSHong Zhang else break; 548332fba14fSHong Zhang } 548432fba14fSHong Zhang imark = i; 548532fba14fSHong Zhang for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 548632fba14fSHong Zhang for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 54879566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 548832fba14fSHong Zhang } else { 548932fba14fSHong Zhang iscola = *col; 549032fba14fSHong Zhang } 549132fba14fSHong Zhang if (scall != MAT_INITIAL_MATRIX) { 54929566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(1, &aloc)); 549332fba14fSHong Zhang aloc[0] = *A_loc; 549432fba14fSHong Zhang } 54959566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5496109e0772SStefano Zampini if (!col) { /* attach global id of condensed columns */ 54979566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5498109e0772SStefano Zampini } 549932fba14fSHong Zhang *A_loc = aloc[0]; 55009566063dSJacob Faibussowitsch PetscCall(PetscFree(aloc)); 550148a46eb9SPierre Jolivet if (!row) 
PetscCall(ISDestroy(&isrowa)); 550248a46eb9SPierre Jolivet if (!col) PetscCall(ISDestroy(&iscola)); 55039566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 55043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 550532fba14fSHong Zhang } 550632fba14fSHong Zhang 55075c65b9ecSFande Kong /* 55085c65b9ecSFande Kong * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 55095c65b9ecSFande Kong * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 55105c65b9ecSFande Kong * on a global size. 55115c65b9ecSFande Kong * */ 5512ba38deedSJacob Faibussowitsch static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5513d71ae5a4SJacob Faibussowitsch { 55145c65b9ecSFande Kong Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5515f4f49eeaSPierre Jolivet Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5516131c27b5Sprj- PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5517131c27b5Sprj- PetscMPIInt owner; 55185c65b9ecSFande Kong PetscSFNode *iremote, *oiremote; 55195c65b9ecSFande Kong const PetscInt *lrowindices; 55205c65b9ecSFande Kong PetscSF sf, osf; 55215c65b9ecSFande Kong PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 55225c65b9ecSFande Kong PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 55235c65b9ecSFande Kong MPI_Comm comm; 55245c65b9ecSFande Kong ISLocalToGlobalMapping mapping; 5525fff043a9SJunchao Zhang const PetscScalar *pd_a, *po_a; 55265c65b9ecSFande Kong 55275c65b9ecSFande Kong PetscFunctionBegin; 55289566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 55295c65b9ecSFande Kong /* plocalsize is the number of roots 55305c65b9ecSFande Kong * nrows is the number of leaves 55315c65b9ecSFande Kong * */ 55329566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 
55339566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(rows, &nrows)); 55349566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(nrows, &iremote)); 55359566063dSJacob Faibussowitsch PetscCall(ISGetIndices(rows, &lrowindices)); 55365c65b9ecSFande Kong for (i = 0; i < nrows; i++) { 55375c65b9ecSFande Kong /* Find a remote index and an owner for a row 55385c65b9ecSFande Kong * The row could be local or remote 55395c65b9ecSFande Kong * */ 554034bcad68SFande Kong owner = 0; 554134bcad68SFande Kong lidx = 0; 55429566063dSJacob Faibussowitsch PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 55435c65b9ecSFande Kong iremote[i].index = lidx; 55445c65b9ecSFande Kong iremote[i].rank = owner; 55455c65b9ecSFande Kong } 55465c65b9ecSFande Kong /* Create SF to communicate how many nonzero columns for each row */ 55479566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, &sf)); 5548f332b1cbSPierre Jolivet /* SF will figure out the number of nonzero columns for each row, and their 55495c65b9ecSFande Kong * offsets 55505c65b9ecSFande Kong * */ 55519566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 55529566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(sf)); 55539566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 5554bc8e477aSFande Kong 55559566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 55569566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 55579566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(nrows, &pnnz)); 55585c65b9ecSFande Kong roffsets[0] = 0; 55595c65b9ecSFande Kong roffsets[1] = 0; 55605c65b9ecSFande Kong for (i = 0; i < plocalsize; i++) { 55614cf0e950SBarry Smith /* diagonal */ 55625c65b9ecSFande Kong nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 55634cf0e950SBarry Smith /* off-diagonal */ 55645c65b9ecSFande Kong nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 
55655c65b9ecSFande Kong /* compute offsets so that we relative location for each row */ 55665c65b9ecSFande Kong roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 55675c65b9ecSFande Kong roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 55685c65b9ecSFande Kong } 55699566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 55709566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 55715c65b9ecSFande Kong /* 'r' means root, and 'l' means leaf */ 55729566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 55739566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 55749566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 55759566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 55769566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&sf)); 55779566063dSJacob Faibussowitsch PetscCall(PetscFree(roffsets)); 55789566063dSJacob Faibussowitsch PetscCall(PetscFree(nrcols)); 55795c65b9ecSFande Kong dntotalcols = 0; 55805c65b9ecSFande Kong ontotalcols = 0; 5581bc8e477aSFande Kong ncol = 0; 55825c65b9ecSFande Kong for (i = 0; i < nrows; i++) { 55835c65b9ecSFande Kong pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5584bc8e477aSFande Kong ncol = PetscMax(pnnz[i], ncol); 55854cf0e950SBarry Smith /* diagonal */ 55865c65b9ecSFande Kong dntotalcols += nlcols[i * 2 + 0]; 55874cf0e950SBarry Smith /* off-diagonal */ 55885c65b9ecSFande Kong ontotalcols += nlcols[i * 2 + 1]; 55895c65b9ecSFande Kong } 55905c65b9ecSFande Kong /* We do not need to figure the right number of columns 55915c65b9ecSFande Kong * since all the calculations will be done by going through the raw data 55925c65b9ecSFande Kong * */ 55939566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 
55949566063dSJacob Faibussowitsch PetscCall(MatSetUp(*P_oth)); 55959566063dSJacob Faibussowitsch PetscCall(PetscFree(pnnz)); 55965c65b9ecSFande Kong p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 55974cf0e950SBarry Smith /* diagonal */ 55989566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(dntotalcols, &iremote)); 55994cf0e950SBarry Smith /* off-diagonal */ 56009566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 56014cf0e950SBarry Smith /* diagonal */ 56029566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 56034cf0e950SBarry Smith /* off-diagonal */ 56049566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 56055c65b9ecSFande Kong dntotalcols = 0; 56065c65b9ecSFande Kong ontotalcols = 0; 56075c65b9ecSFande Kong ntotalcols = 0; 56085c65b9ecSFande Kong for (i = 0; i < nrows; i++) { 560934bcad68SFande Kong owner = 0; 56109566063dSJacob Faibussowitsch PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 56115c65b9ecSFande Kong /* Set iremote for diag matrix */ 56125c65b9ecSFande Kong for (j = 0; j < nlcols[i * 2 + 0]; j++) { 56135c65b9ecSFande Kong iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 56145c65b9ecSFande Kong iremote[dntotalcols].rank = owner; 56155c65b9ecSFande Kong /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 56165c65b9ecSFande Kong ilocal[dntotalcols++] = ntotalcols++; 56175c65b9ecSFande Kong } 56184cf0e950SBarry Smith /* off-diagonal */ 56195c65b9ecSFande Kong for (j = 0; j < nlcols[i * 2 + 1]; j++) { 56205c65b9ecSFande Kong oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 56215c65b9ecSFande Kong oiremote[ontotalcols].rank = owner; 56225c65b9ecSFande Kong oilocal[ontotalcols++] = ntotalcols++; 56235c65b9ecSFande Kong } 56245c65b9ecSFande Kong } 56259566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(rows, &lrowindices)); 56269566063dSJacob Faibussowitsch PetscCall(PetscFree(loffsets)); 56279566063dSJacob 
Faibussowitsch PetscCall(PetscFree(nlcols)); 56289566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, &sf)); 56295c65b9ecSFande Kong /* P serves as roots and P_oth is leaves 56305c65b9ecSFande Kong * Diag matrix 56315c65b9ecSFande Kong * */ 56329566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 56339566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(sf)); 56349566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf)); 56355c65b9ecSFande Kong 56369566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, &osf)); 56374cf0e950SBarry Smith /* off-diagonal */ 56389566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 56399566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(osf)); 56409566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(osf)); 56419566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 56429566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 56434cf0e950SBarry Smith /* operate on the matrix internal data to save memory */ 56449566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 56459566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 56469566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 56475c65b9ecSFande Kong /* Convert to global indices for diag matrix */ 56485c65b9ecSFande Kong for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 56499566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 56505c65b9ecSFande Kong /* We want P_oth store global indices */ 56519566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 
56525c65b9ecSFande Kong /* Use memory scalable approach */ 56539566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 56549566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 56559566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 56569566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 56575c65b9ecSFande Kong /* Convert back to local indices */ 56585c65b9ecSFande Kong for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 56599566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 56605c65b9ecSFande Kong nout = 0; 56619566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 566208401ef6SPierre Jolivet PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 56639566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 56645c65b9ecSFande Kong /* Exchange values */ 56659566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 56669566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 56679566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 56689566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 56695c65b9ecSFande Kong /* Stop PETSc from shrinking memory */ 56705c65b9ecSFande Kong for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 56719566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 56729566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 56735c65b9ecSFande Kong /* Attach 
PetscSF objects to P_oth so that we can reuse it later */ 56749566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 56759566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 56769566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&sf)); 56779566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&osf)); 56783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 56795c65b9ecSFande Kong } 56805c65b9ecSFande Kong 56815c65b9ecSFande Kong /* 56825c65b9ecSFande Kong * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 56835c65b9ecSFande Kong * This supports MPIAIJ and MAIJ 56845c65b9ecSFande Kong * */ 5685d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5686d71ae5a4SJacob Faibussowitsch { 56875c65b9ecSFande Kong Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5688bc8e477aSFande Kong Mat_SeqAIJ *p_oth; 5689bc8e477aSFande Kong IS rows, map; 5690bc8e477aSFande Kong PetscHMapI hamp; 5691bc8e477aSFande Kong PetscInt i, htsize, *rowindices, off, *mapping, key, count; 56925c65b9ecSFande Kong MPI_Comm comm; 56935c65b9ecSFande Kong PetscSF sf, osf; 5694bc8e477aSFande Kong PetscBool has; 56955c65b9ecSFande Kong 56965c65b9ecSFande Kong PetscFunctionBegin; 56979566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 56989566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 56995c65b9ecSFande Kong /* If it is the first time, create an index set of off-diag nonzero columns of A, 57005c65b9ecSFande Kong * and then create a submatrix (that often is an overlapping matrix) 57015c65b9ecSFande Kong * */ 57025c65b9ecSFande Kong if (reuse == MAT_INITIAL_MATRIX) { 57035c65b9ecSFande Kong /* Use a hash table to figure out unique keys */ 5704eec179cfSJacob Faibussowitsch 
PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 57059566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5706bc8e477aSFande Kong count = 0; 5707bc8e477aSFande Kong /* Assume that a->g is sorted, otherwise the following does not make sense */ 5708bc8e477aSFande Kong for (i = 0; i < a->B->cmap->n; i++) { 5709bc8e477aSFande Kong key = a->garray[i] / dof; 57109566063dSJacob Faibussowitsch PetscCall(PetscHMapIHas(hamp, key, &has)); 5711bc8e477aSFande Kong if (!has) { 5712bc8e477aSFande Kong mapping[i] = count; 57139566063dSJacob Faibussowitsch PetscCall(PetscHMapISet(hamp, key, count++)); 5714bc8e477aSFande Kong } else { 5715bc8e477aSFande Kong /* Current 'i' has the same value the previous step */ 5716bc8e477aSFande Kong mapping[i] = count - 1; 57175c65b9ecSFande Kong } 5718bc8e477aSFande Kong } 57199566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 57209566063dSJacob Faibussowitsch PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5721eec179cfSJacob Faibussowitsch PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 57229566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(htsize, &rowindices)); 57235c65b9ecSFande Kong off = 0; 57249566063dSJacob Faibussowitsch PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 57259566063dSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&hamp)); 57269566063dSJacob Faibussowitsch PetscCall(PetscSortInt(htsize, rowindices)); 57279566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 57285c65b9ecSFande Kong /* In case, the matrix was already created but users want to recreate the matrix */ 57299566063dSJacob Faibussowitsch PetscCall(MatDestroy(P_oth)); 57309566063dSJacob Faibussowitsch PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 57319566063dSJacob Faibussowitsch 
PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 57329566063dSJacob Faibussowitsch PetscCall(ISDestroy(&map)); 57339566063dSJacob Faibussowitsch PetscCall(ISDestroy(&rows)); 57345c65b9ecSFande Kong } else if (reuse == MAT_REUSE_MATRIX) { 57355c65b9ecSFande Kong /* If matrix was already created, we simply update values using SF objects 573635cb6cd3SPierre Jolivet * that as attached to the matrix earlier. 5737fff043a9SJunchao Zhang */ 5738fff043a9SJunchao Zhang const PetscScalar *pd_a, *po_a; 5739fff043a9SJunchao Zhang 57409566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 57419566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 574208401ef6SPierre Jolivet PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 57435c65b9ecSFande Kong p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 57445c65b9ecSFande Kong /* Update values in place */ 57459566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 57469566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 57479566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 57489566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 57499566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 57509566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 57519566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 57529566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 57536718818eSStefano Zampini } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 57549566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 
57553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 57565c65b9ecSFande Kong } 57575c65b9ecSFande Kong 575825616d81SHong Zhang /*@C 575920f4b53cSBarry Smith MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 576025616d81SHong Zhang 5761c3339decSBarry Smith Collective 576225616d81SHong Zhang 576325616d81SHong Zhang Input Parameters: 576411a5261eSBarry Smith + A - the first matrix in `MATMPIAIJ` format 576511a5261eSBarry Smith . B - the second matrix in `MATMPIAIJ` format 576611a5261eSBarry Smith - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 576725616d81SHong Zhang 5768f1a722f8SMatthew G. Knepley Output Parameters: 576927430b45SBarry Smith + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 577027430b45SBarry Smith . colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5771f1a722f8SMatthew G. Knepley - B_seq - the sequential matrix generated 577225616d81SHong Zhang 577325616d81SHong Zhang Level: developer 577425616d81SHong Zhang 577520f4b53cSBarry Smith .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 577625616d81SHong Zhang @*/ 5777d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5778d71ae5a4SJacob Faibussowitsch { 5779899cda47SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5780b1d57f15SBarry Smith PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 578125616d81SHong Zhang IS isrowb, iscolb; 57820298fd71SBarry Smith Mat *bseq = NULL; 578325616d81SHong Zhang 578425616d81SHong Zhang PetscFunctionBegin; 578520f4b53cSBarry Smith PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 578620f4b53cSBarry Smith A->cmap->rstart, A->cmap->rend, B->rmap->rstart, 
B->rmap->rend); 57879566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 578825616d81SHong Zhang 578925616d81SHong Zhang if (scall == MAT_INITIAL_MATRIX) { 5790d0f46423SBarry Smith start = A->cmap->rstart; 579125616d81SHong Zhang cmap = a->garray; 5792d0f46423SBarry Smith nzA = a->A->cmap->n; 5793d0f46423SBarry Smith nzB = a->B->cmap->n; 57949566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nzA + nzB, &idx)); 579525616d81SHong Zhang ncols = 0; 57960390132cSHong Zhang for (i = 0; i < nzB; i++) { /* row < local row index */ 579725616d81SHong Zhang if (cmap[i] < start) idx[ncols++] = cmap[i]; 579825616d81SHong Zhang else break; 579925616d81SHong Zhang } 580025616d81SHong Zhang imark = i; 58010390132cSHong Zhang for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 58020390132cSHong Zhang for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 58039566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 58049566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 580525616d81SHong Zhang } else { 580608401ef6SPierre Jolivet PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 58079371c9d4SSatish Balay isrowb = *rowb; 58089371c9d4SSatish Balay iscolb = *colb; 58099566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(1, &bseq)); 581025616d81SHong Zhang bseq[0] = *B_seq; 581125616d81SHong Zhang } 58129566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 581325616d81SHong Zhang *B_seq = bseq[0]; 58149566063dSJacob Faibussowitsch PetscCall(PetscFree(bseq)); 581525616d81SHong Zhang if (!rowb) { 58169566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isrowb)); 581725616d81SHong Zhang } else { 581825616d81SHong Zhang *rowb = isrowb; 581925616d81SHong Zhang } 582025616d81SHong Zhang if 
(!colb) { 58219566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscolb)); 582225616d81SHong Zhang } else { 582325616d81SHong Zhang *colb = iscolb; 582425616d81SHong Zhang } 58259566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 58263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 582725616d81SHong Zhang } 5828429d309bSHong Zhang 5829f8487c73SHong Zhang /* 583027430b45SBarry Smith MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 583101b7ae99SHong Zhang of the OFF-DIAGONAL portion of local A 5832429d309bSHong Zhang 5833c3339decSBarry Smith Collective 5834429d309bSHong Zhang 5835429d309bSHong Zhang Input Parameters: 583627430b45SBarry Smith + A,B - the matrices in `MATMPIAIJ` format 583727430b45SBarry Smith - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5838429d309bSHong Zhang 5839429d309bSHong Zhang Output Parameter: 58400298fd71SBarry Smith + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 58410298fd71SBarry Smith . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 58420298fd71SBarry Smith . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5843598bc09dSHong Zhang - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5844429d309bSHong Zhang 584511a5261eSBarry Smith Developer Note: 584611a5261eSBarry Smith This directly accesses information inside the VecScatter associated with the matrix-vector product 58476eb45d04SBarry Smith for this matrix. This is not desirable.. 
58486eb45d04SBarry Smith 5849429d309bSHong Zhang Level: developer 5850429d309bSHong Zhang 5851f8487c73SHong Zhang */ 58526497c311SBarry Smith 5853d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5854d71ae5a4SJacob Faibussowitsch { 5855899cda47SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 58564b8d542aSHong Zhang VecScatter ctx; 5857ce94432eSBarry Smith MPI_Comm comm; 58583515ee7fSJunchao Zhang const PetscMPIInt *rprocs, *sprocs; 58596497c311SBarry Smith PetscMPIInt nrecvs, nsends; 58603515ee7fSJunchao Zhang const PetscInt *srow, *rstarts, *sstarts; 5861277f51e8SBarry Smith PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 58626497c311SBarry Smith PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5863277f51e8SBarry Smith PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5864ddea5d60SJunchao Zhang MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5865ddea5d60SJunchao Zhang PetscMPIInt size, tag, rank, nreqs; 5866429d309bSHong Zhang 5867429d309bSHong Zhang PetscFunctionBegin; 58689566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 58699566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 5870a7c7454dSHong Zhang 587120f4b53cSBarry Smith PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 587220f4b53cSBarry Smith A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 58739566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 58749566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5875a6b2eed2SHong Zhang 
5876ec07b8f8SHong Zhang if (size == 1) { 5877ec07b8f8SHong Zhang startsj_s = NULL; 5878ec07b8f8SHong Zhang bufa_ptr = NULL; 587952f7967eSHong Zhang *B_oth = NULL; 58803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5881ec07b8f8SHong Zhang } 5882ec07b8f8SHong Zhang 5883fa83eaafSHong Zhang ctx = a->Mvctx; 58844b8d542aSHong Zhang tag = ((PetscObject)ctx)->tag; 58854b8d542aSHong Zhang 58869566063dSJacob Faibussowitsch PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 58873515ee7fSJunchao Zhang /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 58889566063dSJacob Faibussowitsch PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 58899566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 58909566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nreqs, &reqs)); 5891ddea5d60SJunchao Zhang rwaits = reqs; 58928e3a54c0SPierre Jolivet swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5893429d309bSHong Zhang 5894b7f45c76SHong Zhang if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5895429d309bSHong Zhang if (scall == MAT_INITIAL_MATRIX) { 5896a6b2eed2SHong Zhang /* i-array */ 5897a6b2eed2SHong Zhang /* post receives */ 58989566063dSJacob Faibussowitsch if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5899a6b2eed2SHong Zhang for (i = 0; i < nrecvs; i++) { 590074268593SBarry Smith rowlen = rvalues + rstarts[i] * rbs; 5901e42f35eeSHong Zhang nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 59026497c311SBarry Smith PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5903429d309bSHong Zhang } 5904a6b2eed2SHong Zhang 5905a6b2eed2SHong Zhang /* pack the outgoing 
message */ 59069566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 59072205254eSKarl Rupp 59082205254eSKarl Rupp sstartsj[0] = 0; 59092205254eSKarl Rupp rstartsj[0] = 0; 5910a6b2eed2SHong Zhang len = 0; /* total length of j or a array to be sent */ 59113515ee7fSJunchao Zhang if (nsends) { 59123515ee7fSJunchao Zhang k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 59139566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 59143515ee7fSJunchao Zhang } 5915a6b2eed2SHong Zhang for (i = 0; i < nsends; i++) { 59163515ee7fSJunchao Zhang rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5917e42f35eeSHong Zhang nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 591887025532SHong Zhang for (j = 0; j < nrows; j++) { 5919d0f46423SBarry Smith row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5920e42f35eeSHong Zhang for (l = 0; l < sbs; l++) { 59219566063dSJacob Faibussowitsch PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 59222205254eSKarl Rupp 5923e42f35eeSHong Zhang rowlen[j * sbs + l] = ncols; 59242205254eSKarl Rupp 5925e42f35eeSHong Zhang len += ncols; 59269566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5927e42f35eeSHong Zhang } 5928a6b2eed2SHong Zhang k++; 5929429d309bSHong Zhang } 59306497c311SBarry Smith PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 59312205254eSKarl Rupp 5932dea91ad1SHong Zhang sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5933429d309bSHong Zhang } 593487025532SHong Zhang /* recvs and sends of i-array are completed */ 59359566063dSJacob Faibussowitsch if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 59369566063dSJacob Faibussowitsch PetscCall(PetscFree(svalues)); 5937e42f35eeSHong Zhang 5938a6b2eed2SHong Zhang /* allocate 
buffers for sending j and a arrays */ 59399566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(len + 1, &bufj)); 59409566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(len + 1, &bufa)); 5941a6b2eed2SHong Zhang 594287025532SHong Zhang /* create i-array of B_oth */ 59439566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 59442205254eSKarl Rupp 594587025532SHong Zhang b_othi[0] = 0; 5946a6b2eed2SHong Zhang len = 0; /* total length of j or a array to be received */ 5947a6b2eed2SHong Zhang k = 0; 5948a6b2eed2SHong Zhang for (i = 0; i < nrecvs; i++) { 59493515ee7fSJunchao Zhang rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 59503515ee7fSJunchao Zhang nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 595187025532SHong Zhang for (j = 0; j < nrows; j++) { 595287025532SHong Zhang b_othi[k + 1] = b_othi[k] + rowlen[j]; 59539566063dSJacob Faibussowitsch PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5954f91af8c7SBarry Smith k++; 5955a6b2eed2SHong Zhang } 5956dea91ad1SHong Zhang rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5957a6b2eed2SHong Zhang } 59589566063dSJacob Faibussowitsch PetscCall(PetscFree(rvalues)); 5959a6b2eed2SHong Zhang 59606aad120cSJose E. 
Roman /* allocate space for j and a arrays of B_oth */ 59619566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 59629566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5963a6b2eed2SHong Zhang 596487025532SHong Zhang /* j-array */ 5965a6b2eed2SHong Zhang /* post receives of j-array */ 5966a6b2eed2SHong Zhang for (i = 0; i < nrecvs; i++) { 596787025532SHong Zhang nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 59686497c311SBarry Smith PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5969a6b2eed2SHong Zhang } 5970e42f35eeSHong Zhang 5971e42f35eeSHong Zhang /* pack the outgoing message j-array */ 59723515ee7fSJunchao Zhang if (nsends) k = sstarts[0]; 5973a6b2eed2SHong Zhang for (i = 0; i < nsends; i++) { 5974e42f35eeSHong Zhang nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5975a6b2eed2SHong Zhang bufJ = bufj + sstartsj[i]; 597687025532SHong Zhang for (j = 0; j < nrows; j++) { 5977d0f46423SBarry Smith row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5978e42f35eeSHong Zhang for (ll = 0; ll < sbs; ll++) { 59799566063dSJacob Faibussowitsch PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5980ad540459SPierre Jolivet for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 59819566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5982e42f35eeSHong Zhang } 598387025532SHong Zhang } 59846497c311SBarry Smith PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 598587025532SHong Zhang } 598687025532SHong Zhang 598787025532SHong Zhang /* recvs and sends of j-array are completed */ 59889566063dSJacob Faibussowitsch if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 598987025532SHong Zhang } else if (scall == MAT_REUSE_MATRIX) { 5990b7f45c76SHong Zhang sstartsj = *startsj_s; 59911d79065fSBarry 
Smith rstartsj = *startsj_r; 599287025532SHong Zhang bufa = *bufa_ptr; 59939566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5994ddea5d60SJunchao Zhang } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 599587025532SHong Zhang 599687025532SHong Zhang /* a-array */ 599787025532SHong Zhang /* post receives of a-array */ 599887025532SHong Zhang for (i = 0; i < nrecvs; i++) { 599987025532SHong Zhang nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 60006497c311SBarry Smith PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 600187025532SHong Zhang } 6002e42f35eeSHong Zhang 6003e42f35eeSHong Zhang /* pack the outgoing message a-array */ 60043515ee7fSJunchao Zhang if (nsends) k = sstarts[0]; 600587025532SHong Zhang for (i = 0; i < nsends; i++) { 6006e42f35eeSHong Zhang nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 600787025532SHong Zhang bufA = bufa + sstartsj[i]; 600887025532SHong Zhang for (j = 0; j < nrows; j++) { 6009d0f46423SBarry Smith row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 6010e42f35eeSHong Zhang for (ll = 0; ll < sbs; ll++) { 60119566063dSJacob Faibussowitsch PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6012ad540459SPierre Jolivet for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 60139566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6014e42f35eeSHong Zhang } 6015a6b2eed2SHong Zhang } 60166497c311SBarry Smith PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6017a6b2eed2SHong Zhang } 601887025532SHong Zhang /* recvs and sends of a-array are completed */ 60199566063dSJacob Faibussowitsch if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 60209566063dSJacob Faibussowitsch PetscCall(PetscFree(reqs)); 6021a6b2eed2SHong 
Zhang 602287025532SHong Zhang if (scall == MAT_INITIAL_MATRIX) { 6023dd460d27SBarry Smith Mat_SeqAIJ *b_oth; 6024dd460d27SBarry Smith 6025a6b2eed2SHong Zhang /* put together the new matrix */ 60269566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6027a6b2eed2SHong Zhang 6028a6b2eed2SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6029a6b2eed2SHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 603087025532SHong Zhang b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6031e6b907acSBarry Smith b_oth->free_a = PETSC_TRUE; 6032e6b907acSBarry Smith b_oth->free_ij = PETSC_TRUE; 603387025532SHong Zhang b_oth->nonew = 0; 6034a6b2eed2SHong Zhang 60359566063dSJacob Faibussowitsch PetscCall(PetscFree(bufj)); 6036b7f45c76SHong Zhang if (!startsj_s || !bufa_ptr) { 60379566063dSJacob Faibussowitsch PetscCall(PetscFree2(sstartsj, rstartsj)); 60389566063dSJacob Faibussowitsch PetscCall(PetscFree(bufa_ptr)); 6039dea91ad1SHong Zhang } else { 6040b7f45c76SHong Zhang *startsj_s = sstartsj; 60411d79065fSBarry Smith *startsj_r = rstartsj; 604287025532SHong Zhang *bufa_ptr = bufa; 604387025532SHong Zhang } 6044fff043a9SJunchao Zhang } else if (scall == MAT_REUSE_MATRIX) { 60459566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6046dea91ad1SHong Zhang } 60473515ee7fSJunchao Zhang 60489566063dSJacob Faibussowitsch PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 60499566063dSJacob Faibussowitsch PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 60509566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 60513ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6052429d309bSHong Zhang } 6053ccd8e176SBarry Smith 6054cc2e6a90SBarry Smith PETSC_INTERN 
PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6055cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6056ca9cdca7SRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 60579779e05dSSatish Balay #if defined(PETSC_HAVE_MKL_SPARSE) 6058a84739b8SRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6059191b95cbSRichard Tran Mills #endif 6060ae8d29abSPierre Jolivet PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6061cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 60625d7652ecSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 6063cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 60645d7652ecSHong Zhang #endif 6065d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 6066d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6067d24d4204SJose E. 
Roman #endif 606863c07aadSStefano Zampini #if defined(PETSC_HAVE_HYPRE) 606963c07aadSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 607063c07aadSStefano Zampini #endif 60713338378cSStefano Zampini #if defined(PETSC_HAVE_CUDA) 60723338378cSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 60733338378cSStefano Zampini #endif 6074d5e393b6SSuyash Tandon #if defined(PETSC_HAVE_HIP) 6075d5e393b6SSuyash Tandon PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6076d5e393b6SSuyash Tandon #endif 60773d0639e7SStefano Zampini #if defined(PETSC_HAVE_KOKKOS_KERNELS) 60783d0639e7SStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 60793d0639e7SStefano Zampini #endif 6080d4002b98SHong Zhang PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 60814222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 60824222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 608317667f90SBarry Smith 6084fc4dec0aSBarry Smith /* 6085fc4dec0aSBarry Smith Computes (B'*A')' since computing B*A directly is untenable 6086fc4dec0aSBarry Smith 6087fc4dec0aSBarry Smith n p p 60882da392ccSBarry Smith [ ] [ ] [ ] 60892da392ccSBarry Smith m [ A ] * n [ B ] = m [ C ] 60902da392ccSBarry Smith [ ] [ ] [ ] 6091fc4dec0aSBarry Smith 6092fc4dec0aSBarry Smith */ 6093d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6094d71ae5a4SJacob Faibussowitsch { 6095fc4dec0aSBarry Smith Mat At, Bt, Ct; 6096fc4dec0aSBarry Smith 6097fc4dec0aSBarry Smith PetscFunctionBegin; 60989566063dSJacob Faibussowitsch PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 60999566063dSJacob Faibussowitsch PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6100fb842aefSJose E. 
Roman PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 61019566063dSJacob Faibussowitsch PetscCall(MatDestroy(&At)); 61029566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Bt)); 61037fb60732SBarry Smith PetscCall(MatTransposeSetPrecursor(Ct, C)); 61049566063dSJacob Faibussowitsch PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 61059566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Ct)); 61063ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6107fc4dec0aSBarry Smith } 6108fc4dec0aSBarry Smith 6109d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6110d71ae5a4SJacob Faibussowitsch { 61116718818eSStefano Zampini PetscBool cisdense; 6112fc4dec0aSBarry Smith 6113fc4dec0aSBarry Smith PetscFunctionBegin; 611408401ef6SPierre Jolivet PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 61159566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 61169566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6117d5e393b6SSuyash Tandon PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 611848a46eb9SPierre Jolivet if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 61199566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 6120f75ecaa4SHong Zhang 61214222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 61223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6123fc4dec0aSBarry Smith } 6124fc4dec0aSBarry Smith 6125d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6126d71ae5a4SJacob Faibussowitsch { 61274222ddf1SHong Zhang Mat_Product *product = C->product; 61284222ddf1SHong Zhang Mat A = product->A, B = product->B; 
6129fc4dec0aSBarry Smith 6130fc4dec0aSBarry Smith PetscFunctionBegin; 613120f4b53cSBarry Smith PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 613220f4b53cSBarry Smith A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 61334222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 61344222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 61353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6136fc4dec0aSBarry Smith } 6137fc4dec0aSBarry Smith 6138d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6139d71ae5a4SJacob Faibussowitsch { 61404222ddf1SHong Zhang Mat_Product *product = C->product; 61414222ddf1SHong Zhang 61424222ddf1SHong Zhang PetscFunctionBegin; 614348a46eb9SPierre Jolivet if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 61443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 61454222ddf1SHong Zhang } 6146394ed5ebSJunchao Zhang 614727430b45SBarry Smith /* 614827430b45SBarry Smith Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6149394ed5ebSJunchao Zhang 6150394ed5ebSJunchao Zhang Input Parameters: 6151394ed5ebSJunchao Zhang 6152651b1cf9SStefano Zampini j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6153651b1cf9SStefano Zampini j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6154394ed5ebSJunchao Zhang 6155158ec288SJunchao Zhang mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6156394ed5ebSJunchao Zhang 6157394ed5ebSJunchao Zhang For Set1, j1[] contains column indices of the nonzeros. 
6158394ed5ebSJunchao Zhang For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6159394ed5ebSJunchao Zhang respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6160394ed5ebSJunchao Zhang but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6161394ed5ebSJunchao Zhang 6162394ed5ebSJunchao Zhang Similar for Set2. 6163394ed5ebSJunchao Zhang 6164394ed5ebSJunchao Zhang This routine merges the two sets of nonzeros row by row and removes repeats. 6165394ed5ebSJunchao Zhang 6166158ec288SJunchao Zhang Output Parameters: (memory is allocated by the caller) 6167394ed5ebSJunchao Zhang 6168394ed5ebSJunchao Zhang i[],j[]: the CSR of the merged matrix, which has m rows. 6169394ed5ebSJunchao Zhang imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6170394ed5ebSJunchao Zhang imap2[]: similar to imap1[], but for Set2. 6171394ed5ebSJunchao Zhang Note we order nonzeros row-by-row and from left to right. 
6172394ed5ebSJunchao Zhang */ 6173d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6174d71ae5a4SJacob Faibussowitsch { 6175394ed5ebSJunchao Zhang PetscInt r, m; /* Row index of mat */ 6176394ed5ebSJunchao Zhang PetscCount t, t1, t2, b1, e1, b2, e2; 6177394ed5ebSJunchao Zhang 6178394ed5ebSJunchao Zhang PetscFunctionBegin; 61799566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(mat, &m, NULL)); 6180394ed5ebSJunchao Zhang t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6181394ed5ebSJunchao Zhang i[0] = 0; 6182394ed5ebSJunchao Zhang for (r = 0; r < m; r++) { /* Do row by row merging */ 6183394ed5ebSJunchao Zhang b1 = rowBegin1[r]; 6184394ed5ebSJunchao Zhang e1 = rowEnd1[r]; 6185394ed5ebSJunchao Zhang b2 = rowBegin2[r]; 6186394ed5ebSJunchao Zhang e2 = rowEnd2[r]; 6187394ed5ebSJunchao Zhang while (b1 < e1 && b2 < e2) { 6188394ed5ebSJunchao Zhang if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6189394ed5ebSJunchao Zhang j[t] = j1[b1]; 6190394ed5ebSJunchao Zhang imap1[t1] = t; 6191394ed5ebSJunchao Zhang imap2[t2] = t; 6192394ed5ebSJunchao Zhang b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6193394ed5ebSJunchao Zhang b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 61949371c9d4SSatish Balay t1++; 61959371c9d4SSatish Balay t2++; 61969371c9d4SSatish Balay t++; 6197394ed5ebSJunchao Zhang } else if (j1[b1] < j2[b2]) { 6198394ed5ebSJunchao Zhang j[t] = j1[b1]; 6199394ed5ebSJunchao Zhang imap1[t1] = t; 6200394ed5ebSJunchao Zhang b1 += jmap1[t1 + 1] - jmap1[t1]; 62019371c9d4SSatish Balay t1++; 62029371c9d4SSatish Balay t++; 6203394ed5ebSJunchao Zhang } 
else { 6204394ed5ebSJunchao Zhang j[t] = j2[b2]; 6205394ed5ebSJunchao Zhang imap2[t2] = t; 6206394ed5ebSJunchao Zhang b2 += jmap2[t2 + 1] - jmap2[t2]; 62079371c9d4SSatish Balay t2++; 62089371c9d4SSatish Balay t++; 6209394ed5ebSJunchao Zhang } 6210394ed5ebSJunchao Zhang } 6211394ed5ebSJunchao Zhang /* Merge the remaining in either j1[] or j2[] */ 6212394ed5ebSJunchao Zhang while (b1 < e1) { 6213394ed5ebSJunchao Zhang j[t] = j1[b1]; 6214394ed5ebSJunchao Zhang imap1[t1] = t; 6215394ed5ebSJunchao Zhang b1 += jmap1[t1 + 1] - jmap1[t1]; 62169371c9d4SSatish Balay t1++; 62179371c9d4SSatish Balay t++; 6218394ed5ebSJunchao Zhang } 6219394ed5ebSJunchao Zhang while (b2 < e2) { 6220394ed5ebSJunchao Zhang j[t] = j2[b2]; 6221394ed5ebSJunchao Zhang imap2[t2] = t; 6222394ed5ebSJunchao Zhang b2 += jmap2[t2 + 1] - jmap2[t2]; 62239371c9d4SSatish Balay t2++; 62249371c9d4SSatish Balay t++; 6225394ed5ebSJunchao Zhang } 62266497c311SBarry Smith PetscCall(PetscIntCast(t, i + r + 1)); 6227394ed5ebSJunchao Zhang } 62283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6229394ed5ebSJunchao Zhang } 6230394ed5ebSJunchao Zhang 623127430b45SBarry Smith /* 623227430b45SBarry Smith Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6233394ed5ebSJunchao Zhang 6234394ed5ebSJunchao Zhang Input Parameters: 6235394ed5ebSJunchao Zhang mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6236394ed5ebSJunchao Zhang n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6237394ed5ebSJunchao Zhang respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6238394ed5ebSJunchao Zhang 6239394ed5ebSJunchao Zhang i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 
6240394ed5ebSJunchao Zhang i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6241394ed5ebSJunchao Zhang 6242394ed5ebSJunchao Zhang Output Parameters: 6243394ed5ebSJunchao Zhang j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6244394ed5ebSJunchao Zhang rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6245394ed5ebSJunchao Zhang They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6246394ed5ebSJunchao Zhang and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6247394ed5ebSJunchao Zhang 6248394ed5ebSJunchao Zhang Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6249158ec288SJunchao Zhang Atot: number of entries belonging to the diagonal block. 6250158ec288SJunchao Zhang Annz: number of unique nonzeros belonging to the diagonal block. 6251394ed5ebSJunchao Zhang Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6252394ed5ebSJunchao Zhang repeats (i.e., same 'i,j' pair). 6253394ed5ebSJunchao Zhang Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6254394ed5ebSJunchao Zhang is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6255394ed5ebSJunchao Zhang 6256394ed5ebSJunchao Zhang Atot: number of entries belonging to the diagonal block 6257394ed5ebSJunchao Zhang Annz: number of unique nonzeros belonging to the diagonal block. 6258394ed5ebSJunchao Zhang 6259394ed5ebSJunchao Zhang Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 
6260394ed5ebSJunchao Zhang 6261158ec288SJunchao Zhang Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6262394ed5ebSJunchao Zhang */ 6263d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6264d71ae5a4SJacob Faibussowitsch { 6265394ed5ebSJunchao Zhang PetscInt cstart, cend, rstart, rend, row, col; 6266394ed5ebSJunchao Zhang PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6267394ed5ebSJunchao Zhang PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6268394ed5ebSJunchao Zhang PetscCount k, m, p, q, r, s, mid; 6269394ed5ebSJunchao Zhang PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6270394ed5ebSJunchao Zhang 6271394ed5ebSJunchao Zhang PetscFunctionBegin; 62729566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 62739566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6274394ed5ebSJunchao Zhang m = rend - rstart; 6275394ed5ebSJunchao Zhang 6276651b1cf9SStefano Zampini /* Skip negative rows */ 6277651b1cf9SStefano Zampini for (k = 0; k < n; k++) 62789371c9d4SSatish Balay if (i[k] >= 0) break; 6279394ed5ebSJunchao Zhang 6280394ed5ebSJunchao Zhang /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6281394ed5ebSJunchao Zhang fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6282394ed5ebSJunchao Zhang */ 6283394ed5ebSJunchao Zhang while (k < n) { 6284394ed5ebSJunchao Zhang row = i[k]; 6285394ed5ebSJunchao Zhang /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 62869371c9d4SSatish Balay for (s = k; s < n; s++) 62879371c9d4SSatish Balay if (i[s] != row) break; 6288651b1cf9SStefano Zampini 62891690c2aeSBarry Smith /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */ 6290394ed5ebSJunchao Zhang for (p = k; p < s; p++) { 62911690c2aeSBarry Smith if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX; 629254c59aa7SJacob Faibussowitsch else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6293394ed5ebSJunchao Zhang } 62949566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6295158ec288SJunchao Zhang PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6296394ed5ebSJunchao Zhang rowBegin[row - rstart] = k; 6297394ed5ebSJunchao Zhang rowMid[row - rstart] = mid; 6298394ed5ebSJunchao Zhang rowEnd[row - rstart] = s; 6299394ed5ebSJunchao Zhang 6300394ed5ebSJunchao Zhang /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6301394ed5ebSJunchao Zhang Atot += mid - k; 6302394ed5ebSJunchao Zhang Btot += s - mid; 6303394ed5ebSJunchao Zhang 6304651b1cf9SStefano Zampini /* Count unique nonzeros of this diag row */ 6305394ed5ebSJunchao Zhang for (p = k; p < mid;) { 6306394ed5ebSJunchao Zhang col = j[p]; 63079371c9d4SSatish Balay do { 63081690c2aeSBarry Smith j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */ 63099371c9d4SSatish Balay p++; 6310651b1cf9SStefano Zampini } while (p < mid && j[p] == col); 6311394ed5ebSJunchao Zhang Annz++; 6312394ed5ebSJunchao Zhang } 6313394ed5ebSJunchao Zhang 6314651b1cf9SStefano Zampini /* Count unique nonzeros of this offdiag row */ 6315394ed5ebSJunchao Zhang for (p = mid; p < s;) { 6316394ed5ebSJunchao Zhang col = j[p]; 6317d71ae5a4SJacob Faibussowitsch do { 
6318d71ae5a4SJacob Faibussowitsch p++; 6319d71ae5a4SJacob Faibussowitsch } while (p < s && j[p] == col); 6320394ed5ebSJunchao Zhang Bnnz++; 6321394ed5ebSJunchao Zhang } 6322394ed5ebSJunchao Zhang k = s; 6323394ed5ebSJunchao Zhang } 6324394ed5ebSJunchao Zhang 6325394ed5ebSJunchao Zhang /* Allocation according to Atot, Btot, Annz, Bnnz */ 6326158ec288SJunchao Zhang PetscCall(PetscMalloc1(Atot, &Aperm)); 6327158ec288SJunchao Zhang PetscCall(PetscMalloc1(Btot, &Bperm)); 6328158ec288SJunchao Zhang PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6329158ec288SJunchao Zhang PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6330394ed5ebSJunchao Zhang 63316aad120cSJose E. Roman /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6332394ed5ebSJunchao Zhang Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6333394ed5ebSJunchao Zhang for (r = 0; r < m; r++) { 6334394ed5ebSJunchao Zhang k = rowBegin[r]; 6335394ed5ebSJunchao Zhang mid = rowMid[r]; 6336394ed5ebSJunchao Zhang s = rowEnd[r]; 63378e3a54c0SPierre Jolivet PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 63388e3a54c0SPierre Jolivet PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6339394ed5ebSJunchao Zhang Atot += mid - k; 6340394ed5ebSJunchao Zhang Btot += s - mid; 6341394ed5ebSJunchao Zhang 6342394ed5ebSJunchao Zhang /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6343394ed5ebSJunchao Zhang for (p = k; p < mid;) { 6344394ed5ebSJunchao Zhang col = j[p]; 6345394ed5ebSJunchao Zhang q = p; 6346d71ae5a4SJacob Faibussowitsch do { 6347d71ae5a4SJacob Faibussowitsch p++; 6348d71ae5a4SJacob Faibussowitsch } while (p < mid && j[p] == col); 6349394ed5ebSJunchao Zhang Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6350394ed5ebSJunchao Zhang Annz++; 6351394ed5ebSJunchao Zhang } 6352394ed5ebSJunchao Zhang 
6353394ed5ebSJunchao Zhang for (p = mid; p < s;) { 6354394ed5ebSJunchao Zhang col = j[p]; 6355394ed5ebSJunchao Zhang q = p; 6356d71ae5a4SJacob Faibussowitsch do { 6357d71ae5a4SJacob Faibussowitsch p++; 6358d71ae5a4SJacob Faibussowitsch } while (p < s && j[p] == col); 6359394ed5ebSJunchao Zhang Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6360394ed5ebSJunchao Zhang Bnnz++; 6361394ed5ebSJunchao Zhang } 6362394ed5ebSJunchao Zhang } 6363394ed5ebSJunchao Zhang /* Output */ 6364394ed5ebSJunchao Zhang *Aperm_ = Aperm; 6365394ed5ebSJunchao Zhang *Annz_ = Annz; 6366394ed5ebSJunchao Zhang *Atot_ = Atot; 6367394ed5ebSJunchao Zhang *Ajmap_ = Ajmap; 6368394ed5ebSJunchao Zhang *Bperm_ = Bperm; 6369394ed5ebSJunchao Zhang *Bnnz_ = Bnnz; 6370394ed5ebSJunchao Zhang *Btot_ = Btot; 6371394ed5ebSJunchao Zhang *Bjmap_ = Bjmap; 63723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6373394ed5ebSJunchao Zhang } 6374394ed5ebSJunchao Zhang 637527430b45SBarry Smith /* 637627430b45SBarry Smith Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6377158ec288SJunchao Zhang 6378158ec288SJunchao Zhang Input Parameters: 6379158ec288SJunchao Zhang nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6380158ec288SJunchao Zhang nnz: number of unique nonzeros in the merged matrix 6381158ec288SJunchao Zhang imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6382651b1cf9SStefano Zampini jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6383158ec288SJunchao Zhang 6384158ec288SJunchao Zhang Output Parameter: (memory is allocated by the caller) 6385158ec288SJunchao Zhang jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6386158ec288SJunchao Zhang 6387158ec288SJunchao Zhang Example: 6388158ec288SJunchao Zhang nnz1 = 4 6389158ec288SJunchao Zhang nnz = 6 6390158ec288SJunchao Zhang imap = [1,3,4,5] 6391158ec288SJunchao Zhang 
jmap = [0,3,5,6,7] 6392158ec288SJunchao Zhang then, 6393158ec288SJunchao Zhang jmap_new = [0,0,3,3,5,6,7] 6394158ec288SJunchao Zhang */ 6395d71ae5a4SJacob Faibussowitsch static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6396d71ae5a4SJacob Faibussowitsch { 6397158ec288SJunchao Zhang PetscCount k, p; 6398158ec288SJunchao Zhang 6399158ec288SJunchao Zhang PetscFunctionBegin; 6400158ec288SJunchao Zhang jmap_new[0] = 0; 6401158ec288SJunchao Zhang p = nnz; /* p loops over jmap_new[] backwards */ 6402158ec288SJunchao Zhang for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6403158ec288SJunchao Zhang for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6404158ec288SJunchao Zhang } 6405158ec288SJunchao Zhang for (; p >= 0; p--) jmap_new[p] = jmap[0]; 64063ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6407158ec288SJunchao Zhang } 6408158ec288SJunchao Zhang 640949abdd8aSBarry Smith static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data) 64102c4ab24aSJunchao Zhang { 641149abdd8aSBarry Smith MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data; 64122c4ab24aSJunchao Zhang 64132c4ab24aSJunchao Zhang PetscFunctionBegin; 64142c4ab24aSJunchao Zhang PetscCall(PetscSFDestroy(&coo->sf)); 64152c4ab24aSJunchao Zhang PetscCall(PetscFree(coo->Aperm1)); 64162c4ab24aSJunchao Zhang PetscCall(PetscFree(coo->Bperm1)); 64172c4ab24aSJunchao Zhang PetscCall(PetscFree(coo->Ajmap1)); 64182c4ab24aSJunchao Zhang PetscCall(PetscFree(coo->Bjmap1)); 64192c4ab24aSJunchao Zhang PetscCall(PetscFree(coo->Aimap2)); 64202c4ab24aSJunchao Zhang PetscCall(PetscFree(coo->Bimap2)); 64212c4ab24aSJunchao Zhang PetscCall(PetscFree(coo->Aperm2)); 64222c4ab24aSJunchao Zhang PetscCall(PetscFree(coo->Bperm2)); 64232c4ab24aSJunchao Zhang PetscCall(PetscFree(coo->Ajmap2)); 64242c4ab24aSJunchao Zhang PetscCall(PetscFree(coo->Bjmap2)); 64252c4ab24aSJunchao Zhang 
PetscCall(PetscFree(coo->Cperm1)); 64262c4ab24aSJunchao Zhang PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 64272c4ab24aSJunchao Zhang PetscCall(PetscFree(coo)); 64282c4ab24aSJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 64292c4ab24aSJunchao Zhang } 64302c4ab24aSJunchao Zhang 6431d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6432d71ae5a4SJacob Faibussowitsch { 6433394ed5ebSJunchao Zhang MPI_Comm comm; 6434394ed5ebSJunchao Zhang PetscMPIInt rank, size; 6435394ed5ebSJunchao Zhang PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6436394ed5ebSJunchao Zhang PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6437394ed5ebSJunchao Zhang Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 64382c4ab24aSJunchao Zhang PetscContainer container; 64392c4ab24aSJunchao Zhang MatCOOStruct_MPIAIJ *coo; 6440394ed5ebSJunchao Zhang 6441394ed5ebSJunchao Zhang PetscFunctionBegin; 64429566063dSJacob Faibussowitsch PetscCall(PetscFree(mpiaij->garray)); 64439566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mpiaij->lvec)); 6444cbc6b225SStefano Zampini #if defined(PETSC_USE_CTABLE) 6445eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6446cbc6b225SStefano Zampini #else 64479566063dSJacob Faibussowitsch PetscCall(PetscFree(mpiaij->colmap)); 6448cbc6b225SStefano Zampini #endif 64499566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6450cbc6b225SStefano Zampini mat->assembled = PETSC_FALSE; 6451cbc6b225SStefano Zampini mat->was_assembled = PETSC_FALSE; 6452cbc6b225SStefano Zampini 64539566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 64549566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 64559566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 64569566063dSJacob Faibussowitsch 
PetscCall(PetscLayoutSetUp(mat->rmap)); 64579566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->cmap)); 64589566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 64599566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 64609566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(mat, &m, &n)); 64619566063dSJacob Faibussowitsch PetscCall(MatGetSize(mat, &M, &N)); 6462394ed5ebSJunchao Zhang 64636aad120cSJose E. Roman /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6464394ed5ebSJunchao Zhang /* entries come first, then local rows, then remote rows. */ 6465394ed5ebSJunchao Zhang PetscCount n1 = coo_n, *perm1; 6466e8729f6fSJunchao Zhang PetscInt *i1 = coo_i, *j1 = coo_j; 6467e8729f6fSJunchao Zhang 6468e8729f6fSJunchao Zhang PetscCall(PetscMalloc1(n1, &perm1)); 6469394ed5ebSJunchao Zhang for (k = 0; k < n1; k++) perm1[k] = k; 6470394ed5ebSJunchao Zhang 6471394ed5ebSJunchao Zhang /* Manipulate indices so that entries with negative row or col indices will have smallest 6472394ed5ebSJunchao Zhang row indices, local entries will have greater but negative row indices, and remote entries 6473394ed5ebSJunchao Zhang will have positive row indices. 
6474394ed5ebSJunchao Zhang */ 6475394ed5ebSJunchao Zhang for (k = 0; k < n1; k++) { 64761690c2aeSBarry Smith if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 64771690c2aeSBarry Smith else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6478f7d195e4SLawrence Mitchell else { 6479f7d195e4SLawrence Mitchell PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 64801690c2aeSBarry Smith if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6481f7d195e4SLawrence Mitchell } 6482394ed5ebSJunchao Zhang } 6483394ed5ebSJunchao Zhang 6484da81f932SPierre Jolivet /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 64859566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6486651b1cf9SStefano Zampini 6487651b1cf9SStefano Zampini /* Advance k to the first entry we need to take care of */ 6488651b1cf9SStefano Zampini for (k = 0; k < n1; k++) 64891690c2aeSBarry Smith if (i1[k] > PETSC_INT_MIN) break; 64906497c311SBarry Smith PetscCount i1start = k; 6491651b1cf9SStefano Zampini 64921690c2aeSBarry Smith PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 64931690c2aeSBarry Smith for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of local rows*/ 6494394ed5ebSJunchao Zhang 6495394ed5ebSJunchao Zhang /* Send remote rows to their owner */ 6496394ed5ebSJunchao Zhang /* Find which rows should be sent to which remote ranks*/ 6497394ed5ebSJunchao Zhang PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6498394ed5ebSJunchao Zhang PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6499394ed5ebSJunchao Zhang PetscInt 
*nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6500394ed5ebSJunchao Zhang const PetscInt *ranges; 6501394ed5ebSJunchao Zhang PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6502394ed5ebSJunchao Zhang 65039566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 65049566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6505394ed5ebSJunchao Zhang for (k = rem; k < n1;) { 6506394ed5ebSJunchao Zhang PetscMPIInt owner; 6507394ed5ebSJunchao Zhang PetscInt firstRow, lastRow; 6508cbc6b225SStefano Zampini 6509394ed5ebSJunchao Zhang /* Locate a row range */ 6510394ed5ebSJunchao Zhang firstRow = i1[k]; /* first row of this owner */ 65119566063dSJacob Faibussowitsch PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6512394ed5ebSJunchao Zhang lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6513394ed5ebSJunchao Zhang 6514394ed5ebSJunchao Zhang /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 65159566063dSJacob Faibussowitsch PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6516394ed5ebSJunchao Zhang 6517394ed5ebSJunchao Zhang /* All entries in [k,p) belong to this remote owner */ 6518394ed5ebSJunchao Zhang if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6519394ed5ebSJunchao Zhang PetscMPIInt *sendto2; 6520394ed5ebSJunchao Zhang PetscInt *nentries2; 6521394ed5ebSJunchao Zhang PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6522cbc6b225SStefano Zampini 65239566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 65249566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 65259566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 65269566063dSJacob Faibussowitsch PetscCall(PetscFree2(sendto, nentries2)); 6527394ed5ebSJunchao Zhang sendto = sendto2; 6528394ed5ebSJunchao Zhang nentries = nentries2; 6529394ed5ebSJunchao Zhang maxNsend = maxNsend2; 6530394ed5ebSJunchao Zhang } 6531394ed5ebSJunchao Zhang sendto[nsend] = owner; 65326497c311SBarry Smith PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6533394ed5ebSJunchao Zhang nsend++; 6534394ed5ebSJunchao Zhang k = p; 6535394ed5ebSJunchao Zhang } 6536394ed5ebSJunchao Zhang 6537394ed5ebSJunchao Zhang /* Build 1st SF to know offsets on remote to send data */ 6538394ed5ebSJunchao Zhang PetscSF sf1; 6539394ed5ebSJunchao Zhang PetscInt nroots = 1, nroots2 = 0; 6540394ed5ebSJunchao Zhang PetscInt nleaves = nsend, nleaves2 = 0; 6541394ed5ebSJunchao Zhang PetscInt *offsets; 6542394ed5ebSJunchao Zhang PetscSFNode *iremote; 6543394ed5ebSJunchao Zhang 65449566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, &sf1)); 65459566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nsend, &iremote)); 65469566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nsend, &offsets)); 6547394ed5ebSJunchao Zhang for (k = 0; k < nsend; k++) { 6548394ed5ebSJunchao Zhang iremote[k].rank = sendto[k]; 6549394ed5ebSJunchao Zhang iremote[k].index = 0; 6550394ed5ebSJunchao Zhang nleaves2 += nentries[k]; 655154c59aa7SJacob Faibussowitsch PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6552394ed5ebSJunchao Zhang } 65539566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 65549566063dSJacob 
Faibussowitsch PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 65559566063dSJacob Faibussowitsch PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 65569566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&sf1)); 6557e978a55eSPierre Jolivet PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6558394ed5ebSJunchao Zhang 6559394ed5ebSJunchao Zhang /* Build 2nd SF to send remote COOs to their owner */ 6560394ed5ebSJunchao Zhang PetscSF sf2; 6561394ed5ebSJunchao Zhang nroots = nroots2; 6562394ed5ebSJunchao Zhang nleaves = nleaves2; 65639566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, &sf2)); 65649566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(sf2)); 65659566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &iremote)); 6566394ed5ebSJunchao Zhang p = 0; 6567394ed5ebSJunchao Zhang for (k = 0; k < nsend; k++) { 656854c59aa7SJacob Faibussowitsch PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6569394ed5ebSJunchao Zhang for (q = 0; q < nentries[k]; q++, p++) { 6570394ed5ebSJunchao Zhang iremote[p].rank = sendto[k]; 65716497c311SBarry Smith PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6572394ed5ebSJunchao Zhang } 6573394ed5ebSJunchao Zhang } 65749566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6575394ed5ebSJunchao Zhang 6576394ed5ebSJunchao Zhang /* Send the remote COOs to their owner */ 6577394ed5ebSJunchao Zhang PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 
6578394ed5ebSJunchao Zhang PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 65799566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6580834dcf29SHansol Suh PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6581834dcf29SHansol Suh PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6582de0a9f35SPierre Jolivet PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6583de0a9f35SPierre Jolivet PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6584834dcf29SHansol Suh PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6585834dcf29SHansol Suh PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6586834dcf29SHansol Suh PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6587834dcf29SHansol Suh PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6588394ed5ebSJunchao Zhang 65899566063dSJacob Faibussowitsch PetscCall(PetscFree(offsets)); 65909566063dSJacob Faibussowitsch PetscCall(PetscFree2(sendto, nentries)); 6591394ed5ebSJunchao Zhang 6592394ed5ebSJunchao Zhang /* Sort received COOs by row along with the permutation array */ 6593394ed5ebSJunchao Zhang for (k = 0; k < n2; k++) perm2[k] = k; 65949566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6595394ed5ebSJunchao Zhang 6596651b1cf9SStefano Zampini /* sf2 only sends contiguous leafdata to contiguous rootdata. 
We record the permutation which will be used to fill leafdata */ 6597651b1cf9SStefano Zampini PetscCount *Cperm1; 6598834dcf29SHansol Suh PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6599de0a9f35SPierre Jolivet PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6600651b1cf9SStefano Zampini PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6601834dcf29SHansol Suh PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6602651b1cf9SStefano Zampini 6603651b1cf9SStefano Zampini /* Support for HYPRE matrices, kind of a hack. 6604651b1cf9SStefano Zampini Swap min column with diagonal so that diagonal values will go first */ 6605651b1cf9SStefano Zampini PetscBool hypre; 660617b874c6SStefano Zampini PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6607651b1cf9SStefano Zampini if (hypre) { 6608651b1cf9SStefano Zampini PetscInt *minj; 6609651b1cf9SStefano Zampini PetscBT hasdiag; 6610651b1cf9SStefano Zampini 6611651b1cf9SStefano Zampini PetscCall(PetscBTCreate(m, &hasdiag)); 6612651b1cf9SStefano Zampini PetscCall(PetscMalloc1(m, &minj)); 66131690c2aeSBarry Smith for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6614651b1cf9SStefano Zampini for (k = i1start; k < rem; k++) { 6615651b1cf9SStefano Zampini if (j1[k] < cstart || j1[k] >= cend) continue; 6616651b1cf9SStefano Zampini const PetscInt rindex = i1[k] - rstart; 6617651b1cf9SStefano Zampini if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6618651b1cf9SStefano Zampini minj[rindex] = PetscMin(minj[rindex], j1[k]); 6619651b1cf9SStefano Zampini } 6620651b1cf9SStefano Zampini for (k = 0; k < n2; k++) { 6621651b1cf9SStefano Zampini if (j2[k] < cstart || j2[k] >= cend) continue; 6622651b1cf9SStefano Zampini const PetscInt rindex = i2[k] - rstart; 6623651b1cf9SStefano Zampini if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6624651b1cf9SStefano Zampini minj[rindex] = 
PetscMin(minj[rindex], j2[k]); 6625651b1cf9SStefano Zampini } 6626651b1cf9SStefano Zampini for (k = i1start; k < rem; k++) { 6627651b1cf9SStefano Zampini const PetscInt rindex = i1[k] - rstart; 6628651b1cf9SStefano Zampini if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6629651b1cf9SStefano Zampini if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6630651b1cf9SStefano Zampini else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6631651b1cf9SStefano Zampini } 6632651b1cf9SStefano Zampini for (k = 0; k < n2; k++) { 6633651b1cf9SStefano Zampini const PetscInt rindex = i2[k] - rstart; 6634651b1cf9SStefano Zampini if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6635651b1cf9SStefano Zampini if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6636651b1cf9SStefano Zampini else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6637651b1cf9SStefano Zampini } 6638651b1cf9SStefano Zampini PetscCall(PetscBTDestroy(&hasdiag)); 6639651b1cf9SStefano Zampini PetscCall(PetscFree(minj)); 6640651b1cf9SStefano Zampini } 6641651b1cf9SStefano Zampini 6642651b1cf9SStefano Zampini /* Split local COOs and received COOs into diag/offdiag portions */ 6643651b1cf9SStefano Zampini PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6644651b1cf9SStefano Zampini PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6645651b1cf9SStefano Zampini PetscCount Annz1, Bnnz1, Atot1, Btot1; 6646394ed5ebSJunchao Zhang PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6647394ed5ebSJunchao Zhang PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6648394ed5ebSJunchao Zhang PetscCount Annz2, Bnnz2, Atot2, Btot2; 6649394ed5ebSJunchao Zhang 6650651b1cf9SStefano Zampini PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 66519566063dSJacob Faibussowitsch PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6652651b1cf9SStefano Zampini PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, 
rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 66539566063dSJacob Faibussowitsch PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6654394ed5ebSJunchao Zhang 6655394ed5ebSJunchao Zhang /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6656394ed5ebSJunchao Zhang PetscInt *Ai, *Bi; 6657394ed5ebSJunchao Zhang PetscInt *Aj, *Bj; 6658394ed5ebSJunchao Zhang 66599566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &Ai)); 66609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &Bi)); 66619566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 66629566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6663394ed5ebSJunchao Zhang 6664394ed5ebSJunchao Zhang PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6665158ec288SJunchao Zhang PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6666158ec288SJunchao Zhang PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6667158ec288SJunchao Zhang PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6668158ec288SJunchao Zhang PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6669394ed5ebSJunchao Zhang 66709566063dSJacob Faibussowitsch PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 66719566063dSJacob Faibussowitsch PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6672158ec288SJunchao Zhang 6673158ec288SJunchao Zhang /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6674158ec288SJunchao Zhang /* expect nonzeros in A/B most likely have local contributing entries */ 6675158ec288SJunchao Zhang PetscInt Annz = Ai[m]; 6676158ec288SJunchao Zhang PetscInt Bnnz = Bi[m]; 6677158ec288SJunchao Zhang 
PetscCount *Ajmap1_new, *Bjmap1_new; 6678158ec288SJunchao Zhang 6679158ec288SJunchao Zhang PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6680158ec288SJunchao Zhang PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6681158ec288SJunchao Zhang 6682158ec288SJunchao Zhang PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6683158ec288SJunchao Zhang PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6684158ec288SJunchao Zhang 6685158ec288SJunchao Zhang PetscCall(PetscFree(Aimap1)); 6686158ec288SJunchao Zhang PetscCall(PetscFree(Ajmap1)); 6687158ec288SJunchao Zhang PetscCall(PetscFree(Bimap1)); 6688158ec288SJunchao Zhang PetscCall(PetscFree(Bjmap1)); 66899566063dSJacob Faibussowitsch PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 66909566063dSJacob Faibussowitsch PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6691e8729f6fSJunchao Zhang PetscCall(PetscFree(perm1)); 66929566063dSJacob Faibussowitsch PetscCall(PetscFree3(i2, j2, perm2)); 6693394ed5ebSJunchao Zhang 6694158ec288SJunchao Zhang Ajmap1 = Ajmap1_new; 6695158ec288SJunchao Zhang Bjmap1 = Bjmap1_new; 6696158ec288SJunchao Zhang 6697394ed5ebSJunchao Zhang /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6698394ed5ebSJunchao Zhang if (Annz < Annz1 + Annz2) { 6699394ed5ebSJunchao Zhang PetscInt *Aj_new; 67009566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Annz, &Aj_new)); 67019566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 67029566063dSJacob Faibussowitsch PetscCall(PetscFree(Aj)); 6703394ed5ebSJunchao Zhang Aj = Aj_new; 6704394ed5ebSJunchao Zhang } 6705394ed5ebSJunchao Zhang 6706394ed5ebSJunchao Zhang if (Bnnz < Bnnz1 + Bnnz2) { 6707394ed5ebSJunchao Zhang PetscInt *Bj_new; 67089566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 67099566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 67109566063dSJacob Faibussowitsch PetscCall(PetscFree(Bj)); 6711394ed5ebSJunchao 
Zhang Bj = Bj_new; 6712394ed5ebSJunchao Zhang } 6713394ed5ebSJunchao Zhang 6714cbc6b225SStefano Zampini /* Create new submatrices for on-process and off-process coupling */ 6715394ed5ebSJunchao Zhang PetscScalar *Aa, *Ba; 6716cbc6b225SStefano Zampini MatType rtype; 6717394ed5ebSJunchao Zhang Mat_SeqAIJ *a, *b; 6718cf8ba265SJunchao Zhang PetscObjectState state; 67199566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 67209566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(Bnnz, &Ba)); 6721394ed5ebSJunchao Zhang /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 67229371c9d4SSatish Balay if (cstart) { 67239371c9d4SSatish Balay for (k = 0; k < Annz; k++) Aj[k] -= cstart; 67249371c9d4SSatish Balay } 6725c508b908SBarry Smith 67269566063dSJacob Faibussowitsch PetscCall(MatGetRootType_Private(mat, &rtype)); 6727c508b908SBarry Smith 6728c508b908SBarry Smith MatSeqXAIJGetOptions_Private(mpiaij->A); 6729c508b908SBarry Smith PetscCall(MatDestroy(&mpiaij->A)); 67309566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 67310da474c6SJeremy L Thompson PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6732c508b908SBarry Smith MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6733c508b908SBarry Smith 6734c508b908SBarry Smith MatSeqXAIJGetOptions_Private(mpiaij->B); 6735c508b908SBarry Smith PetscCall(MatDestroy(&mpiaij->B)); 67369566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 67370da474c6SJeremy L Thompson PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6738c508b908SBarry Smith MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6739c508b908SBarry Smith 67409566063dSJacob Faibussowitsch PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 67415bb69915SJunchao Zhang mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant 
MatSetUpMultiply_MPIAIJ 6742cf8ba265SJunchao Zhang state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6743462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6744cbc6b225SStefano Zampini 6745394ed5ebSJunchao Zhang a = (Mat_SeqAIJ *)mpiaij->A->data; 6746394ed5ebSJunchao Zhang b = (Mat_SeqAIJ *)mpiaij->B->data; 67479f0612e4SBarry Smith a->free_a = PETSC_TRUE; 67489f0612e4SBarry Smith a->free_ij = PETSC_TRUE; 67499f0612e4SBarry Smith b->free_a = PETSC_TRUE; 67509f0612e4SBarry Smith b->free_ij = PETSC_TRUE; 675117b874c6SStefano Zampini a->maxnz = a->nz; 675217b874c6SStefano Zampini b->maxnz = b->nz; 6753394ed5ebSJunchao Zhang 6754cbc6b225SStefano Zampini /* conversion must happen AFTER multiply setup */ 67559566063dSJacob Faibussowitsch PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 67569566063dSJacob Faibussowitsch PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 67579566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mpiaij->lvec)); 67589566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6759cbc6b225SStefano Zampini 67602c4ab24aSJunchao Zhang // Put the COO struct in a container and then attach that to the matrix 67612c4ab24aSJunchao Zhang PetscCall(PetscMalloc1(1, &coo)); 67622c4ab24aSJunchao Zhang coo->n = coo_n; 67632c4ab24aSJunchao Zhang coo->sf = sf2; 67642c4ab24aSJunchao Zhang coo->sendlen = nleaves; 67652c4ab24aSJunchao Zhang coo->recvlen = nroots; 67662c4ab24aSJunchao Zhang coo->Annz = Annz; 67672c4ab24aSJunchao Zhang coo->Bnnz = Bnnz; 67682c4ab24aSJunchao Zhang coo->Annz2 = Annz2; 67692c4ab24aSJunchao Zhang coo->Bnnz2 = Bnnz2; 67702c4ab24aSJunchao Zhang coo->Atot1 = Atot1; 67712c4ab24aSJunchao Zhang coo->Atot2 = Atot2; 67722c4ab24aSJunchao Zhang coo->Btot1 = Btot1; 67732c4ab24aSJunchao Zhang coo->Btot2 = Btot2; 67742c4ab24aSJunchao Zhang coo->Ajmap1 = Ajmap1; 67752c4ab24aSJunchao 
Zhang coo->Aperm1 = Aperm1; 67762c4ab24aSJunchao Zhang coo->Bjmap1 = Bjmap1; 67772c4ab24aSJunchao Zhang coo->Bperm1 = Bperm1; 67782c4ab24aSJunchao Zhang coo->Aimap2 = Aimap2; 67792c4ab24aSJunchao Zhang coo->Ajmap2 = Ajmap2; 67802c4ab24aSJunchao Zhang coo->Aperm2 = Aperm2; 67812c4ab24aSJunchao Zhang coo->Bimap2 = Bimap2; 67822c4ab24aSJunchao Zhang coo->Bjmap2 = Bjmap2; 67832c4ab24aSJunchao Zhang coo->Bperm2 = Bperm2; 67842c4ab24aSJunchao Zhang coo->Cperm1 = Cperm1; 67852c4ab24aSJunchao Zhang // Allocate in preallocation. If not used, it has zero cost on host 67862c4ab24aSJunchao Zhang PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 67872c4ab24aSJunchao Zhang PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 67882c4ab24aSJunchao Zhang PetscCall(PetscContainerSetPointer(container, coo)); 678949abdd8aSBarry Smith PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ)); 67902c4ab24aSJunchao Zhang PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 67912c4ab24aSJunchao Zhang PetscCall(PetscContainerDestroy(&container)); 67923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6793394ed5ebSJunchao Zhang } 6794394ed5ebSJunchao Zhang 6795d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6796d71ae5a4SJacob Faibussowitsch { 6797394ed5ebSJunchao Zhang Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6798394ed5ebSJunchao Zhang Mat A = mpiaij->A, B = mpiaij->B; 6799394ed5ebSJunchao Zhang PetscScalar *Aa, *Ba; 68002c4ab24aSJunchao Zhang PetscScalar *sendbuf, *recvbuf; 68012c4ab24aSJunchao Zhang const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 68022c4ab24aSJunchao Zhang const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 68032c4ab24aSJunchao Zhang const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 68042c4ab24aSJunchao Zhang const PetscCount *Cperm1; 68052c4ab24aSJunchao Zhang PetscContainer 
container; 68062c4ab24aSJunchao Zhang MatCOOStruct_MPIAIJ *coo; 6807394ed5ebSJunchao Zhang 6808394ed5ebSJunchao Zhang PetscFunctionBegin; 68092c4ab24aSJunchao Zhang PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 68102c4ab24aSJunchao Zhang PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 68112c4ab24aSJunchao Zhang PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 68122c4ab24aSJunchao Zhang sendbuf = coo->sendbuf; 68132c4ab24aSJunchao Zhang recvbuf = coo->recvbuf; 68142c4ab24aSJunchao Zhang Ajmap1 = coo->Ajmap1; 68152c4ab24aSJunchao Zhang Ajmap2 = coo->Ajmap2; 68162c4ab24aSJunchao Zhang Aimap2 = coo->Aimap2; 68172c4ab24aSJunchao Zhang Bjmap1 = coo->Bjmap1; 68182c4ab24aSJunchao Zhang Bjmap2 = coo->Bjmap2; 68192c4ab24aSJunchao Zhang Bimap2 = coo->Bimap2; 68202c4ab24aSJunchao Zhang Aperm1 = coo->Aperm1; 68212c4ab24aSJunchao Zhang Aperm2 = coo->Aperm2; 68222c4ab24aSJunchao Zhang Bperm1 = coo->Bperm1; 68232c4ab24aSJunchao Zhang Bperm2 = coo->Bperm2; 68242c4ab24aSJunchao Zhang Cperm1 = coo->Cperm1; 68252c4ab24aSJunchao Zhang 68269566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 68279566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(B, &Ba)); 6828394ed5ebSJunchao Zhang 6829394ed5ebSJunchao Zhang /* Pack entries to be sent to remote */ 68302c4ab24aSJunchao Zhang for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6831394ed5ebSJunchao Zhang 6832394ed5ebSJunchao Zhang /* Send remote entries to their owner and overlap the communication with local computation */ 68332c4ab24aSJunchao Zhang PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6834394ed5ebSJunchao Zhang /* Add local entries to A and B */ 68352c4ab24aSJunchao Zhang for (PetscCount i = 0; i < coo->Annz; i++) { /* All 
nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6836da81f932SPierre Jolivet PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6837158ec288SJunchao Zhang for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6838158ec288SJunchao Zhang Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6839394ed5ebSJunchao Zhang } 68402c4ab24aSJunchao Zhang for (PetscCount i = 0; i < coo->Bnnz; i++) { 6841158ec288SJunchao Zhang PetscScalar sum = 0.0; 6842158ec288SJunchao Zhang for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6843158ec288SJunchao Zhang Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6844394ed5ebSJunchao Zhang } 68452c4ab24aSJunchao Zhang PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6846394ed5ebSJunchao Zhang 6847394ed5ebSJunchao Zhang /* Add received remote entries to A and B */ 68482c4ab24aSJunchao Zhang for (PetscCount i = 0; i < coo->Annz2; i++) { 6849394ed5ebSJunchao Zhang for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6850394ed5ebSJunchao Zhang } 68512c4ab24aSJunchao Zhang for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6852394ed5ebSJunchao Zhang for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6853394ed5ebSJunchao Zhang } 68549566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 68559566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 68563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6857394ed5ebSJunchao Zhang } 6858394ed5ebSJunchao Zhang 6859ccd8e176SBarry Smith /*MC 6860ccd8e176SBarry Smith MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6861ccd8e176SBarry Smith 6862ccd8e176SBarry Smith Options Database Keys: 686311a5261eSBarry Smith . 
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6864ccd8e176SBarry Smith 6865ccd8e176SBarry Smith Level: beginner 68660cd7f59aSBarry Smith 68670cd7f59aSBarry Smith Notes: 68682ef1f0ffSBarry Smith `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 68690cd7f59aSBarry Smith in this case the values associated with the rows and columns one passes in are set to zero 68700cd7f59aSBarry Smith in the matrix 68710cd7f59aSBarry Smith 687211a5261eSBarry Smith `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 687311a5261eSBarry Smith space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6874ccd8e176SBarry Smith 68751cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6876ccd8e176SBarry Smith M*/ 6877d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6878d71ae5a4SJacob Faibussowitsch { 6879ccd8e176SBarry Smith Mat_MPIAIJ *b; 6880ccd8e176SBarry Smith PetscMPIInt size; 6881ccd8e176SBarry Smith 6882ccd8e176SBarry Smith PetscFunctionBegin; 68839566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 68842205254eSKarl Rupp 68854dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&b)); 6886ccd8e176SBarry Smith B->data = (void *)b; 6887aea10558SJacob Faibussowitsch B->ops[0] = MatOps_Values; 6888ccd8e176SBarry Smith B->assembled = PETSC_FALSE; 6889ccd8e176SBarry Smith B->insertmode = NOT_SET_VALUES; 6890ccd8e176SBarry Smith b->size = size; 68912205254eSKarl Rupp 68929566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6893ccd8e176SBarry Smith 6894ccd8e176SBarry Smith /* build cache for off array entries formed */ 68959566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 
68962205254eSKarl Rupp 6897ccd8e176SBarry Smith b->donotstash = PETSC_FALSE; 6898f4259b30SLisandro Dalcin b->colmap = NULL; 6899f4259b30SLisandro Dalcin b->garray = NULL; 6900ccd8e176SBarry Smith b->roworiented = PETSC_TRUE; 6901ccd8e176SBarry Smith 6902ccd8e176SBarry Smith /* stuff used for matrix vector multiply */ 69030298fd71SBarry Smith b->lvec = NULL; 69040298fd71SBarry Smith b->Mvctx = NULL; 6905ccd8e176SBarry Smith 6906ccd8e176SBarry Smith /* stuff for MatGetRow() */ 6907f4259b30SLisandro Dalcin b->rowindices = NULL; 6908f4259b30SLisandro Dalcin b->rowvalues = NULL; 6909ccd8e176SBarry Smith b->getrowactive = PETSC_FALSE; 6910ccd8e176SBarry Smith 6911f719121fSJed Brown /* flexible pointer used in CUSPARSE classes */ 69120298fd71SBarry Smith b->spptr = NULL; 6913f60c3dc2SHong Zhang 69149566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 69159566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 69169566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 69179566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 69189566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 69199566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6920*674b392bSAlexander PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ)); 69219566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 69229566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 69239566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 69249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 69253d0639e7SStefano Zampini #if defined(PETSC_HAVE_CUDA) 69269566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 69273d0639e7SStefano Zampini #endif 6928d5e393b6SSuyash Tandon #if defined(PETSC_HAVE_HIP) 6929d5e393b6SSuyash Tandon PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6930d5e393b6SSuyash Tandon #endif 69313d0639e7SStefano Zampini #if defined(PETSC_HAVE_KOKKOS_KERNELS) 69329566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 69333d0639e7SStefano Zampini #endif 69349779e05dSSatish Balay #if defined(PETSC_HAVE_MKL_SPARSE) 69359566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6936191b95cbSRichard Tran Mills #endif 69379566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 69389566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 69399566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 69409566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", 
MatConvert_MPIAIJ_MPIDense)); 69415d7652ecSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 69429566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 69435d7652ecSHong Zhang #endif 6944d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 69459566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6946d24d4204SJose E. Roman #endif 69479566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 69489566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 69493dad0653Sstefano_zampini #if defined(PETSC_HAVE_HYPRE) 69509566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 69519566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 69523dad0653Sstefano_zampini #endif 69539566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 69549566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 69559566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 69569566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 69579566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 69583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 
6959ccd8e176SBarry Smith } 696081824310SBarry Smith 69615d83a8b1SBarry Smith /*@ 696211a5261eSBarry Smith MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 696303bfb495SBarry Smith and "off-diagonal" part of the matrix in CSR format. 696403bfb495SBarry Smith 6965d083f849SBarry Smith Collective 696603bfb495SBarry Smith 696703bfb495SBarry Smith Input Parameters: 696803bfb495SBarry Smith + comm - MPI communicator 696911a5261eSBarry Smith . m - number of local rows (Cannot be `PETSC_DECIDE`) 697003bfb495SBarry Smith . n - This value should be the same as the local size used in creating the 6971d8a51d2aSBarry Smith x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 69722ef1f0ffSBarry Smith calculated if `N` is given) For square matrices `n` is almost always `m`. 69732ef1f0ffSBarry Smith . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 69742ef1f0ffSBarry Smith . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6975483a2f95SBarry Smith . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 697604ccdda3SJunchao Zhang . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 697703bfb495SBarry Smith . a - matrix values 6978483a2f95SBarry Smith . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 69792ef1f0ffSBarry Smith . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 698003bfb495SBarry Smith - oa - matrix values 698103bfb495SBarry Smith 698203bfb495SBarry Smith Output Parameter: 698303bfb495SBarry Smith . 
mat - the matrix 698403bfb495SBarry Smith 698503bfb495SBarry Smith Level: advanced 698603bfb495SBarry Smith 698703bfb495SBarry Smith Notes: 6988f13dfd9eSBarry Smith The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6989292fb18eSBarry Smith must free the arrays once the matrix has been destroyed and not before. 699003bfb495SBarry Smith 69912ef1f0ffSBarry Smith The `i` and `j` indices are 0 based 699203bfb495SBarry Smith 69932ef1f0ffSBarry Smith See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 699403bfb495SBarry Smith 69957b55108eSBarry Smith This sets local rows and cannot be used to set off-processor values. 69967b55108eSBarry Smith 6997dca341c0SJed Brown Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6998dca341c0SJed Brown legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6999dca341c0SJed Brown not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 7000dca341c0SJed Brown the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 700111a5261eSBarry Smith keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 7002dca341c0SJed Brown communication if it is known that only local entries will be set. 
700303bfb495SBarry Smith 70041cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 7005db781477SPatrick Sanan `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 70062b26979fSBarry Smith @*/ 7007d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 7008d71ae5a4SJacob Faibussowitsch { 700903bfb495SBarry Smith Mat_MPIAIJ *maij; 701003bfb495SBarry Smith 701103bfb495SBarry Smith PetscFunctionBegin; 701208401ef6SPierre Jolivet PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 7013aed4548fSBarry Smith PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 7014aed4548fSBarry Smith PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 70159566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, mat)); 70169566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*mat, m, n, M, N)); 70179566063dSJacob Faibussowitsch PetscCall(MatSetType(*mat, MATMPIAIJ)); 701803bfb495SBarry Smith maij = (Mat_MPIAIJ *)(*mat)->data; 70192205254eSKarl Rupp 70208d7a6e47SBarry Smith (*mat)->preallocated = PETSC_TRUE; 702103bfb495SBarry Smith 70229566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp((*mat)->rmap)); 70239566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp((*mat)->cmap)); 702403bfb495SBarry Smith 70259566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 70269566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 702703bfb495SBarry Smith 70289566063dSJacob Faibussowitsch 
PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 70299566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 70309566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 70319566063dSJacob Faibussowitsch PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 70329566063dSJacob Faibussowitsch PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 70333ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 703403bfb495SBarry Smith } 703503bfb495SBarry Smith 70364e84afc0SStefano Zampini typedef struct { 70374e84afc0SStefano Zampini Mat *mp; /* intermediate products */ 70384e84afc0SStefano Zampini PetscBool *mptmp; /* is the intermediate product temporary ? */ 70394e84afc0SStefano Zampini PetscInt cp; /* number of intermediate products */ 70404e84afc0SStefano Zampini 70414e84afc0SStefano Zampini /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 70424e84afc0SStefano Zampini PetscInt *startsj_s, *startsj_r; 70434e84afc0SStefano Zampini PetscScalar *bufa; 70444e84afc0SStefano Zampini Mat P_oth; 70454e84afc0SStefano Zampini 70464e84afc0SStefano Zampini /* may take advantage of merging product->B */ 7047ddea5d60SJunchao Zhang Mat Bloc; /* B-local by merging diag and off-diag */ 70484e84afc0SStefano Zampini 7049ddea5d60SJunchao Zhang /* cusparse does not have support to split between symbolic and numeric phases. 
70504e84afc0SStefano Zampini When api_user is true, we don't need to update the numerical values 70514e84afc0SStefano Zampini of the temporary storage */ 70524e84afc0SStefano Zampini PetscBool reusesym; 70534e84afc0SStefano Zampini 70544e84afc0SStefano Zampini /* support for COO values insertion */ 7055ddea5d60SJunchao Zhang PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7056ddea5d60SJunchao Zhang PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7057ddea5d60SJunchao Zhang PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7058ddea5d60SJunchao Zhang PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 7059c215019aSStefano Zampini PetscSF sf; /* used for non-local values insertion and memory malloc */ 7060c215019aSStefano Zampini PetscMemType mtype; 70614e84afc0SStefano Zampini 70624e84afc0SStefano Zampini /* customization */ 70634e84afc0SStefano Zampini PetscBool abmerge; 7064abb89eb1SStefano Zampini PetscBool P_oth_bind; 70654e84afc0SStefano Zampini } MatMatMPIAIJBACKEND; 70664e84afc0SStefano Zampini 7067ba38deedSJacob Faibussowitsch static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7068d71ae5a4SJacob Faibussowitsch { 70694e84afc0SStefano Zampini MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 70704e84afc0SStefano Zampini PetscInt i; 70714e84afc0SStefano Zampini 70724e84afc0SStefano Zampini PetscFunctionBegin; 70739566063dSJacob Faibussowitsch PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 70749566063dSJacob Faibussowitsch PetscCall(PetscFree(mmdata->bufa)); 70759566063dSJacob Faibussowitsch PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 70769566063dSJacob Faibussowitsch PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 70779566063dSJacob Faibussowitsch PetscCall(MatDestroy(&mmdata->P_oth)); 
70789566063dSJacob Faibussowitsch PetscCall(MatDestroy(&mmdata->Bloc)); 70799566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&mmdata->sf)); 708048a46eb9SPierre Jolivet for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 70819566063dSJacob Faibussowitsch PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 70829566063dSJacob Faibussowitsch PetscCall(PetscFree(mmdata->own[0])); 70839566063dSJacob Faibussowitsch PetscCall(PetscFree(mmdata->own)); 70849566063dSJacob Faibussowitsch PetscCall(PetscFree(mmdata->off[0])); 70859566063dSJacob Faibussowitsch PetscCall(PetscFree(mmdata->off)); 70869566063dSJacob Faibussowitsch PetscCall(PetscFree(mmdata)); 70873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 70884e84afc0SStefano Zampini } 70894e84afc0SStefano Zampini 7090fff043a9SJunchao Zhang /* Copy selected n entries with indices in idx[] of A to v[]. 7091fff043a9SJunchao Zhang If idx is NULL, copy the whole data array of A to v[] 7092fff043a9SJunchao Zhang */ 7093d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7094d71ae5a4SJacob Faibussowitsch { 7095c215019aSStefano Zampini PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7096c215019aSStefano Zampini 7097c215019aSStefano Zampini PetscFunctionBegin; 70989566063dSJacob Faibussowitsch PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7099c215019aSStefano Zampini if (f) { 71009566063dSJacob Faibussowitsch PetscCall((*f)(A, n, idx, v)); 7101c215019aSStefano Zampini } else { 7102c215019aSStefano Zampini const PetscScalar *vv; 7103c215019aSStefano Zampini 71049566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7105c215019aSStefano Zampini if (n && idx) { 7106c215019aSStefano Zampini PetscScalar *w = v; 7107c215019aSStefano Zampini const PetscInt *oi = idx; 7108c215019aSStefano Zampini PetscInt j; 7109c215019aSStefano Zampini 
7110c215019aSStefano Zampini for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7111c215019aSStefano Zampini } else { 71129566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(v, vv, n)); 7113c215019aSStefano Zampini } 71149566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7115c215019aSStefano Zampini } 71163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7117c215019aSStefano Zampini } 7118c215019aSStefano Zampini 7119d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7120d71ae5a4SJacob Faibussowitsch { 71214e84afc0SStefano Zampini MatMatMPIAIJBACKEND *mmdata; 71224e84afc0SStefano Zampini PetscInt i, n_d, n_o; 71234e84afc0SStefano Zampini 71244e84afc0SStefano Zampini PetscFunctionBegin; 71254e84afc0SStefano Zampini MatCheckProduct(C, 1); 712628b400f6SJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 71274e84afc0SStefano Zampini mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 71284e84afc0SStefano Zampini if (!mmdata->reusesym) { /* update temporary matrices */ 712948a46eb9SPierre Jolivet if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 713048a46eb9SPierre Jolivet if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 71314e84afc0SStefano Zampini } 71324e84afc0SStefano Zampini mmdata->reusesym = PETSC_FALSE; 7133abb89eb1SStefano Zampini 7134abb89eb1SStefano Zampini for (i = 0; i < mmdata->cp; i++) { 713508401ef6SPierre Jolivet PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 71369566063dSJacob Faibussowitsch PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7137abb89eb1SStefano Zampini } 
71384e84afc0SStefano Zampini for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 71396497c311SBarry Smith PetscInt noff; 71404e84afc0SStefano Zampini 71416497c311SBarry Smith PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 71424e84afc0SStefano Zampini if (mmdata->mptmp[i]) continue; 71434e84afc0SStefano Zampini if (noff) { 71446497c311SBarry Smith PetscInt nown; 7145c215019aSStefano Zampini 71466497c311SBarry Smith PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 71479566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 71489566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 71494e84afc0SStefano Zampini n_o += noff; 71504e84afc0SStefano Zampini n_d += nown; 71514e84afc0SStefano Zampini } else { 7152c215019aSStefano Zampini Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7153c215019aSStefano Zampini 71549566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 71554e84afc0SStefano Zampini n_d += mm->nz; 71564e84afc0SStefano Zampini } 71574e84afc0SStefano Zampini } 7158c215019aSStefano Zampini if (mmdata->hasoffproc) { /* offprocess insertion */ 71599566063dSJacob Faibussowitsch PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 71609566063dSJacob Faibussowitsch PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 71614e84afc0SStefano Zampini } 71629566063dSJacob Faibussowitsch PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 71633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 71644e84afc0SStefano Zampini } 71654e84afc0SStefano Zampini 71664e84afc0SStefano Zampini /* Support for Pt * A, A * P, or Pt * A * P */ 71674e84afc0SStefano Zampini #define MAX_NUMBER_INTERMEDIATE 4 7168d71ae5a4SJacob Faibussowitsch PetscErrorCode 
MatProductSymbolic_MPIAIJBACKEND(Mat C) 7169d71ae5a4SJacob Faibussowitsch { 71704e84afc0SStefano Zampini Mat_Product *product = C->product; 7171ddea5d60SJunchao Zhang Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 71724e84afc0SStefano Zampini Mat_MPIAIJ *a, *p; 71734e84afc0SStefano Zampini MatMatMPIAIJBACKEND *mmdata; 71744e84afc0SStefano Zampini ISLocalToGlobalMapping P_oth_l2g = NULL; 71754e84afc0SStefano Zampini IS glob = NULL; 71764e84afc0SStefano Zampini const char *prefix; 71774e84afc0SStefano Zampini char pprefix[256]; 71784e84afc0SStefano Zampini const PetscInt *globidx, *P_oth_idx; 717982a78a4eSJed Brown PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 718082a78a4eSJed Brown PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7181ddea5d60SJunchao Zhang PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7182ddea5d60SJunchao Zhang /* type-0: consecutive, start from 0; type-1: consecutive with */ 7183ddea5d60SJunchao Zhang /* a base offset; type-2: sparse with a local to global map table */ 7184ddea5d60SJunchao Zhang const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7185ddea5d60SJunchao Zhang 71864e84afc0SStefano Zampini MatProductType ptype; 7187d5e393b6SSuyash Tandon PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 71884e84afc0SStefano Zampini PetscMPIInt size; 71894e84afc0SStefano Zampini 71904e84afc0SStefano Zampini PetscFunctionBegin; 71914e84afc0SStefano Zampini MatCheckProduct(C, 1); 719228b400f6SJacob Faibussowitsch PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 71934e84afc0SStefano Zampini ptype = product->type; 7194b94d7dedSBarry Smith if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7195fa046f9fSJunchao Zhang ptype = MATPRODUCT_AB; 
7196fa046f9fSJunchao Zhang product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7197fa046f9fSJunchao Zhang } 71984e84afc0SStefano Zampini switch (ptype) { 71994e84afc0SStefano Zampini case MATPRODUCT_AB: 72004e84afc0SStefano Zampini A = product->A; 72014e84afc0SStefano Zampini P = product->B; 72024e84afc0SStefano Zampini m = A->rmap->n; 72034e84afc0SStefano Zampini n = P->cmap->n; 72044e84afc0SStefano Zampini M = A->rmap->N; 72054e84afc0SStefano Zampini N = P->cmap->N; 7206ddea5d60SJunchao Zhang hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 72074e84afc0SStefano Zampini break; 72084e84afc0SStefano Zampini case MATPRODUCT_AtB: 72094e84afc0SStefano Zampini P = product->A; 72104e84afc0SStefano Zampini A = product->B; 72114e84afc0SStefano Zampini m = P->cmap->n; 72124e84afc0SStefano Zampini n = A->cmap->n; 72134e84afc0SStefano Zampini M = P->cmap->N; 72144e84afc0SStefano Zampini N = A->cmap->N; 72154e84afc0SStefano Zampini hasoffproc = PETSC_TRUE; 72164e84afc0SStefano Zampini break; 72174e84afc0SStefano Zampini case MATPRODUCT_PtAP: 72184e84afc0SStefano Zampini A = product->A; 72194e84afc0SStefano Zampini P = product->B; 72204e84afc0SStefano Zampini m = P->cmap->n; 72214e84afc0SStefano Zampini n = P->cmap->n; 72224e84afc0SStefano Zampini M = P->cmap->N; 72234e84afc0SStefano Zampini N = P->cmap->N; 72244e84afc0SStefano Zampini hasoffproc = PETSC_TRUE; 72254e84afc0SStefano Zampini break; 7226d71ae5a4SJacob Faibussowitsch default: 7227d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 72284e84afc0SStefano Zampini } 72299566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 72304e84afc0SStefano Zampini if (size == 1) hasoffproc = PETSC_FALSE; 72314e84afc0SStefano Zampini 72324e84afc0SStefano Zampini /* defaults */ 72334e84afc0SStefano Zampini for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 
72344e84afc0SStefano Zampini mp[i] = NULL; 72354e84afc0SStefano Zampini mptmp[i] = PETSC_FALSE; 72364e84afc0SStefano Zampini rmapt[i] = -1; 72374e84afc0SStefano Zampini cmapt[i] = -1; 72384e84afc0SStefano Zampini rmapa[i] = NULL; 72394e84afc0SStefano Zampini cmapa[i] = NULL; 72404e84afc0SStefano Zampini } 72414e84afc0SStefano Zampini 72424e84afc0SStefano Zampini /* customization */ 72439566063dSJacob Faibussowitsch PetscCall(PetscNew(&mmdata)); 72444e84afc0SStefano Zampini mmdata->reusesym = product->api_user; 72454e84afc0SStefano Zampini if (ptype == MATPRODUCT_AB) { 72464e84afc0SStefano Zampini if (product->api_user) { 7247d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 72489566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 72499566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7250d0609cedSBarry Smith PetscOptionsEnd(); 72514e84afc0SStefano Zampini } else { 7252d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 72539566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 72549566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7255d0609cedSBarry Smith PetscOptionsEnd(); 7256abb89eb1SStefano Zampini } 7257abb89eb1SStefano Zampini } else if (ptype == MATPRODUCT_PtAP) { 7258abb89eb1SStefano Zampini if (product->api_user) { 7259d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), 
((PetscObject)C)->prefix, "MatPtAP", "Mat"); 72609566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7261d0609cedSBarry Smith PetscOptionsEnd(); 7262abb89eb1SStefano Zampini } else { 7263d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 72649566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7265d0609cedSBarry Smith PetscOptionsEnd(); 72664e84afc0SStefano Zampini } 72674e84afc0SStefano Zampini } 72684e84afc0SStefano Zampini a = (Mat_MPIAIJ *)A->data; 72694e84afc0SStefano Zampini p = (Mat_MPIAIJ *)P->data; 72709566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, m, n, M, N)); 72719566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(C->rmap)); 72729566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(C->cmap)); 72739566063dSJacob Faibussowitsch PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 72749566063dSJacob Faibussowitsch PetscCall(MatGetOptionsPrefix(C, &prefix)); 7275ddea5d60SJunchao Zhang 7276ddea5d60SJunchao Zhang cp = 0; 72774e84afc0SStefano Zampini switch (ptype) { 72784e84afc0SStefano Zampini case MATPRODUCT_AB: /* A * P */ 72799566063dSJacob Faibussowitsch PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 72804e84afc0SStefano Zampini 7281ddea5d60SJunchao Zhang /* A_diag * P_local (merged or not) */ 7282ddea5d60SJunchao Zhang if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 72834e84afc0SStefano Zampini /* P is product->B */ 72849566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 72859566063dSJacob 
Faibussowitsch PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 72869566063dSJacob Faibussowitsch PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 72879566063dSJacob Faibussowitsch PetscCall(MatProductSetFill(mp[cp], product->fill)); 72889566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 72899566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 72909566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 72914e84afc0SStefano Zampini mp[cp]->product->api_user = product->api_user; 72929566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(mp[cp])); 72939566063dSJacob Faibussowitsch PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 72949566063dSJacob Faibussowitsch PetscCall(ISGetIndices(glob, &globidx)); 72954e84afc0SStefano Zampini rmapt[cp] = 1; 72964e84afc0SStefano Zampini cmapt[cp] = 2; 72974e84afc0SStefano Zampini cmapa[cp] = globidx; 72984e84afc0SStefano Zampini mptmp[cp] = PETSC_FALSE; 72994e84afc0SStefano Zampini cp++; 7300ddea5d60SJunchao Zhang } else { /* A_diag * P_diag and A_diag * P_off */ 73019566063dSJacob Faibussowitsch PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 73029566063dSJacob Faibussowitsch PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 73039566063dSJacob Faibussowitsch PetscCall(MatProductSetFill(mp[cp], product->fill)); 73049566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 73059566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 73069566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 73074e84afc0SStefano Zampini mp[cp]->product->api_user = product->api_user; 73089566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(mp[cp])); 73099566063dSJacob Faibussowitsch PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 73104e84afc0SStefano Zampini rmapt[cp] = 1; 
73114e84afc0SStefano Zampini cmapt[cp] = 1; 73124e84afc0SStefano Zampini mptmp[cp] = PETSC_FALSE; 73134e84afc0SStefano Zampini cp++; 73149566063dSJacob Faibussowitsch PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 73159566063dSJacob Faibussowitsch PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 73169566063dSJacob Faibussowitsch PetscCall(MatProductSetFill(mp[cp], product->fill)); 73179566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 73189566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 73199566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 73204e84afc0SStefano Zampini mp[cp]->product->api_user = product->api_user; 73219566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(mp[cp])); 73229566063dSJacob Faibussowitsch PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 73234e84afc0SStefano Zampini rmapt[cp] = 1; 73244e84afc0SStefano Zampini cmapt[cp] = 2; 73254e84afc0SStefano Zampini cmapa[cp] = p->garray; 73264e84afc0SStefano Zampini mptmp[cp] = PETSC_FALSE; 73274e84afc0SStefano Zampini cp++; 73284e84afc0SStefano Zampini } 7329ddea5d60SJunchao Zhang 7330ddea5d60SJunchao Zhang /* A_off * P_other */ 73314e84afc0SStefano Zampini if (mmdata->P_oth) { 73329566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 73339566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7334f4f49eeaSPierre Jolivet PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 73359566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 73369566063dSJacob Faibussowitsch PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 73379566063dSJacob Faibussowitsch PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 73389566063dSJacob Faibussowitsch 
PetscCall(MatProductSetFill(mp[cp], product->fill)); 73399566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 73409566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 73419566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 73424e84afc0SStefano Zampini mp[cp]->product->api_user = product->api_user; 73439566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(mp[cp])); 73449566063dSJacob Faibussowitsch PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 73454e84afc0SStefano Zampini rmapt[cp] = 1; 73464e84afc0SStefano Zampini cmapt[cp] = 2; 73474e84afc0SStefano Zampini cmapa[cp] = P_oth_idx; 73484e84afc0SStefano Zampini mptmp[cp] = PETSC_FALSE; 73494e84afc0SStefano Zampini cp++; 73504e84afc0SStefano Zampini } 73514e84afc0SStefano Zampini break; 7352ddea5d60SJunchao Zhang 73534e84afc0SStefano Zampini case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 73544e84afc0SStefano Zampini /* A is product->B */ 73559566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7356ddea5d60SJunchao Zhang if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 73579566063dSJacob Faibussowitsch PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 73589566063dSJacob Faibussowitsch PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 73599566063dSJacob Faibussowitsch PetscCall(MatProductSetFill(mp[cp], product->fill)); 73609566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 73619566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 73629566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 73634e84afc0SStefano Zampini mp[cp]->product->api_user = product->api_user; 73649566063dSJacob Faibussowitsch 
PetscCall(MatProductSetFromOptions(mp[cp])); 73659566063dSJacob Faibussowitsch PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 73669566063dSJacob Faibussowitsch PetscCall(ISGetIndices(glob, &globidx)); 73674e84afc0SStefano Zampini rmapt[cp] = 2; 73684e84afc0SStefano Zampini rmapa[cp] = globidx; 73694e84afc0SStefano Zampini cmapt[cp] = 2; 73704e84afc0SStefano Zampini cmapa[cp] = globidx; 73714e84afc0SStefano Zampini mptmp[cp] = PETSC_FALSE; 73724e84afc0SStefano Zampini cp++; 73734e84afc0SStefano Zampini } else { 73749566063dSJacob Faibussowitsch PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 73759566063dSJacob Faibussowitsch PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 73769566063dSJacob Faibussowitsch PetscCall(MatProductSetFill(mp[cp], product->fill)); 73779566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 73789566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 73799566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 73804e84afc0SStefano Zampini mp[cp]->product->api_user = product->api_user; 73819566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(mp[cp])); 73829566063dSJacob Faibussowitsch PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 73839566063dSJacob Faibussowitsch PetscCall(ISGetIndices(glob, &globidx)); 73844e84afc0SStefano Zampini rmapt[cp] = 1; 73854e84afc0SStefano Zampini cmapt[cp] = 2; 73864e84afc0SStefano Zampini cmapa[cp] = globidx; 73874e84afc0SStefano Zampini mptmp[cp] = PETSC_FALSE; 73884e84afc0SStefano Zampini cp++; 73899566063dSJacob Faibussowitsch PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 73909566063dSJacob Faibussowitsch PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 73919566063dSJacob Faibussowitsch PetscCall(MatProductSetFill(mp[cp], product->fill)); 73929566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" 
PetscInt_FMT "_", cp)); 73939566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 73949566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 73954e84afc0SStefano Zampini mp[cp]->product->api_user = product->api_user; 73969566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(mp[cp])); 73979566063dSJacob Faibussowitsch PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 73984e84afc0SStefano Zampini rmapt[cp] = 2; 73994e84afc0SStefano Zampini rmapa[cp] = p->garray; 74004e84afc0SStefano Zampini cmapt[cp] = 2; 74014e84afc0SStefano Zampini cmapa[cp] = globidx; 74024e84afc0SStefano Zampini mptmp[cp] = PETSC_FALSE; 74034e84afc0SStefano Zampini cp++; 74044e84afc0SStefano Zampini } 74054e84afc0SStefano Zampini break; 74064e84afc0SStefano Zampini case MATPRODUCT_PtAP: 74079566063dSJacob Faibussowitsch PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 74084e84afc0SStefano Zampini /* P is product->B */ 74099566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 74109566063dSJacob Faibussowitsch PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 74119566063dSJacob Faibussowitsch PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 74129566063dSJacob Faibussowitsch PetscCall(MatProductSetFill(mp[cp], product->fill)); 74139566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 74149566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 74159566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 74164e84afc0SStefano Zampini mp[cp]->product->api_user = product->api_user; 74179566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(mp[cp])); 74189566063dSJacob Faibussowitsch PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 74199566063dSJacob 
Faibussowitsch PetscCall(ISGetIndices(glob, &globidx)); 74204e84afc0SStefano Zampini rmapt[cp] = 2; 74214e84afc0SStefano Zampini rmapa[cp] = globidx; 74224e84afc0SStefano Zampini cmapt[cp] = 2; 74234e84afc0SStefano Zampini cmapa[cp] = globidx; 74244e84afc0SStefano Zampini mptmp[cp] = PETSC_FALSE; 74254e84afc0SStefano Zampini cp++; 74264e84afc0SStefano Zampini if (mmdata->P_oth) { 74279566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 74289566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7429f4f49eeaSPierre Jolivet PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 74309566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 74319566063dSJacob Faibussowitsch PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 74329566063dSJacob Faibussowitsch PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 74339566063dSJacob Faibussowitsch PetscCall(MatProductSetFill(mp[cp], product->fill)); 74349566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 74359566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 74369566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 74374e84afc0SStefano Zampini mp[cp]->product->api_user = product->api_user; 74389566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(mp[cp])); 74399566063dSJacob Faibussowitsch PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 74404e84afc0SStefano Zampini mptmp[cp] = PETSC_TRUE; 74414e84afc0SStefano Zampini cp++; 74429566063dSJacob Faibussowitsch PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 74439566063dSJacob Faibussowitsch PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 74449566063dSJacob Faibussowitsch PetscCall(MatProductSetFill(mp[cp], product->fill)); 74459566063dSJacob Faibussowitsch 
PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 74469566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 74479566063dSJacob Faibussowitsch PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 74484e84afc0SStefano Zampini mp[cp]->product->api_user = product->api_user; 74499566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(mp[cp])); 74509566063dSJacob Faibussowitsch PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 74514e84afc0SStefano Zampini rmapt[cp] = 2; 74524e84afc0SStefano Zampini rmapa[cp] = globidx; 74534e84afc0SStefano Zampini cmapt[cp] = 2; 74544e84afc0SStefano Zampini cmapa[cp] = P_oth_idx; 74554e84afc0SStefano Zampini mptmp[cp] = PETSC_FALSE; 74564e84afc0SStefano Zampini cp++; 74574e84afc0SStefano Zampini } 74584e84afc0SStefano Zampini break; 7459d71ae5a4SJacob Faibussowitsch default: 7460d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 74614e84afc0SStefano Zampini } 74624e84afc0SStefano Zampini /* sanity check */ 74639371c9d4SSatish Balay if (size > 1) 74649371c9d4SSatish Balay for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 74654e84afc0SStefano Zampini 74669566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7467ddea5d60SJunchao Zhang for (i = 0; i < cp; i++) { 7468ddea5d60SJunchao Zhang mmdata->mp[i] = mp[i]; 7469ddea5d60SJunchao Zhang mmdata->mptmp[i] = mptmp[i]; 7470ddea5d60SJunchao Zhang } 74714e84afc0SStefano Zampini mmdata->cp = cp; 74724e84afc0SStefano Zampini C->product->data = mmdata; 74734e84afc0SStefano Zampini C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 74744e84afc0SStefano Zampini C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 74754e84afc0SStefano Zampini 7476c215019aSStefano Zampini /* memory type */ 
7477c215019aSStefano Zampini mmdata->mtype = PETSC_MEMTYPE_HOST; 74789566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7479d5e393b6SSuyash Tandon PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 74809566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7481c215019aSStefano Zampini if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7482d5e393b6SSuyash Tandon else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 74833214990dSStefano Zampini else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7484c215019aSStefano Zampini 74854e84afc0SStefano Zampini /* prepare coo coordinates for values insertion */ 7486ddea5d60SJunchao Zhang 7487ddea5d60SJunchao Zhang /* count total nonzeros of those intermediate seqaij Mats 7488ddea5d60SJunchao Zhang ncoo_d: # of nonzeros of matrices that do not have offproc entries 7489ddea5d60SJunchao Zhang ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7490ddea5d60SJunchao Zhang ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7491ddea5d60SJunchao Zhang */ 74924e84afc0SStefano Zampini for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 74934e84afc0SStefano Zampini Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 74944e84afc0SStefano Zampini if (mptmp[cp]) continue; 7495ddea5d60SJunchao Zhang if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 74964e84afc0SStefano Zampini const PetscInt *rmap = rmapa[cp]; 74974e84afc0SStefano Zampini const PetscInt mr = mp[cp]->rmap->n; 74984e84afc0SStefano Zampini const PetscInt rs = C->rmap->rstart; 74994e84afc0SStefano Zampini const PetscInt re = C->rmap->rend; 75004e84afc0SStefano Zampini const PetscInt *ii = 
mm->i; 75014e84afc0SStefano Zampini for (i = 0; i < mr; i++) { 75024e84afc0SStefano Zampini const PetscInt gr = rmap[i]; 75034e84afc0SStefano Zampini const PetscInt nz = ii[i + 1] - ii[i]; 7504ddea5d60SJunchao Zhang if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7505ddea5d60SJunchao Zhang else ncoo_oown += nz; /* this row is local */ 75064e84afc0SStefano Zampini } 75074e84afc0SStefano Zampini } else ncoo_d += mm->nz; 75084e84afc0SStefano Zampini } 7509ddea5d60SJunchao Zhang 7510ddea5d60SJunchao Zhang /* 7511ddea5d60SJunchao Zhang ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7512ddea5d60SJunchao Zhang 7513ddea5d60SJunchao Zhang ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7514ddea5d60SJunchao Zhang 7515d5b43468SJose E. Roman off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7516ddea5d60SJunchao Zhang 7517ddea5d60SJunchao Zhang off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7518ddea5d60SJunchao Zhang own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7519ddea5d60SJunchao Zhang so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7520ddea5d60SJunchao Zhang 7521ddea5d60SJunchao Zhang coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7522da81f932SPierre Jolivet Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 
7523ddea5d60SJunchao Zhang */ 75249566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 75259566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7526ddea5d60SJunchao Zhang 7527ddea5d60SJunchao Zhang /* gather (i,j) of nonzeros inserted by remote procs */ 7528ddea5d60SJunchao Zhang if (hasoffproc) { 75294e84afc0SStefano Zampini PetscSF msf; 75304e84afc0SStefano Zampini PetscInt ncoo2, *coo_i2, *coo_j2; 75314e84afc0SStefano Zampini 75329566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 75339566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 75349566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7535ddea5d60SJunchao Zhang 75364e84afc0SStefano Zampini for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 75374e84afc0SStefano Zampini Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 75384e84afc0SStefano Zampini PetscInt *idxoff = mmdata->off[cp]; 75394e84afc0SStefano Zampini PetscInt *idxown = mmdata->own[cp]; 7540ddea5d60SJunchao Zhang if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 75414e84afc0SStefano Zampini const PetscInt *rmap = rmapa[cp]; 75424e84afc0SStefano Zampini const PetscInt *cmap = cmapa[cp]; 75434e84afc0SStefano Zampini const PetscInt *ii = mm->i; 75444e84afc0SStefano Zampini PetscInt *coi = coo_i + ncoo_o; 75454e84afc0SStefano Zampini PetscInt *coj = coo_j + ncoo_o; 75464e84afc0SStefano Zampini const PetscInt mr = mp[cp]->rmap->n; 75474e84afc0SStefano Zampini const PetscInt rs = C->rmap->rstart; 75484e84afc0SStefano Zampini const PetscInt re = C->rmap->rend; 75494e84afc0SStefano Zampini const PetscInt cs = C->cmap->rstart; 75504e84afc0SStefano Zampini for (i = 0; i < mr; i++) { 75514e84afc0SStefano Zampini const PetscInt *jj = mm->j + ii[i]; 75524e84afc0SStefano Zampini const PetscInt gr = rmap[i]; 
75534e84afc0SStefano Zampini const PetscInt nz = ii[i + 1] - ii[i]; 7554ddea5d60SJunchao Zhang if (gr < rs || gr >= re) { /* this is an offproc row */ 75554e84afc0SStefano Zampini for (j = ii[i]; j < ii[i + 1]; j++) { 75564e84afc0SStefano Zampini *coi++ = gr; 75574e84afc0SStefano Zampini *idxoff++ = j; 75584e84afc0SStefano Zampini } 75594e84afc0SStefano Zampini if (!cmapt[cp]) { /* already global */ 75604e84afc0SStefano Zampini for (j = 0; j < nz; j++) *coj++ = jj[j]; 75614e84afc0SStefano Zampini } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 75624e84afc0SStefano Zampini for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 75634e84afc0SStefano Zampini } else { /* offdiag */ 75644e84afc0SStefano Zampini for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 75654e84afc0SStefano Zampini } 75664e84afc0SStefano Zampini ncoo_o += nz; 7567ddea5d60SJunchao Zhang } else { /* this is a local row */ 75684e84afc0SStefano Zampini for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 75694e84afc0SStefano Zampini } 75704e84afc0SStefano Zampini } 75714e84afc0SStefano Zampini } 75724e84afc0SStefano Zampini mmdata->off[cp + 1] = idxoff; 75734e84afc0SStefano Zampini mmdata->own[cp + 1] = idxown; 75744e84afc0SStefano Zampini } 75754e84afc0SStefano Zampini 75769566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 75776497c311SBarry Smith PetscInt incoo_o; 75786497c311SBarry Smith PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 75796497c311SBarry Smith PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 75809566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 75819566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 75824e84afc0SStefano Zampini ncoo = ncoo_d + ncoo_oown + ncoo2; 75839566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 75849566063dSJacob Faibussowitsch 
PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 75859566063dSJacob Faibussowitsch PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 75869566063dSJacob Faibussowitsch PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 75879566063dSJacob Faibussowitsch PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 75889566063dSJacob Faibussowitsch PetscCall(PetscFree2(coo_i, coo_j)); 7589ddea5d60SJunchao Zhang /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 75909566063dSJacob Faibussowitsch PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 75914e84afc0SStefano Zampini coo_i = coo_i2; 75924e84afc0SStefano Zampini coo_j = coo_j2; 75934e84afc0SStefano Zampini } else { /* no offproc values insertion */ 75944e84afc0SStefano Zampini ncoo = ncoo_d; 75959566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7596c215019aSStefano Zampini 75979566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 75989566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 75999566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(mmdata->sf)); 76004e84afc0SStefano Zampini } 7601c215019aSStefano Zampini mmdata->hasoffproc = hasoffproc; 76024e84afc0SStefano Zampini 7603ddea5d60SJunchao Zhang /* gather (i,j) of nonzeros inserted locally */ 76044e84afc0SStefano Zampini for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 76054e84afc0SStefano Zampini Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 76064e84afc0SStefano Zampini PetscInt *coi = coo_i + ncoo_d; 76074e84afc0SStefano Zampini PetscInt *coj = coo_j + ncoo_d; 76084e84afc0SStefano Zampini const PetscInt *jj = mm->j; 76094e84afc0SStefano 
Zampini const PetscInt *ii = mm->i; 76104e84afc0SStefano Zampini const PetscInt *cmap = cmapa[cp]; 76114e84afc0SStefano Zampini const PetscInt *rmap = rmapa[cp]; 76124e84afc0SStefano Zampini const PetscInt mr = mp[cp]->rmap->n; 76134e84afc0SStefano Zampini const PetscInt rs = C->rmap->rstart; 76144e84afc0SStefano Zampini const PetscInt re = C->rmap->rend; 76154e84afc0SStefano Zampini const PetscInt cs = C->cmap->rstart; 76164e84afc0SStefano Zampini 76174e84afc0SStefano Zampini if (mptmp[cp]) continue; 7618ddea5d60SJunchao Zhang if (rmapt[cp] == 1) { /* consecutive rows */ 7619ddea5d60SJunchao Zhang /* fill coo_i */ 76204e84afc0SStefano Zampini for (i = 0; i < mr; i++) { 76214e84afc0SStefano Zampini const PetscInt gr = i + rs; 76224e84afc0SStefano Zampini for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 76234e84afc0SStefano Zampini } 7624ddea5d60SJunchao Zhang /* fill coo_j */ 7625ddea5d60SJunchao Zhang if (!cmapt[cp]) { /* type-0, already global */ 76269566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7627ddea5d60SJunchao Zhang } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7628ddea5d60SJunchao Zhang for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7629ddea5d60SJunchao Zhang } else { /* type-2, local to global for sparse columns */ 76304e84afc0SStefano Zampini for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 76314e84afc0SStefano Zampini } 76324e84afc0SStefano Zampini ncoo_d += mm->nz; 7633ddea5d60SJunchao Zhang } else if (rmapt[cp] == 2) { /* sparse rows */ 76344e84afc0SStefano Zampini for (i = 0; i < mr; i++) { 76354e84afc0SStefano Zampini const PetscInt *jj = mm->j + ii[i]; 76364e84afc0SStefano Zampini const PetscInt gr = rmap[i]; 76374e84afc0SStefano Zampini const PetscInt nz = ii[i + 1] - ii[i]; 7638ddea5d60SJunchao Zhang if (gr >= rs && gr < re) { /* local rows */ 76394e84afc0SStefano Zampini for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7640ddea5d60SJunchao Zhang if 
(!cmapt[cp]) { /* type-0, already global */ 76414e84afc0SStefano Zampini for (j = 0; j < nz; j++) *coj++ = jj[j]; 76424e84afc0SStefano Zampini } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 76434e84afc0SStefano Zampini for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7644ddea5d60SJunchao Zhang } else { /* type-2, local to global for sparse columns */ 76454e84afc0SStefano Zampini for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 76464e84afc0SStefano Zampini } 76474e84afc0SStefano Zampini ncoo_d += nz; 76484e84afc0SStefano Zampini } 76494e84afc0SStefano Zampini } 76504e84afc0SStefano Zampini } 76514e84afc0SStefano Zampini } 765248a46eb9SPierre Jolivet if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 76539566063dSJacob Faibussowitsch PetscCall(ISDestroy(&glob)); 765448a46eb9SPierre Jolivet if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 76559566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7656ddea5d60SJunchao Zhang /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 76579566063dSJacob Faibussowitsch PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 76584e84afc0SStefano Zampini 76594e84afc0SStefano Zampini /* preallocate with COO data */ 76609566063dSJacob Faibussowitsch PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 76619566063dSJacob Faibussowitsch PetscCall(PetscFree2(coo_i, coo_j)); 76623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 76634e84afc0SStefano Zampini }

/*
  MatProductSetFromOptions_MPIAIJBACKEND - select the symbolic-product routine of a product matrix

  Input Parameter:
. mat - the product matrix; mat->product must already be set (enforced by MatCheckProduct())

  Notes:
  For the product types AB, AtB and PtAP the routine MatProductSymbolic_MPIAIJBACKEND is installed
  whenever the backend is eligible. If mat->ops->productsymbolic is still unset afterwards, the choice
  is delegated to MatProductSetFromOptions_MPIAIJ().

  Without PETSC_HAVE_DEVICE the backend is always eligible. With PETSC_HAVE_DEVICE it is eligible only
  when neither operand is bound to the CPU, the type name of B equals the type name of A, and the user
  did not ask for the CPU code through one of the "..._backend_cpu" options queried below.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *prod = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool use_backend = PETSC_FALSE;
  PetscBool force_cpu   = PETSC_FALSE;
#else
  PetscBool use_backend = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  if (!prod->A->boundtocpu && !prod->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)prod->B, ((PetscObject)prod->A)->type_name, &use_backend));
  if (use_backend) { /* we can always fallback to the CPU if requested */
    /* the option name depends on the product type and on whether the user-level API was used */
    if (prod->type == MATPRODUCT_AB) {
      if (!prod->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", force_cpu, &force_cpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", force_cpu, &force_cpu, NULL));
        PetscOptionsEnd();
      }
    } else if (prod->type == MATPRODUCT_AtB) {
      if (!prod->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", force_cpu, &force_cpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", force_cpu, &force_cpu, NULL));
        PetscOptionsEnd();
      }
    } else if (prod->type == MATPRODUCT_PtAP) {
      if (!prod->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", force_cpu, &force_cpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", force_cpu, &force_cpu, NULL));
        PetscOptionsEnd();
      }
    }
    use_backend = (PetscBool)!force_cpu;
  }
#endif
  /* only AB, AtB and PtAP are handled by the backend implementation */
  if (use_backend && (prod->type == MATPRODUCT_AB || prod->type == MATPRODUCT_AtB || prod->type == MATPRODUCT_PtAP)) mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatCollapseRow - list the distinct block columns touched by one row of a matrix

  Input Parameters:
+ Amat - the matrix
. row  - the row to inspect
- bs   - the block size; each column index is mapped to the block column index / bs

  Output Parameters:
+ n  - the number of block indices written to cc[] (0 for an empty row)
- cc - the block indices (must be large enough to contain the indices)

  Note:
  A block index is appended only when it is larger than the last one appended, so the column
  indices returned by MatGetRow() are presumed to be in ascending order.
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
{
  PetscInt        nblk = 0, ncols;
  const PetscInt *cols;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat, row, &ncols, &cols, NULL));
  for (PetscInt j = 0; j < ncols; j++) {
    const PetscInt bcol = cols[j] / bs;

    /* the first entry always opens a block; later ones only when they move past the last block */
    if (!nblk || cc[nblk - 1] < bcol) cc[nblk++] = bcol;
  }
  PetscCall(MatRestoreRow(Amat, row, &ncols, &cols, NULL));
  *n = nblk;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* 776272833a62Smarkadams4 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 776372833a62Smarkadams4 776472833a62Smarkadams4 ncollapsed - the number of block indices 776572833a62Smarkadams4 collapsed - 
the block indices (must be large enough to contain the indices) 776672833a62Smarkadams4 */ 7767d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7768d71ae5a4SJacob Faibussowitsch { 776972833a62Smarkadams4 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 777072833a62Smarkadams4 777172833a62Smarkadams4 PetscFunctionBegin; 777272833a62Smarkadams4 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 777372833a62Smarkadams4 for (i = start + 1; i < start + bs; i++) { 777472833a62Smarkadams4 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 777572833a62Smarkadams4 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 77769371c9d4SSatish Balay cprevtmp = cprev; 77779371c9d4SSatish Balay cprev = merged; 77789371c9d4SSatish Balay merged = cprevtmp; 777972833a62Smarkadams4 } 778072833a62Smarkadams4 *ncollapsed = nprev; 778172833a62Smarkadams4 if (collapsed) *collapsed = cprev; 77823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 778372833a62Smarkadams4 } 778472833a62Smarkadams4 77852d776b49SBarry Smith /* 778672833a62Smarkadams4 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 778772833a62Smarkadams4 778872833a62Smarkadams4 Input Parameter: 778972833a62Smarkadams4 . Amat - matrix 779072833a62Smarkadams4 - symmetrize - make the result symmetric 779172833a62Smarkadams4 + scale - scale with diagonal 779272833a62Smarkadams4 779372833a62Smarkadams4 Output Parameter: 779472833a62Smarkadams4 . 
a_Gmat - output scalar graph >= 0 779572833a62Smarkadams4 779672833a62Smarkadams4 */ 7797e02fb3cdSMark Adams PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7798d71ae5a4SJacob Faibussowitsch { 779972833a62Smarkadams4 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 780072833a62Smarkadams4 MPI_Comm comm; 780172833a62Smarkadams4 Mat Gmat; 780272833a62Smarkadams4 PetscBool ismpiaij, isseqaij; 780372833a62Smarkadams4 Mat a, b, c; 780472833a62Smarkadams4 MatType jtype; 780572833a62Smarkadams4 780672833a62Smarkadams4 PetscFunctionBegin; 780772833a62Smarkadams4 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 780872833a62Smarkadams4 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 780972833a62Smarkadams4 PetscCall(MatGetSize(Amat, &MM, &NN)); 781072833a62Smarkadams4 PetscCall(MatGetBlockSize(Amat, &bs)); 781172833a62Smarkadams4 nloc = (Iend - Istart) / bs; 781272833a62Smarkadams4 781372833a62Smarkadams4 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 781472833a62Smarkadams4 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 781572833a62Smarkadams4 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 781672833a62Smarkadams4 781772833a62Smarkadams4 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 781872833a62Smarkadams4 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 781972833a62Smarkadams4 implementation */ 782072833a62Smarkadams4 if (bs > 1) { 782172833a62Smarkadams4 PetscCall(MatGetType(Amat, &jtype)); 782272833a62Smarkadams4 PetscCall(MatCreate(comm, &Gmat)); 782372833a62Smarkadams4 PetscCall(MatSetType(Gmat, jtype)); 782472833a62Smarkadams4 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 
782572833a62Smarkadams4 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 782672833a62Smarkadams4 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 782772833a62Smarkadams4 PetscInt *d_nnz, *o_nnz; 78282cf69117Smarkadams4 MatScalar *aa, val, *AA; 78292cf69117Smarkadams4 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 78306497c311SBarry Smith 78319371c9d4SSatish Balay if (isseqaij) { 78329371c9d4SSatish Balay a = Amat; 78339371c9d4SSatish Balay b = NULL; 78349371c9d4SSatish Balay } else { 783572833a62Smarkadams4 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 78369371c9d4SSatish Balay a = d->A; 78379371c9d4SSatish Balay b = d->B; 783872833a62Smarkadams4 } 783972833a62Smarkadams4 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 784032603206SJames Wright PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 784172833a62Smarkadams4 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 78422cf69117Smarkadams4 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 784359ee9f9fSPierre Jolivet const PetscInt *cols1, *cols2; 78446497c311SBarry Smith 784559ee9f9fSPierre Jolivet for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 784659ee9f9fSPierre Jolivet PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 784759ee9f9fSPierre Jolivet nnz[brow / bs] = nc2 / bs; 784859ee9f9fSPierre Jolivet if (nc2 % bs) ok = 0; 784972833a62Smarkadams4 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 785059ee9f9fSPierre Jolivet for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 785159ee9f9fSPierre Jolivet PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 785259ee9f9fSPierre Jolivet if (nc1 != nc2) ok = 0; 785359ee9f9fSPierre Jolivet else { 785459ee9f9fSPierre Jolivet for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 785559ee9f9fSPierre Jolivet if (cols1[jj] != cols2[jj]) ok = 0; 785659ee9f9fSPierre Jolivet if (cols1[jj] % bs != jj % bs) ok = 0; 785772833a62Smarkadams4 } 785859ee9f9fSPierre Jolivet } 785959ee9f9fSPierre Jolivet 
PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 786059ee9f9fSPierre Jolivet } 786159ee9f9fSPierre Jolivet PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 786272833a62Smarkadams4 if (!ok) { 786372833a62Smarkadams4 PetscCall(PetscFree2(d_nnz, o_nnz)); 786459ee9f9fSPierre Jolivet PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 786572833a62Smarkadams4 goto old_bs; 786672833a62Smarkadams4 } 786772833a62Smarkadams4 } 786872833a62Smarkadams4 } 786972833a62Smarkadams4 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 787072833a62Smarkadams4 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 787172833a62Smarkadams4 PetscCall(PetscFree2(d_nnz, o_nnz)); 78722cf69117Smarkadams4 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 787372833a62Smarkadams4 // diag 787472833a62Smarkadams4 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 787572833a62Smarkadams4 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 78766497c311SBarry Smith 787772833a62Smarkadams4 ai = aseq->i; 787872833a62Smarkadams4 n = ai[brow + 1] - ai[brow]; 787972833a62Smarkadams4 aj = aseq->j + ai[brow]; 78806497c311SBarry Smith for (PetscInt k = 0; k < n; k += bs) { // block columns 788172833a62Smarkadams4 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 788272833a62Smarkadams4 val = 0; 7883e02fb3cdSMark Adams if (index_size == 0) { 78846497c311SBarry Smith for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 788572833a62Smarkadams4 aa = aseq->a + ai[brow + ii] + k; 78866497c311SBarry Smith for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 788772833a62Smarkadams4 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 788872833a62Smarkadams4 } 788972833a62Smarkadams4 } 7890e02fb3cdSMark Adams } else { // use (index,index) value if provided 78916497c311SBarry Smith for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 78926497c311SBarry Smith PetscInt ii = index[iii]; 
7893e02fb3cdSMark Adams aa = aseq->a + ai[brow + ii] + k; 78946497c311SBarry Smith for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 78956497c311SBarry Smith PetscInt jj = index[jjj]; 7896cd5bc9d0SMark Adams val += PetscAbs(PetscRealPart(aa[jj])); 7897e02fb3cdSMark Adams } 7898e02fb3cdSMark Adams } 7899e02fb3cdSMark Adams } 7900835f2295SStefano Zampini PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 790172833a62Smarkadams4 AA[k / bs] = val; 790272833a62Smarkadams4 } 790372833a62Smarkadams4 grow = Istart / bs + brow / bs; 790466521e1fSMark Adams PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 790572833a62Smarkadams4 } 790672833a62Smarkadams4 // off-diag 790772833a62Smarkadams4 if (ismpiaij) { 790872833a62Smarkadams4 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 790972833a62Smarkadams4 const PetscScalar *vals; 791072833a62Smarkadams4 const PetscInt *cols, *garray = aij->garray; 79116497c311SBarry Smith 791272833a62Smarkadams4 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 791372833a62Smarkadams4 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 791472833a62Smarkadams4 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 79156497c311SBarry Smith for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 791659ee9f9fSPierre Jolivet PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 791772833a62Smarkadams4 AA[k / bs] = 0; 791872833a62Smarkadams4 AJ[cidx] = garray[cols[k]] / bs; 791972833a62Smarkadams4 } 792072833a62Smarkadams4 nc = ncols / bs; 792172833a62Smarkadams4 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7922e02fb3cdSMark Adams if (index_size == 0) { 79236497c311SBarry Smith for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 792472833a62Smarkadams4 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 79256497c311SBarry Smith for (PetscInt k = 0; k < ncols; k += bs) { 
79266497c311SBarry Smith for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7927835f2295SStefano Zampini PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 792872833a62Smarkadams4 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 792972833a62Smarkadams4 } 793072833a62Smarkadams4 } 793172833a62Smarkadams4 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 793272833a62Smarkadams4 } 7933e02fb3cdSMark Adams } else { // use (index,index) value if provided 79346497c311SBarry Smith for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 79356497c311SBarry Smith PetscInt ii = index[iii]; 7936e02fb3cdSMark Adams PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 79376497c311SBarry Smith for (PetscInt k = 0; k < ncols; k += bs) { 79386497c311SBarry Smith for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 79396497c311SBarry Smith PetscInt jj = index[jjj]; 7940e02fb3cdSMark Adams AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7941e02fb3cdSMark Adams } 7942e02fb3cdSMark Adams } 7943e02fb3cdSMark Adams PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7944e02fb3cdSMark Adams } 7945e02fb3cdSMark Adams } 794672833a62Smarkadams4 grow = Istart / bs + brow / bs; 794766521e1fSMark Adams PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 794872833a62Smarkadams4 } 794972833a62Smarkadams4 } 795072833a62Smarkadams4 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 795172833a62Smarkadams4 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 79522cf69117Smarkadams4 PetscCall(PetscFree2(AA, AJ)); 795372833a62Smarkadams4 } else { 795472833a62Smarkadams4 const PetscScalar *vals; 795572833a62Smarkadams4 const PetscInt *idx; 795672833a62Smarkadams4 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 795772833a62Smarkadams4 old_bs: 795872833a62Smarkadams4 /* 795972833a62Smarkadams4 Determine the preallocation needed for the scalar matrix derived from 
the vector matrix. 796072833a62Smarkadams4 */ 796172833a62Smarkadams4 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 796232603206SJames Wright PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 796372833a62Smarkadams4 if (isseqaij) { 796472833a62Smarkadams4 PetscInt max_d_nnz; 79656497c311SBarry Smith 796672833a62Smarkadams4 /* 796772833a62Smarkadams4 Determine exact preallocation count for (sequential) scalar matrix 796872833a62Smarkadams4 */ 796972833a62Smarkadams4 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 797072833a62Smarkadams4 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 797172833a62Smarkadams4 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 797248a46eb9SPierre Jolivet for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 797372833a62Smarkadams4 PetscCall(PetscFree3(w0, w1, w2)); 797472833a62Smarkadams4 } else if (ismpiaij) { 797572833a62Smarkadams4 Mat Daij, Oaij; 797672833a62Smarkadams4 const PetscInt *garray; 797772833a62Smarkadams4 PetscInt max_d_nnz; 79786497c311SBarry Smith 797972833a62Smarkadams4 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 798072833a62Smarkadams4 /* 798172833a62Smarkadams4 Determine exact preallocation count for diagonal block portion of scalar matrix 798272833a62Smarkadams4 */ 798372833a62Smarkadams4 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 798472833a62Smarkadams4 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 798572833a62Smarkadams4 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 798648a46eb9SPierre Jolivet for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 798772833a62Smarkadams4 PetscCall(PetscFree3(w0, w1, w2)); 798872833a62Smarkadams4 /* 798972833a62Smarkadams4 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 
799072833a62Smarkadams4 */ 799172833a62Smarkadams4 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 799272833a62Smarkadams4 o_nnz[jj] = 0; 799372833a62Smarkadams4 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 799472833a62Smarkadams4 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 799572833a62Smarkadams4 o_nnz[jj] += ncols; 799672833a62Smarkadams4 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 799772833a62Smarkadams4 } 799872833a62Smarkadams4 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 799972833a62Smarkadams4 } 800072833a62Smarkadams4 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 800172833a62Smarkadams4 /* get scalar copy (norms) of matrix */ 800272833a62Smarkadams4 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 800372833a62Smarkadams4 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 800472833a62Smarkadams4 PetscCall(PetscFree2(d_nnz, o_nnz)); 800572833a62Smarkadams4 for (Ii = Istart; Ii < Iend; Ii++) { 800672833a62Smarkadams4 PetscInt dest_row = Ii / bs; 80076497c311SBarry Smith 800872833a62Smarkadams4 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 800972833a62Smarkadams4 for (jj = 0; jj < ncols; jj++) { 801072833a62Smarkadams4 PetscInt dest_col = idx[jj] / bs; 801172833a62Smarkadams4 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 80126497c311SBarry Smith 801372833a62Smarkadams4 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 801472833a62Smarkadams4 } 801572833a62Smarkadams4 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 801672833a62Smarkadams4 } 801772833a62Smarkadams4 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 801872833a62Smarkadams4 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 801972833a62Smarkadams4 } 802072833a62Smarkadams4 } else { 80212d776b49SBarry Smith if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 80222d776b49SBarry 
Smith else { 80232d776b49SBarry Smith Gmat = Amat; 80242d776b49SBarry Smith PetscCall(PetscObjectReference((PetscObject)Gmat)); 80252d776b49SBarry Smith } 80269371c9d4SSatish Balay if (isseqaij) { 80279371c9d4SSatish Balay a = Gmat; 80289371c9d4SSatish Balay b = NULL; 80299371c9d4SSatish Balay } else { 803072833a62Smarkadams4 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 80319371c9d4SSatish Balay a = d->A; 80329371c9d4SSatish Balay b = d->B; 803372833a62Smarkadams4 } 80342d776b49SBarry Smith if (filter >= 0 || scale) { 80352d776b49SBarry Smith /* take absolute value of each entry */ 803672833a62Smarkadams4 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 803772833a62Smarkadams4 MatInfo info; 803872833a62Smarkadams4 PetscScalar *avals; 80396497c311SBarry Smith 804072833a62Smarkadams4 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 804172833a62Smarkadams4 PetscCall(MatSeqAIJGetArray(c, &avals)); 804272833a62Smarkadams4 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 804372833a62Smarkadams4 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 804472833a62Smarkadams4 } 804572833a62Smarkadams4 } 80462d776b49SBarry Smith } 804772833a62Smarkadams4 if (symmetrize) { 8048b94d7dedSBarry Smith PetscBool isset, issym; 80496497c311SBarry Smith 8050b94d7dedSBarry Smith PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8051b94d7dedSBarry Smith if (!isset || !issym) { 805272833a62Smarkadams4 Mat matTrans; 80536497c311SBarry Smith 805472833a62Smarkadams4 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 80551fcb517eSBarry Smith PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? 
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 805672833a62Smarkadams4 PetscCall(MatDestroy(&matTrans)); 805772833a62Smarkadams4 } 805872833a62Smarkadams4 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 80592d776b49SBarry Smith } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 806072833a62Smarkadams4 if (scale) { 806172833a62Smarkadams4 /* scale c for all diagonal values = 1 or -1 */ 806272833a62Smarkadams4 Vec diag; 80636497c311SBarry Smith 806472833a62Smarkadams4 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 806572833a62Smarkadams4 PetscCall(MatGetDiagonal(Gmat, diag)); 806672833a62Smarkadams4 PetscCall(VecReciprocal(diag)); 806772833a62Smarkadams4 PetscCall(VecSqrtAbs(diag)); 806872833a62Smarkadams4 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 806972833a62Smarkadams4 PetscCall(VecDestroy(&diag)); 807072833a62Smarkadams4 } 807172833a62Smarkadams4 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 80722d776b49SBarry Smith if (filter >= 0) { 80732ce66baaSPierre Jolivet PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 80742ce66baaSPierre Jolivet PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 80752d776b49SBarry Smith } 807672833a62Smarkadams4 *a_Gmat = Gmat; 80773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 807872833a62Smarkadams4 } 807972833a62Smarkadams4 808072833a62Smarkadams4 /* 808198921bdaSJacob Faibussowitsch Special version for direct calls from Fortran 808298921bdaSJacob Faibussowitsch */ 808398921bdaSJacob Faibussowitsch 808498921bdaSJacob Faibussowitsch /* Change these macros so can be used in void function */ 80859566063dSJacob Faibussowitsch /* Identical to PetscCallVoid, except it assigns to *_ierr */ 80869566063dSJacob Faibussowitsch #undef PetscCall 80879371c9d4SSatish Balay #define PetscCall(...) 
\ 80889371c9d4SSatish Balay do { \ 80895f80ce2aSJacob Faibussowitsch PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 809098921bdaSJacob Faibussowitsch if (PetscUnlikely(ierr_msv_mpiaij)) { \ 809198921bdaSJacob Faibussowitsch *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 809298921bdaSJacob Faibussowitsch return; \ 809398921bdaSJacob Faibussowitsch } \ 809498921bdaSJacob Faibussowitsch } while (0) 809598921bdaSJacob Faibussowitsch 809698921bdaSJacob Faibussowitsch #undef SETERRQ 80979371c9d4SSatish Balay #define SETERRQ(comm, ierr, ...) \ 80989371c9d4SSatish Balay do { \ 809998921bdaSJacob Faibussowitsch *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 810098921bdaSJacob Faibussowitsch return; \ 810198921bdaSJacob Faibussowitsch } while (0) 810298921bdaSJacob Faibussowitsch 810398921bdaSJacob Faibussowitsch #if defined(PETSC_HAVE_FORTRAN_CAPS) 810498921bdaSJacob Faibussowitsch #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 810598921bdaSJacob Faibussowitsch #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 810698921bdaSJacob Faibussowitsch #define matsetvaluesmpiaij_ matsetvaluesmpiaij 810798921bdaSJacob Faibussowitsch #else 810898921bdaSJacob Faibussowitsch #endif
/*
  matsetvaluesmpiaij_ - Fortran-callable routine that inserts or adds a logically dense m x n block of values

  Input Parameters (all passed by reference, Fortran style):
+ mmat  - the matrix
. mm    - number of rows in im[]
. im    - global row indices; negative entries are skipped
. mn    - number of columns in in[]
. in    - global column indices; negative entries are skipped
. v     - the values, read as v[i * n + j] when the matrix is row oriented and v[i + j * m] otherwise
- maddv - the insert mode; it must agree with the mode already recorded on the matrix, if any

  Output Parameter:
. _ierr - error code; written only on failure by the locally redefined PetscCall()/SETERRQ()
          macros, which then return from this void function

  Notes:
  Rows in [rstart, rend) are inserted directly: columns in [cstart, cend) go to the diagonal part A,
  all others to the off-diagonal part B. Rows owned elsewhere are stashed unless donotstash is set.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue;
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up the per-row search state used by the insertion macros */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ *)B->data;
                bimax = b->imax;
                bi    = b->i;
                bilen = b->ilen;
                bj    = b->j;
                /* refresh ba before deriving ap2 from it, so that ap2 points into the value array of the
                   re-fetched B rather than the array obtained before MatDisAssemble_MPIAIJ() */
                ba       = b->a;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* row owned by another process: stash the whole row (or strided column) of values */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No 822498921bdaSJacob Faibussowitsch * other PETSc functions should be defined past this point, as it is impossible to recover the 822598921bdaSJacob Faibussowitsch * original definitions */ 82269566063dSJacob Faibussowitsch #undef PetscCall 822798921bdaSJacob Faibussowitsch #undef SETERRQ 8228