1c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 2c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 382b1193eSBarry Smith 4b350b9acSSatish Balay /*@ 511a5261eSBarry Smith MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix 6ff585edeSJed Brown 711a5261eSBarry Smith Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 8ff585edeSJed Brown 9ff585edeSJed Brown Input Parameter: 1011a5261eSBarry Smith . A - the `MATMAIJ` matrix 11ff585edeSJed Brown 12ff585edeSJed Brown Output Parameter: 1311a5261eSBarry Smith . B - the `MATAIJ` matrix 14ff585edeSJed Brown 15ff585edeSJed Brown Level: advanced 16ff585edeSJed Brown 1711a5261eSBarry Smith Note: 1811a5261eSBarry Smith The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 19ff585edeSJed Brown 201cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()` 21ff585edeSJed Brown @*/ 22d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) 23d71ae5a4SJacob Faibussowitsch { 24ace3abfcSBarry Smith PetscBool ismpimaij, isseqmaij; 25b9b97703SBarry Smith 26b9b97703SBarry Smith PetscFunctionBegin; 279566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij)); 289566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij)); 29b9b97703SBarry Smith if (ismpimaij) { 30b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 31b9b97703SBarry Smith 32b9b97703SBarry Smith *B = b->A; 33b9b97703SBarry Smith } else if (isseqmaij) { 34b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 35b9b97703SBarry Smith 36b9b97703SBarry Smith *B = b->AIJ; 37b9b97703SBarry Smith } else { 38b9b97703SBarry Smith *B = A; 39b9b97703SBarry Smith } 403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 41b9b97703SBarry Smith } 42b9b97703SBarry Smith 43480dffcfSJed Brown /*@ 4411a5261eSBarry Smith MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size 45ff585edeSJed Brown 463f9fe445SBarry Smith Logically Collective 47ff585edeSJed Brown 48d8d19677SJose E. Roman Input Parameters: 4911a5261eSBarry Smith + A - the `MATMAIJ` matrix 50ff585edeSJed Brown - dof - the block size for the new matrix 51ff585edeSJed Brown 52ff585edeSJed Brown Output Parameter: 5311a5261eSBarry Smith . B - the new `MATMAIJ` matrix 54ff585edeSJed Brown 55ff585edeSJed Brown Level: advanced 56ff585edeSJed Brown 571cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MatCreateMAIJ()` 58ff585edeSJed Brown @*/ 59d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) 60d71ae5a4SJacob Faibussowitsch { 610298fd71SBarry Smith Mat Aij = NULL; 62b9b97703SBarry Smith 63b9b97703SBarry Smith PetscFunctionBegin; 64c5eb9154SBarry Smith PetscValidLogicalCollectiveInt(A, dof, 2); 659566063dSJacob Faibussowitsch PetscCall(MatMAIJGetAIJ(A, &Aij)); 669566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(Aij, dof, B)); 673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 68b9b97703SBarry Smith } 69b9b97703SBarry Smith 7080eb40d0SJacob Faibussowitsch static PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 71d71ae5a4SJacob Faibussowitsch { 724cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 7382b1193eSBarry Smith 7482b1193eSBarry Smith PetscFunctionBegin; 759566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 769566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 772e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL)); 789566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL)); 799566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL)); 803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 814cb9d9b8SBarry Smith } 824cb9d9b8SBarry Smith 8380eb40d0SJacob Faibussowitsch static PetscErrorCode MatSetUp_MAIJ(Mat A) 84d71ae5a4SJacob Faibussowitsch { 85356c569eSBarry Smith PetscFunctionBegin; 86ce94432eSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices"); 87356c569eSBarry Smith } 88356c569eSBarry Smith 8980eb40d0SJacob Faibussowitsch static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) 90d71ae5a4SJacob Faibussowitsch { 910fd73130SBarry Smith Mat B; 920fd73130SBarry Smith 930fd73130SBarry Smith PetscFunctionBegin; 949566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 959566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 969566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 980fd73130SBarry Smith } 990fd73130SBarry Smith 10080eb40d0SJacob Faibussowitsch static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) 101d71ae5a4SJacob Faibussowitsch { 1020fd73130SBarry Smith Mat B; 1030fd73130SBarry Smith 1040fd73130SBarry Smith PetscFunctionBegin; 1059566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); 1069566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 1079566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1090fd73130SBarry Smith } 1100fd73130SBarry Smith 11180eb40d0SJacob Faibussowitsch static PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 112d71ae5a4SJacob Faibussowitsch { 1134cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 1144cb9d9b8SBarry Smith 1154cb9d9b8SBarry Smith PetscFunctionBegin; 1169566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 1179566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->OAIJ)); 1189566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->A)); 1199566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->ctx)); 1209566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->w)); 1219566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1222e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL)); 1239566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL)); 1249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL)); 1259566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 127b9b97703SBarry Smith } 128b9b97703SBarry Smith 1290bad9183SKris Buschelman /*MC 130fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1310bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 13211a5261eSBarry Smith The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices. 1330bad9183SKris Buschelman 1340bad9183SKris Buschelman Operations provided: 13567b8a455SSatish Balay .vb 13611a5261eSBarry Smith MatMult() 13711a5261eSBarry Smith MatMultTranspose() 13811a5261eSBarry Smith MatMultAdd() 13911a5261eSBarry Smith MatMultTransposeAdd() 14067b8a455SSatish Balay .ve 1410bad9183SKris Buschelman 1420bad9183SKris Buschelman Level: advanced 1430bad9183SKris Buschelman 1441cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()` 1450bad9183SKris Buschelman M*/ 1460bad9183SKris Buschelman 147d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 148d71ae5a4SJacob Faibussowitsch { 1494cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 15064b52464SHong Zhang PetscMPIInt size; 15182b1193eSBarry Smith 15282b1193eSBarry Smith PetscFunctionBegin; 1534dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&b)); 154b0a32e0cSBarry Smith A->data = (void *)b; 15526fbe8dcSKarl Rupp 1569566063dSJacob Faibussowitsch PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 15726fbe8dcSKarl Rupp 158356c569eSBarry Smith A->ops->setup = MatSetUp_MAIJ; 159f4a53059SBarry Smith 160f4259b30SLisandro Dalcin b->AIJ = NULL; 161cd3bbe55SBarry Smith b->dof = 0; 162f4259b30SLisandro Dalcin b->OAIJ = NULL; 163f4259b30SLisandro Dalcin b->ctx = NULL; 164f4259b30SLisandro Dalcin b->w = NULL; 1659566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 16664b52464SHong Zhang if (size == 1) { 1679566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ)); 16864b52464SHong Zhang } else { 1699566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ)); 17064b52464SHong Zhang } 17132e7c8b0SBarry Smith A->preallocated = PETSC_TRUE; 17232e7c8b0SBarry Smith A->assembled = PETSC_TRUE; 1733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17482b1193eSBarry Smith } 17582b1193eSBarry Smith 17680eb40d0SJacob Faibussowitsch #if PetscHasAttribute(always_inline) 17780eb40d0SJacob Faibussowitsch #define PETSC_FORCE_INLINE __attribute__((always_inline)) 17880eb40d0SJacob Faibussowitsch #else 17980eb40d0SJacob Faibussowitsch #define PETSC_FORCE_INLINE 18080eb40d0SJacob Faibussowitsch #endif 181bb5b24f5SJacob Faibussowitsch 1829927e305SJacob Faibussowitsch #if defined(__clang__) 183bb5b24f5SJacob Faibussowitsch #define PETSC_PRAGMA_UNROLL _Pragma("unroll") 184bb5b24f5SJacob Faibussowitsch #else 185bb5b24f5SJacob Faibussowitsch #define PETSC_PRAGMA_UNROLL 186bb5b24f5SJacob Faibussowitsch #endif 187bb5b24f5SJacob Faibussowitsch 18880eb40d0SJacob Faibussowitsch enum { 18980eb40d0SJacob Faibussowitsch MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18 19080eb40d0SJacob Faibussowitsch }; 19180eb40d0SJacob Faibussowitsch 19280eb40d0SJacob Faibussowitsch // try as hard as possible to get these "template"s inlined, GCC apparently does take 'inline' 19380eb40d0SJacob Faibussowitsch // keyword into account for these... 19480eb40d0SJacob Faibussowitsch PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 19580eb40d0SJacob Faibussowitsch { 19680eb40d0SJacob Faibussowitsch const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 19780eb40d0SJacob Faibussowitsch const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 19880eb40d0SJacob Faibussowitsch const Mat baij = b->AIJ; 19980eb40d0SJacob Faibussowitsch const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 20080eb40d0SJacob Faibussowitsch const PetscInt m = baij->rmap->n; 20180eb40d0SJacob Faibussowitsch const PetscInt nz = a->nz; 20280eb40d0SJacob Faibussowitsch const PetscInt *idx = a->j; 20380eb40d0SJacob Faibussowitsch const PetscInt *ii = a->i; 20480eb40d0SJacob Faibussowitsch const PetscScalar *v = a->a; 20580eb40d0SJacob Faibussowitsch PetscInt nonzerorow = 0; 20680eb40d0SJacob Faibussowitsch const PetscScalar *x; 20780eb40d0SJacob Faibussowitsch PetscScalar *z; 20880eb40d0SJacob Faibussowitsch 20980eb40d0SJacob Faibussowitsch PetscFunctionBegin; 21080eb40d0SJacob Faibussowitsch PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 21180eb40d0SJacob Faibussowitsch if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz)); 21280eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 21380eb40d0SJacob Faibussowitsch if (mult_add) { 21480eb40d0SJacob Faibussowitsch PetscCall(VecGetArray(zz, &z)); 21580eb40d0SJacob Faibussowitsch } else { 21680eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayWrite(zz, &z)); 21780eb40d0SJacob Faibussowitsch } 21880eb40d0SJacob Faibussowitsch 21980eb40d0SJacob Faibussowitsch for (PetscInt i = 0; i < m; ++i) { 22080eb40d0SJacob Faibussowitsch PetscInt jrow = ii[i]; 22180eb40d0SJacob Faibussowitsch const PetscInt n = ii[i + 1] - jrow; 22280eb40d0SJacob Faibussowitsch // leave a line so clang-format does not align these decls 22380eb40d0SJacob Faibussowitsch PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0}; 22480eb40d0SJacob Faibussowitsch 22580eb40d0SJacob Faibussowitsch nonzerorow += n > 0; 22680eb40d0SJacob Faibussowitsch for (PetscInt j = 0; j < n; ++j, ++jrow) { 22780eb40d0SJacob Faibussowitsch const PetscScalar v_jrow = v[jrow]; 22880eb40d0SJacob Faibussowitsch const PetscInt N_idx_jrow = N * idx[jrow]; 22980eb40d0SJacob Faibussowitsch 230bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL 23180eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k]; 23280eb40d0SJacob Faibussowitsch } 23380eb40d0SJacob Faibussowitsch 234bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL 23580eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) { 23680eb40d0SJacob Faibussowitsch const PetscInt z_idx = N * i + k; 23780eb40d0SJacob Faibussowitsch 23880eb40d0SJacob Faibussowitsch if (mult_add) { 23980eb40d0SJacob Faibussowitsch z[z_idx] += sum[k]; 24080eb40d0SJacob Faibussowitsch } else { 24180eb40d0SJacob Faibussowitsch z[z_idx] = sum[k]; 24280eb40d0SJacob Faibussowitsch } 24380eb40d0SJacob Faibussowitsch } 24480eb40d0SJacob Faibussowitsch } 24580eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow)))); 24680eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 24780eb40d0SJacob Faibussowitsch if (mult_add) { 24880eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &z)); 24980eb40d0SJacob Faibussowitsch } else { 25080eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(zz, &z)); 25180eb40d0SJacob Faibussowitsch } 25280eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25380eb40d0SJacob Faibussowitsch } 25480eb40d0SJacob Faibussowitsch 25580eb40d0SJacob Faibussowitsch PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 25680eb40d0SJacob Faibussowitsch { 25780eb40d0SJacob Faibussowitsch const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 25880eb40d0SJacob Faibussowitsch const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 25980eb40d0SJacob Faibussowitsch const Mat baij = b->AIJ; 26080eb40d0SJacob Faibussowitsch const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 26180eb40d0SJacob Faibussowitsch const PetscInt m = baij->rmap->n; 26280eb40d0SJacob Faibussowitsch const PetscInt nz = a->nz; 26380eb40d0SJacob Faibussowitsch const PetscInt *a_j = a->j; 26480eb40d0SJacob Faibussowitsch const PetscInt *a_i = a->i; 26580eb40d0SJacob Faibussowitsch const PetscScalar *a_a = a->a; 26680eb40d0SJacob Faibussowitsch const PetscScalar *x; 26780eb40d0SJacob Faibussowitsch PetscScalar *z; 26880eb40d0SJacob Faibussowitsch 26980eb40d0SJacob Faibussowitsch PetscFunctionBegin; 27080eb40d0SJacob Faibussowitsch PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 27180eb40d0SJacob Faibussowitsch if (mult_add) { 27280eb40d0SJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 27380eb40d0SJacob Faibussowitsch } else { 27480eb40d0SJacob Faibussowitsch PetscCall(VecSet(zz, 0.0)); 27580eb40d0SJacob Faibussowitsch } 27680eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 27780eb40d0SJacob Faibussowitsch PetscCall(VecGetArray(zz, &z)); 27880eb40d0SJacob Faibussowitsch 27980eb40d0SJacob Faibussowitsch for (PetscInt i = 0; i < m; i++) { 28080eb40d0SJacob Faibussowitsch const PetscInt a_ii = a_i[i]; 281*8e3a54c0SPierre Jolivet const PetscInt *idx = PetscSafePointerPlusOffset(a_j, a_ii); 282*8e3a54c0SPierre Jolivet const PetscScalar *v = PetscSafePointerPlusOffset(a_a, a_ii); 28380eb40d0SJacob Faibussowitsch const PetscInt n = a_i[i + 1] - a_ii; 28480eb40d0SJacob Faibussowitsch PetscScalar alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE]; 28580eb40d0SJacob Faibussowitsch 286bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL 28780eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k]; 28880eb40d0SJacob Faibussowitsch for (PetscInt j = 0; j < n; ++j) { 28980eb40d0SJacob Faibussowitsch const PetscInt N_idx_j = N * idx[j]; 29080eb40d0SJacob Faibussowitsch const PetscScalar v_j = v[j]; 29180eb40d0SJacob Faibussowitsch 292bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL 29380eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j; 29480eb40d0SJacob Faibussowitsch } 29580eb40d0SJacob Faibussowitsch } 29680eb40d0SJacob Faibussowitsch 29780eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2 * N * nz)); 29880eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 29980eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &z)); 30080eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 30180eb40d0SJacob Faibussowitsch } 30280eb40d0SJacob Faibussowitsch 3039927e305SJacob Faibussowitsch #define MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(N) \ 3049927e305SJacob Faibussowitsch static PetscErrorCode PetscConcat(MatMult_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \ 3059927e305SJacob Faibussowitsch { \ 3069927e305SJacob Faibussowitsch PetscFunctionBegin; \ 3079927e305SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \ 3089927e305SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); \ 3099927e305SJacob Faibussowitsch } \ 3109927e305SJacob Faibussowitsch static PetscErrorCode PetscConcat(MatMultTranspose_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \ 3119927e305SJacob Faibussowitsch { \ 3129927e305SJacob Faibussowitsch PetscFunctionBegin; \ 3139927e305SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \ 3149927e305SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); \ 3159927e305SJacob Faibussowitsch } \ 3169927e305SJacob Faibussowitsch static PetscErrorCode PetscConcat(MatMultAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \ 3179927e305SJacob Faibussowitsch { \ 3189927e305SJacob Faibussowitsch PetscFunctionBegin; \ 3199927e305SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \ 3209927e305SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); \ 3219927e305SJacob Faibussowitsch } \ 3229927e305SJacob Faibussowitsch static PetscErrorCode PetscConcat(MatMultTransposeAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \ 3239927e305SJacob Faibussowitsch { \ 3249927e305SJacob Faibussowitsch PetscFunctionBegin; \ 3259927e305SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \ 3269927e305SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); \ 32782b1193eSBarry Smith } 328bcc973b7SBarry Smith 3299927e305SJacob Faibussowitsch // clang-format off 3309927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2) 3319927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(3) 3329927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(4) 3339927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(5) 3349927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(6) 3359927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(7) 3369927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(8) 3379927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(9) 3389927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(10) 3399927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(11) 3409927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(16) 3419927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(18) 3429927e305SJacob Faibussowitsch // clang-format on 343bcc973b7SBarry Smith 3449927e305SJacob Faibussowitsch #undef MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE 345ed1c418cSBarry Smith 34680eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 347d71ae5a4SJacob Faibussowitsch { 3486861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 3496861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 3506861f193SBarry Smith const PetscScalar *x, *v; 3516861f193SBarry Smith PetscScalar *y, *sums; 3526861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 3536861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k; 3546861f193SBarry Smith 3556861f193SBarry Smith PetscFunctionBegin; 3569566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3579566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 3589566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 3596861f193SBarry Smith idx = a->j; 3606861f193SBarry Smith v = a->a; 3616861f193SBarry Smith ii = a->i; 3626861f193SBarry Smith 3636861f193SBarry Smith for (i = 0; i < m; i++) { 3646861f193SBarry Smith jrow = ii[i]; 3656861f193SBarry Smith n = ii[i + 1] - jrow; 3666861f193SBarry Smith sums = y + dof * i; 3676861f193SBarry Smith for (j = 0; j < n; j++) { 368ad540459SPierre Jolivet for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 3696861f193SBarry Smith jrow++; 3706861f193SBarry Smith } 3716861f193SBarry Smith } 3726861f193SBarry Smith 3739566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 3749566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3759566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 3763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3776861f193SBarry Smith } 3786861f193SBarry Smith 37980eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 380d71ae5a4SJacob Faibussowitsch { 3816861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 3826861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 3836861f193SBarry Smith const PetscScalar *x, *v; 3846861f193SBarry Smith PetscScalar *y, *sums; 3856861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 3866861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k; 3876861f193SBarry Smith 3886861f193SBarry Smith PetscFunctionBegin; 3899566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 3909566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3919566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 3926861f193SBarry Smith idx = a->j; 3936861f193SBarry Smith v = a->a; 3946861f193SBarry Smith ii = a->i; 3956861f193SBarry Smith 3966861f193SBarry Smith for (i = 0; i < m; i++) { 3976861f193SBarry Smith jrow = ii[i]; 3986861f193SBarry Smith n = ii[i + 1] - jrow; 3996861f193SBarry Smith sums = y + dof * i; 4006861f193SBarry Smith for (j = 0; j < n; j++) { 401ad540459SPierre Jolivet for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 4026861f193SBarry Smith jrow++; 4036861f193SBarry Smith } 4046861f193SBarry Smith } 4056861f193SBarry Smith 4069566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 4079566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4089566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 4093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4106861f193SBarry Smith } 4116861f193SBarry Smith 41280eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 413d71ae5a4SJacob Faibussowitsch { 4146861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 4156861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 4166861f193SBarry Smith const PetscScalar *x, *v, *alpha; 4176861f193SBarry Smith PetscScalar *y; 4186861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 4196861f193SBarry Smith PetscInt n, i, k; 4206861f193SBarry Smith 4216861f193SBarry Smith PetscFunctionBegin; 4229566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4239566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 4249566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 4256861f193SBarry Smith for (i = 0; i < m; i++) { 426*8e3a54c0SPierre Jolivet idx = PetscSafePointerPlusOffset(a->j, a->i[i]); 427*8e3a54c0SPierre Jolivet v = PetscSafePointerPlusOffset(a->a, a->i[i]); 4286861f193SBarry Smith n = a->i[i + 1] - a->i[i]; 4296861f193SBarry Smith alpha = x + dof * i; 4306861f193SBarry Smith while (n-- > 0) { 431ad540459SPierre Jolivet for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 4329371c9d4SSatish Balay idx++; 4339371c9d4SSatish Balay v++; 4346861f193SBarry Smith } 4356861f193SBarry Smith } 4369566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 4379566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4389566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 4393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4406861f193SBarry Smith } 4416861f193SBarry Smith 44280eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 443d71ae5a4SJacob Faibussowitsch { 4446861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 4456861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 4466861f193SBarry Smith const PetscScalar *x, *v, *alpha; 4476861f193SBarry Smith PetscScalar *y; 4486861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 4496861f193SBarry Smith PetscInt n, i, k; 4506861f193SBarry Smith 4516861f193SBarry Smith PetscFunctionBegin; 4529566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 4539566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4549566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 4556861f193SBarry Smith for (i = 0; i < m; i++) { 4566861f193SBarry Smith idx = a->j + a->i[i]; 4576861f193SBarry Smith v = a->a + a->i[i]; 4586861f193SBarry Smith n = a->i[i + 1] - a->i[i]; 4596861f193SBarry Smith alpha = x + dof * i; 4606861f193SBarry Smith while (n-- > 0) { 461ad540459SPierre Jolivet for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 4629371c9d4SSatish Balay idx++; 4639371c9d4SSatish Balay v++; 4646861f193SBarry Smith } 4656861f193SBarry Smith } 4669566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 4679566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4689566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 4693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4706861f193SBarry Smith } 4716861f193SBarry Smith 47280eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 473d71ae5a4SJacob Faibussowitsch { 4744cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 475f4a53059SBarry Smith 476b24ad042SBarry Smith PetscFunctionBegin; 477f4a53059SBarry Smith /* start the scatter */ 4789566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 4799566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy)); 4809566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 4819566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy)); 4823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 483f4a53059SBarry Smith } 484f4a53059SBarry Smith 48580eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 486d71ae5a4SJacob Faibussowitsch { 4874cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 488b24ad042SBarry Smith 4894cb9d9b8SBarry Smith PetscFunctionBegin; 4909566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 4919566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy)); 4929566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 4939566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 4943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4954cb9d9b8SBarry Smith } 4964cb9d9b8SBarry Smith 49780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 498d71ae5a4SJacob Faibussowitsch { 4994cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 5004cb9d9b8SBarry Smith 501b24ad042SBarry Smith PetscFunctionBegin; 5024cb9d9b8SBarry Smith /* start the scatter */ 5039566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 5049566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz)); 5059566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 5069566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 5073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5084cb9d9b8SBarry Smith } 5094cb9d9b8SBarry Smith 51080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 511d71ae5a4SJacob Faibussowitsch { 5124cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 513b24ad042SBarry Smith 5144cb9d9b8SBarry Smith PetscFunctionBegin; 5159566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 5169566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz)); 5179566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 5189566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 5193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5204cb9d9b8SBarry Smith } 5214cb9d9b8SBarry Smith 52280eb40d0SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) 523d71ae5a4SJacob Faibussowitsch { 5244222ddf1SHong Zhang Mat_Product *product = C->product; 5254222ddf1SHong Zhang 5264222ddf1SHong Zhang PetscFunctionBegin; 5274222ddf1SHong Zhang if (product->type == MATPRODUCT_PtAP) { 5284222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 52998921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]); 5303ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5314222ddf1SHong Zhang } 5324222ddf1SHong Zhang 53380eb40d0SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) 534d71ae5a4SJacob Faibussowitsch { 5354222ddf1SHong Zhang Mat_Product *product = C->product; 5364222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 5374222ddf1SHong Zhang Mat A = product->A, P = product->B; 5384222ddf1SHong Zhang PetscInt alg = 1; /* set default algorithm */ 5394222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 5404222ddf1SHong Zhang const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"}; 5414222ddf1SHong Zhang PetscInt nalg = 4; 5424222ddf1SHong Zhang #else 5434222ddf1SHong Zhang const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"}; 5444222ddf1SHong Zhang PetscInt nalg = 5; 5454222ddf1SHong Zhang #endif 5464222ddf1SHong Zhang 5474222ddf1SHong Zhang PetscFunctionBegin; 54808401ef6SPierre Jolivet PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]); 5494222ddf1SHong Zhang 5504222ddf1SHong Zhang /* PtAP */ 5514222ddf1SHong Zhang /* Check matrix local sizes */ 5529371c9d4SSatish Balay PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5539371c9d4SSatish Balay A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend); 5549371c9d4SSatish Balay PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5559371c9d4SSatish Balay A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend); 5564222ddf1SHong Zhang 5574222ddf1SHong Zhang /* Set the default algorithm */ 5589566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "default", &flg)); 55948a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 5604222ddf1SHong Zhang 5614222ddf1SHong Zhang /* Get runtime option */ 562d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 5639566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg)); 56448a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 565d0609cedSBarry Smith PetscOptionsEnd(); 5664222ddf1SHong Zhang 5679566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg)); 5684222ddf1SHong Zhang if (flg) { 5694222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 5703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5714222ddf1SHong Zhang } 5724222ddf1SHong Zhang 5739566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg)); 5744222ddf1SHong Zhang if (flg) { 5754222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 5763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5774222ddf1SHong Zhang } 5784222ddf1SHong Zhang 5794222ddf1SHong Zhang /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 5809566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 5819566063dSJacob Faibussowitsch PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P)); 5829566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(C)); 5833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5844222ddf1SHong Zhang } 5854222ddf1SHong Zhang 58680eb40d0SJacob Faibussowitsch static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) 58780eb40d0SJacob Faibussowitsch { 58880eb40d0SJacob Faibussowitsch /* This routine requires testing -- first draft only */ 58980eb40d0SJacob Faibussowitsch Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 59080eb40d0SJacob Faibussowitsch Mat P = pp->AIJ; 59180eb40d0SJacob Faibussowitsch Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 59280eb40d0SJacob Faibussowitsch Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data; 59380eb40d0SJacob Faibussowitsch Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 59480eb40d0SJacob Faibussowitsch const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj; 59580eb40d0SJacob Faibussowitsch const PetscInt *ci = c->i, *cj = c->j, *cjj; 59680eb40d0SJacob Faibussowitsch const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof; 59780eb40d0SJacob Faibussowitsch PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense; 59880eb40d0SJacob Faibussowitsch const MatScalar *aa = a->a, *pa = p->a, *pA, *paj; 59980eb40d0SJacob Faibussowitsch MatScalar *ca = c->a, *caj, *apa; 60080eb40d0SJacob Faibussowitsch 60180eb40d0SJacob Faibussowitsch PetscFunctionBegin; 60280eb40d0SJacob Faibussowitsch /* Allocate temporary array for storage of one row of A*P */ 60380eb40d0SJacob Faibussowitsch PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense)); 60480eb40d0SJacob Faibussowitsch 60580eb40d0SJacob Faibussowitsch /* Clear old values in C */ 60680eb40d0SJacob Faibussowitsch PetscCall(PetscArrayzero(ca, ci[cm])); 60780eb40d0SJacob Faibussowitsch 60880eb40d0SJacob Faibussowitsch for (i = 0; i < am; i++) { 60980eb40d0SJacob Faibussowitsch /* Form sparse row of A*P */ 61080eb40d0SJacob Faibussowitsch anzi = ai[i + 1] - ai[i]; 61180eb40d0SJacob Faibussowitsch apnzj = 0; 61280eb40d0SJacob Faibussowitsch for (j = 0; j < anzi; j++) { 61380eb40d0SJacob Faibussowitsch /* Get offset within block of P */ 61480eb40d0SJacob Faibussowitsch pshift = *aj % ppdof; 61580eb40d0SJacob Faibussowitsch /* Get block row of P */ 61680eb40d0SJacob Faibussowitsch prow = *aj++ / ppdof; /* integer division */ 61780eb40d0SJacob Faibussowitsch pnzj = pi[prow + 1] - pi[prow]; 61880eb40d0SJacob Faibussowitsch pjj = pj + pi[prow]; 61980eb40d0SJacob Faibussowitsch paj = pa + pi[prow]; 62080eb40d0SJacob Faibussowitsch for (k = 0; k < pnzj; k++) { 62180eb40d0SJacob Faibussowitsch poffset = pjj[k] * ppdof + pshift; 62280eb40d0SJacob Faibussowitsch if (!apjdense[poffset]) { 62380eb40d0SJacob Faibussowitsch apjdense[poffset] = -1; 62480eb40d0SJacob Faibussowitsch apj[apnzj++] = poffset; 62580eb40d0SJacob Faibussowitsch } 62680eb40d0SJacob Faibussowitsch apa[poffset] += (*aa) * paj[k]; 62780eb40d0SJacob Faibussowitsch } 62880eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * pnzj)); 62980eb40d0SJacob Faibussowitsch aa++; 63080eb40d0SJacob Faibussowitsch } 63180eb40d0SJacob Faibussowitsch 63280eb40d0SJacob Faibussowitsch /* Sort the j index array for quick sparse axpy. */ 63380eb40d0SJacob Faibussowitsch /* Note: a array does not need sorting as it is in dense storage locations. */ 63480eb40d0SJacob Faibussowitsch PetscCall(PetscSortInt(apnzj, apj)); 63580eb40d0SJacob Faibussowitsch 63680eb40d0SJacob Faibussowitsch /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 63780eb40d0SJacob Faibussowitsch prow = i / ppdof; /* integer division */ 63880eb40d0SJacob Faibussowitsch pshift = i % ppdof; 63980eb40d0SJacob Faibussowitsch poffset = pi[prow]; 64080eb40d0SJacob Faibussowitsch pnzi = pi[prow + 1] - poffset; 64180eb40d0SJacob Faibussowitsch /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 64280eb40d0SJacob Faibussowitsch pJ = pj + poffset; 64380eb40d0SJacob Faibussowitsch pA = pa + poffset; 64480eb40d0SJacob Faibussowitsch for (j = 0; j < pnzi; j++) { 64580eb40d0SJacob Faibussowitsch crow = (*pJ) * ppdof + pshift; 64680eb40d0SJacob Faibussowitsch cjj = cj + ci[crow]; 64780eb40d0SJacob Faibussowitsch caj = ca + ci[crow]; 64880eb40d0SJacob Faibussowitsch pJ++; 64980eb40d0SJacob Faibussowitsch /* Perform sparse axpy operation. Note cjj includes apj. */ 65080eb40d0SJacob Faibussowitsch for (k = 0, nextap = 0; nextap < apnzj; k++) { 65180eb40d0SJacob Faibussowitsch if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]]; 65280eb40d0SJacob Faibussowitsch } 65380eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * apnzj)); 65480eb40d0SJacob Faibussowitsch pA++; 65580eb40d0SJacob Faibussowitsch } 65680eb40d0SJacob Faibussowitsch 65780eb40d0SJacob Faibussowitsch /* Zero the current row info for A*P */ 65880eb40d0SJacob Faibussowitsch for (j = 0; j < apnzj; j++) { 65980eb40d0SJacob Faibussowitsch apa[apj[j]] = 0.; 66080eb40d0SJacob Faibussowitsch apjdense[apj[j]] = 0; 66180eb40d0SJacob Faibussowitsch } 66280eb40d0SJacob Faibussowitsch } 66380eb40d0SJacob Faibussowitsch 66480eb40d0SJacob Faibussowitsch /* Assemble the final matrix and clean up */ 66580eb40d0SJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 66680eb40d0SJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 66780eb40d0SJacob Faibussowitsch PetscCall(PetscFree3(apa, apj, apjdense)); 66880eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 66980eb40d0SJacob Faibussowitsch } 67080eb40d0SJacob Faibussowitsch 67180eb40d0SJacob Faibussowitsch static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) 672d71ae5a4SJacob Faibussowitsch { 6730298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 6747ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 6757ba1a0bfSKris Buschelman Mat P = pp->AIJ; 6767ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c; 677d2840e09SBarry Smith PetscInt *pti, *ptj, *ptJ; 6787ba1a0bfSKris Buschelman PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj; 679d2840e09SBarry Smith const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof; 680d2840e09SBarry Smith PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn; 6817ba1a0bfSKris Buschelman MatScalar *ca; 682d2840e09SBarry Smith const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj; 6837ba1a0bfSKris Buschelman 6847ba1a0bfSKris Buschelman PetscFunctionBegin; 6857ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 6869566063dSJacob Faibussowitsch PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 6877ba1a0bfSKris Buschelman 6887ba1a0bfSKris Buschelman cn = pn * ppdof; 6897ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 6907ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 6919566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(cn + 1, &ci)); 6927ba1a0bfSKris Buschelman ci[0] = 0; 6937ba1a0bfSKris Buschelman 6947ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 6959566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow)); 6969566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ptadenserow, an)); 6979566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(denserow, cn)); 6987ba1a0bfSKris Buschelman 6997ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 7007ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 7017ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 7029566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space)); 7037ba1a0bfSKris Buschelman current_space = free_space; 7047ba1a0bfSKris Buschelman 7057ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 7067ba1a0bfSKris Buschelman for (i = 0; i < pn; i++) { 7077ba1a0bfSKris Buschelman ptnzi = pti[i + 1] - pti[i]; 7087ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 7097ba1a0bfSKris Buschelman for (dof = 0; dof < ppdof; dof++) { 7107ba1a0bfSKris Buschelman ptanzi = 0; 7117ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 7127ba1a0bfSKris Buschelman for (j = 0; j < ptnzi; j++) { 7137ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 7147ba1a0bfSKris Buschelman arow = ptJ[j] * ppdof + dof; 7157ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 7167ba1a0bfSKris Buschelman anzj = ai[arow + 1] - ai[arow]; 7177ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 7187ba1a0bfSKris Buschelman for (k = 0; k < anzj; k++) { 7197ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 7207ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 7217ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 7227ba1a0bfSKris Buschelman } 7237ba1a0bfSKris Buschelman } 7247ba1a0bfSKris Buschelman } 7257ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 7267ba1a0bfSKris Buschelman ptaj = ptasparserow; 7277ba1a0bfSKris Buschelman cnzi = 0; 7287ba1a0bfSKris Buschelman for (j = 0; j < ptanzi; j++) { 7297ba1a0bfSKris Buschelman /* Get offset within block of P */ 7307ba1a0bfSKris Buschelman pshift = *ptaj % ppdof; 7317ba1a0bfSKris Buschelman /* Get block row of P */ 7327ba1a0bfSKris Buschelman prow = (*ptaj++) / ppdof; /* integer division */ 7337ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 7347ba1a0bfSKris Buschelman pnzj = pi[prow + 1] - pi[prow]; 7357ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 7367ba1a0bfSKris Buschelman for (k = 0; k < pnzj; k++) { 7377ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 7387ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 7397ba1a0bfSKris Buschelman if (!denserow[pjj[k] * ppdof + pshift]) { 7407ba1a0bfSKris Buschelman denserow[pjj[k] * ppdof + pshift] = -1; 7417ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k] * ppdof + pshift; 7427ba1a0bfSKris Buschelman } 7437ba1a0bfSKris Buschelman } 7447ba1a0bfSKris Buschelman } 7457ba1a0bfSKris Buschelman 7467ba1a0bfSKris Buschelman /* sort sparserow */ 7479566063dSJacob Faibussowitsch PetscCall(PetscSortInt(cnzi, sparserow)); 7487ba1a0bfSKris Buschelman 7497ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 7507ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 75148a46eb9SPierre Jolivet if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); 7527ba1a0bfSKris Buschelman 7537ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 7549566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi)); 75526fbe8dcSKarl Rupp 7567ba1a0bfSKris Buschelman current_space->array += cnzi; 7577ba1a0bfSKris Buschelman current_space->local_used += cnzi; 7587ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 7597ba1a0bfSKris Buschelman 76026fbe8dcSKarl Rupp for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 76126fbe8dcSKarl Rupp for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0; 76226fbe8dcSKarl Rupp 7637ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 7647ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 7657ba1a0bfSKris Buschelman ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi; 7667ba1a0bfSKris Buschelman } 7677ba1a0bfSKris Buschelman } 7687ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 7697ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 7707ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 7719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[cn] + 1, &cj)); 7729566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 7739566063dSJacob Faibussowitsch PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow)); 7747ba1a0bfSKris Buschelman 7757ba1a0bfSKris Buschelman /* Allocate space for ca */ 7769566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ci[cn] + 1, &ca)); 7777ba1a0bfSKris Buschelman 7787ba1a0bfSKris Buschelman /* put together the new matrix */ 7799566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C)); 7809566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(C, pp->dof)); 7817ba1a0bfSKris Buschelman 7827ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 7837ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 7844222ddf1SHong Zhang c = (Mat_SeqAIJ *)(C->data); 785e6b907acSBarry Smith c->free_a = PETSC_TRUE; 786e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 7877ba1a0bfSKris Buschelman c->nonew = 0; 78826fbe8dcSKarl Rupp 7894222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 7904222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 7917ba1a0bfSKris Buschelman 7927ba1a0bfSKris Buschelman /* Clean up. */ 7939566063dSJacob Faibussowitsch PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 7943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7957ba1a0bfSKris Buschelman } 7967ba1a0bfSKris Buschelman 797d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) 798d71ae5a4SJacob Faibussowitsch { 7994222ddf1SHong Zhang Mat_Product *product = C->product; 8004222ddf1SHong Zhang Mat A = product->A, P = product->B; 8012121bac1SHong Zhang 8022121bac1SHong Zhang PetscFunctionBegin; 8039566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C)); 8043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8052121bac1SHong Zhang } 8062121bac1SHong Zhang 807bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat); 808bc8e477aSFande Kong 809d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) 810d71ae5a4SJacob Faibussowitsch { 811bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 812bc8e477aSFande Kong 813bc8e477aSFande Kong PetscFunctionBegin; 81434bcad68SFande Kong 8159566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C)); 8163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 817bc8e477aSFande Kong } 818bc8e477aSFande Kong 8194222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat); 820bc8e477aSFande Kong 821d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) 822d71ae5a4SJacob Faibussowitsch { 823bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 824bc8e477aSFande Kong 825bc8e477aSFande Kong PetscFunctionBegin; 8269566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C)); 8274222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 8283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 829bc8e477aSFande Kong } 830bc8e477aSFande Kong 831bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat); 832bc8e477aSFande Kong 833d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) 834d71ae5a4SJacob Faibussowitsch { 835bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 836bc8e477aSFande Kong 837bc8e477aSFande Kong PetscFunctionBegin; 83834bcad68SFande Kong 8399566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C)); 8403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 841bc8e477aSFande Kong } 842bc8e477aSFande Kong 8434222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat); 844bc8e477aSFande Kong 845d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) 846d71ae5a4SJacob Faibussowitsch { 847bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 848bc8e477aSFande Kong 849bc8e477aSFande Kong PetscFunctionBegin; 85034bcad68SFande Kong 8519566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C)); 8524222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 8533ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 854bc8e477aSFande Kong } 855bc8e477aSFande Kong 856d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) 857d71ae5a4SJacob Faibussowitsch { 8584222ddf1SHong Zhang Mat_Product *product = C->product; 8594222ddf1SHong Zhang Mat A = product->A, P = product->B; 8604222ddf1SHong Zhang PetscBool flg; 861bc8e477aSFande Kong 862bc8e477aSFande Kong PetscFunctionBegin; 8639566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce", &flg)); 8644222ddf1SHong Zhang if (flg) { 8659566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C)); 8664222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 8673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 868bc8e477aSFande Kong } 86934bcad68SFande Kong 8709566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg)); 8714222ddf1SHong Zhang if (flg) { 8729566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C)); 8734222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 8743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8754222ddf1SHong Zhang } 87634bcad68SFande Kong 8774222ddf1SHong Zhang SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported"); 878bc8e477aSFande Kong } 879bc8e477aSFande Kong 880d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 881d71ae5a4SJacob Faibussowitsch { 8829c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 883ceb03754SKris Buschelman Mat a = b->AIJ, B; 8849c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data; 8850fd73130SBarry Smith PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof; 886ba8c8a56SBarry Smith PetscInt *cols; 887ba8c8a56SBarry Smith PetscScalar *vals; 8889c4fc4c7SBarry Smith 8899c4fc4c7SBarry Smith PetscFunctionBegin; 8909566063dSJacob Faibussowitsch PetscCall(MatGetSize(a, &m, &n)); 8919566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(dof * m, &ilen)); 8929c4fc4c7SBarry Smith for (i = 0; i < m; i++) { 8939c4fc4c7SBarry Smith nmax = PetscMax(nmax, aij->ilen[i]); 89426fbe8dcSKarl Rupp for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i]; 8959c4fc4c7SBarry Smith } 8969566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 8979566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n)); 8989566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 8999566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen)); 9009566063dSJacob Faibussowitsch PetscCall(PetscFree(ilen)); 9019566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nmax, &icols)); 9029c4fc4c7SBarry Smith ii = 0; 9039c4fc4c7SBarry Smith for (i = 0; i < m; i++) { 9049566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 9050fd73130SBarry Smith for (j = 0; j < dof; j++) { 90626fbe8dcSKarl Rupp for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j; 9079566063dSJacob Faibussowitsch PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 9089c4fc4c7SBarry Smith ii++; 9099c4fc4c7SBarry Smith } 9109566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 9119c4fc4c7SBarry Smith } 9129566063dSJacob Faibussowitsch PetscCall(PetscFree(icols)); 9139566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 9149566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 915ceb03754SKris Buschelman 916511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 9179566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 918c3d102feSKris Buschelman } else { 919ceb03754SKris Buschelman *newmat = B; 920c3d102feSKris Buschelman } 9213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9229c4fc4c7SBarry Smith } 9239c4fc4c7SBarry Smith 924c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 925be1d678aSKris Buschelman 926d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 927d71ae5a4SJacob Faibussowitsch { 9280fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data; 929ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B; 9300fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ; 9310fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data; 9320fd73130SBarry Smith Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data; 9330fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data; 9340298fd71SBarry Smith PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0; 9350298fd71SBarry Smith PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL; 9360fd73130SBarry Smith PetscInt rstart, cstart, *garray, ii, k; 9370fd73130SBarry Smith PetscScalar *vals, *ovals; 9380fd73130SBarry Smith 9390fd73130SBarry Smith PetscFunctionBegin; 9409566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz)); 941d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) { 9420fd73130SBarry Smith nmax = PetscMax(nmax, AIJ->ilen[i]); 9430fd73130SBarry Smith onmax = PetscMax(onmax, OAIJ->ilen[i]); 9440fd73130SBarry Smith for (j = 0; j < dof; j++) { 9450fd73130SBarry Smith dnz[dof * i + j] = AIJ->ilen[i]; 9460fd73130SBarry Smith onz[dof * i + j] = OAIJ->ilen[i]; 9470fd73130SBarry Smith } 9480fd73130SBarry Smith } 9499566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 9509566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 9519566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 9529566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz)); 9539566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, dof)); 9549566063dSJacob Faibussowitsch PetscCall(PetscFree2(dnz, onz)); 9550fd73130SBarry Smith 9569566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols)); 957d0f46423SBarry Smith rstart = dof * maij->A->rmap->rstart; 958d0f46423SBarry Smith cstart = dof * maij->A->cmap->rstart; 9590fd73130SBarry Smith garray = mpiaij->garray; 9600fd73130SBarry Smith 9610fd73130SBarry Smith ii = rstart; 962d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) { 9639566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 9649566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 9650fd73130SBarry Smith for (j = 0; j < dof; j++) { 966ad540459SPierre Jolivet for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j; 967ad540459SPierre Jolivet for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j; 9689566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 9699566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES)); 9700fd73130SBarry Smith ii++; 9710fd73130SBarry Smith } 9729566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 9739566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 9740fd73130SBarry Smith } 9759566063dSJacob Faibussowitsch PetscCall(PetscFree2(icols, oicols)); 9760fd73130SBarry Smith 9779566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 9789566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 979ceb03754SKris Buschelman 980511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 9817adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 9827adad957SLisandro Dalcin ((PetscObject)A)->refct = 1; 98326fbe8dcSKarl Rupp 9849566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 98526fbe8dcSKarl Rupp 9867adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 987c3d102feSKris Buschelman } else { 988ceb03754SKris Buschelman *newmat = B; 989c3d102feSKris Buschelman } 9903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9910fd73130SBarry Smith } 9920fd73130SBarry Smith 99380eb40d0SJacob Faibussowitsch static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) 994d71ae5a4SJacob Faibussowitsch { 9959e516c8fSBarry Smith Mat A; 9969e516c8fSBarry Smith 9979e516c8fSBarry Smith PetscFunctionBegin; 9989566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 9999566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 10009566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 10013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 10029e516c8fSBarry Smith } 10030fd73130SBarry Smith 100480eb40d0SJacob Faibussowitsch static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) 1005d71ae5a4SJacob Faibussowitsch { 1006ec626240SStefano Zampini Mat A; 1007ec626240SStefano Zampini 1008ec626240SStefano Zampini PetscFunctionBegin; 10099566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 10109566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat)); 10119566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 10123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1013ec626240SStefano Zampini } 1014ec626240SStefano Zampini 1015480dffcfSJed Brown /*@ 10160bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 10170bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 101811a5261eSBarry Smith way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices, 101911a5261eSBarry Smith and `MATMPIAIJ` for distributed matrices. 10200bad9183SKris Buschelman 1021ff585edeSJed Brown Collective 1022ff585edeSJed Brown 1023ff585edeSJed Brown Input Parameters: 102411a5261eSBarry Smith + A - the `MATAIJ` matrix describing the action on blocks 1025ff585edeSJed Brown - dof - the block size (number of components per node) 1026ff585edeSJed Brown 1027ff585edeSJed Brown Output Parameter: 102811a5261eSBarry Smith . maij - the new `MATMAIJ` matrix 1029ff585edeSJed Brown 10302ef1f0ffSBarry Smith Level: advanced 10312ef1f0ffSBarry Smith 1032fe59aa6dSJacob Faibussowitsch .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()` 1033ff585edeSJed Brown @*/ 1034d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) 1035d71ae5a4SJacob Faibussowitsch { 1036b24ad042SBarry Smith PetscInt n; 103782b1193eSBarry Smith Mat B; 103890f67b22SBarry Smith PetscBool flg; 1039fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 1040fdc842d1SBarry Smith /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 1041fdc842d1SBarry Smith PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 1042fdc842d1SBarry Smith #endif 104382b1193eSBarry Smith 104482b1193eSBarry Smith PetscFunctionBegin; 1045fdc842d1SBarry Smith dof = PetscAbs(dof); 10469566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A)); 1047d72c5c08SBarry Smith 104826fbe8dcSKarl Rupp if (dof == 1) *maij = A; 104926fbe8dcSKarl Rupp else { 10509566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1051c248e2fdSStefano Zampini /* propagate vec type */ 10529566063dSJacob Faibussowitsch PetscCall(MatSetVecType(B, A->defaultvectype)); 10539566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N)); 10549566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, dof)); 10559566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, dof)); 10569566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 10579566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 105826fbe8dcSKarl Rupp 1059cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 1060d72c5c08SBarry Smith 106190f67b22SBarry Smith PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 106290f67b22SBarry Smith if (flg) { 1063feb9fe23SJed Brown Mat_SeqMAIJ *b; 1064feb9fe23SJed Brown 10659566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATSEQMAIJ)); 106626fbe8dcSKarl Rupp 10670298fd71SBarry Smith B->ops->setup = NULL; 10684cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 10690fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 10704222ddf1SHong Zhang 1071feb9fe23SJed Brown b = (Mat_SeqMAIJ *)B->data; 1072b9b97703SBarry Smith b->dof = dof; 10734cb9d9b8SBarry Smith b->AIJ = A; 107426fbe8dcSKarl Rupp 1075d72c5c08SBarry Smith if (dof == 2) { 1076cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 1077cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1078cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1079cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1080bcc973b7SBarry Smith } else if (dof == 3) { 1081bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 1082bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1083bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1084bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1085bcc973b7SBarry Smith } else if (dof == 4) { 1086bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 1087bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1088bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1089bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1090f9fae5adSBarry Smith } else if (dof == 5) { 1091f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 1092f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1093f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1094f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 10956cd98798SBarry Smith } else if (dof == 6) { 10966cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 10976cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 10986cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 10996cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1100ed8eea03SBarry Smith } else if (dof == 7) { 1101ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 1102ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 1103ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 1104ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 110566ed3db0SBarry Smith } else if (dof == 8) { 110666ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 110766ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 110866ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 110966ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 11102b692628SMatthew Knepley } else if (dof == 9) { 11112b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 11122b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 11132b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 11142b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 1115b9cda22cSBarry Smith } else if (dof == 10) { 1116b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 1117b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 1118b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 1119b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 1120dbdd7285SBarry Smith } else if (dof == 11) { 1121dbdd7285SBarry Smith B->ops->mult = MatMult_SeqMAIJ_11; 1122dbdd7285SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_11; 1123dbdd7285SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 1124dbdd7285SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 11252f7816d4SBarry Smith } else if (dof == 16) { 11262f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 11272f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 11282f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 11292f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 1130ed1c418cSBarry Smith } else if (dof == 18) { 1131ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18; 1132ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18; 1133ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 1134ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 113582b1193eSBarry Smith } else { 11366861f193SBarry Smith B->ops->mult = MatMult_SeqMAIJ_N; 11376861f193SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_N; 11386861f193SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 11396861f193SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 114082b1193eSBarry Smith } 1141fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 11429566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ)); 1143fdc842d1SBarry Smith #endif 11449566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ)); 11459566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 1146f4a53059SBarry Smith } else { 1147f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1148feb9fe23SJed Brown Mat_MPIMAIJ *b; 1149f4a53059SBarry Smith IS from, to; 1150f4a53059SBarry Smith Vec gvec; 1151f4a53059SBarry Smith 11529566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATMPIMAIJ)); 115326fbe8dcSKarl Rupp 11540298fd71SBarry Smith B->ops->setup = NULL; 1155d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 11560fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 115726fbe8dcSKarl Rupp 1158b9b97703SBarry Smith b = (Mat_MPIMAIJ *)B->data; 1159b9b97703SBarry Smith b->dof = dof; 1160b9b97703SBarry Smith b->A = A; 116126fbe8dcSKarl Rupp 11629566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ)); 11639566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ)); 11644cb9d9b8SBarry Smith 11659566063dSJacob Faibussowitsch PetscCall(VecGetSize(mpiaij->lvec, &n)); 11669566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_SELF, &b->w)); 11679566063dSJacob Faibussowitsch PetscCall(VecSetSizes(b->w, n * dof, n * dof)); 11689566063dSJacob Faibussowitsch PetscCall(VecSetBlockSize(b->w, dof)); 11699566063dSJacob Faibussowitsch PetscCall(VecSetType(b->w, VECSEQ)); 1170f4a53059SBarry Smith 1171f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 11729566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 11739566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to)); 1174f4a53059SBarry Smith 1175f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 11769566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec)); 1177f4a53059SBarry Smith 1178f4a53059SBarry Smith /* generate the scatter context */ 11799566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx)); 1180f4a53059SBarry Smith 11819566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 11829566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 11839566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 1184f4a53059SBarry Smith 1185f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 11864cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 11874cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 11884cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 118926fbe8dcSKarl Rupp 1190fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 11919566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ)); 1192fdc842d1SBarry Smith #endif 11939566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ)); 11949566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 1195f4a53059SBarry Smith } 11967dae84e0SHong Zhang B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 1197ec626240SStefano Zampini B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 11989566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 1199fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 1200fdc842d1SBarry Smith /* temporary until we have CUDA implementation of MAIJ */ 1201fdc842d1SBarry Smith { 1202fdc842d1SBarry Smith PetscBool flg; 1203fdc842d1SBarry Smith if (convert) { 12049566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "")); 120548a46eb9SPierre Jolivet if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B)); 1206fdc842d1SBarry Smith } 1207fdc842d1SBarry Smith } 1208fdc842d1SBarry Smith #endif 1209cd3bbe55SBarry Smith *maij = B; 1210d72c5c08SBarry Smith } 12113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 121282b1193eSBarry Smith } 1213