1c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 2c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 382b1193eSBarry Smith 4b350b9acSSatish Balay /*@ 511a5261eSBarry Smith MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix 6ff585edeSJed Brown 711a5261eSBarry Smith Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 8ff585edeSJed Brown 9ff585edeSJed Brown Input Parameter: 1011a5261eSBarry Smith . A - the `MATMAIJ` matrix 11ff585edeSJed Brown 12ff585edeSJed Brown Output Parameter: 1311a5261eSBarry Smith . B - the `MATAIJ` matrix 14ff585edeSJed Brown 15ff585edeSJed Brown Level: advanced 16ff585edeSJed Brown 1711a5261eSBarry Smith Note: 1811a5261eSBarry Smith The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 19ff585edeSJed Brown 201cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()` 21ff585edeSJed Brown @*/ 22d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) 23d71ae5a4SJacob Faibussowitsch { 24ace3abfcSBarry Smith PetscBool ismpimaij, isseqmaij; 25b9b97703SBarry Smith 26b9b97703SBarry Smith PetscFunctionBegin; 279566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij)); 289566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij)); 29b9b97703SBarry Smith if (ismpimaij) { 30b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 31b9b97703SBarry Smith 32b9b97703SBarry Smith *B = b->A; 33b9b97703SBarry Smith } else if (isseqmaij) { 34b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 35b9b97703SBarry Smith 36b9b97703SBarry Smith *B = b->AIJ; 37b9b97703SBarry Smith } else { 38b9b97703SBarry Smith *B = A; 39b9b97703SBarry Smith } 403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 41b9b97703SBarry Smith } 42b9b97703SBarry Smith 43480dffcfSJed Brown /*@ 4411a5261eSBarry Smith MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size 45ff585edeSJed Brown 463f9fe445SBarry Smith Logically Collective 47ff585edeSJed Brown 48d8d19677SJose E. Roman Input Parameters: 4911a5261eSBarry Smith + A - the `MATMAIJ` matrix 50ff585edeSJed Brown - dof - the block size for the new matrix 51ff585edeSJed Brown 52ff585edeSJed Brown Output Parameter: 5311a5261eSBarry Smith . B - the new `MATMAIJ` matrix 54ff585edeSJed Brown 55ff585edeSJed Brown Level: advanced 56ff585edeSJed Brown 571cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MatCreateMAIJ()` 58ff585edeSJed Brown @*/ 59d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) 60d71ae5a4SJacob Faibussowitsch { 610298fd71SBarry Smith Mat Aij = NULL; 62b9b97703SBarry Smith 63b9b97703SBarry Smith PetscFunctionBegin; 64c5eb9154SBarry Smith PetscValidLogicalCollectiveInt(A, dof, 2); 659566063dSJacob Faibussowitsch PetscCall(MatMAIJGetAIJ(A, &Aij)); 669566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(Aij, dof, B)); 673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 68b9b97703SBarry Smith } 69b9b97703SBarry Smith 7080eb40d0SJacob Faibussowitsch static PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 71d71ae5a4SJacob Faibussowitsch { 724cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 7382b1193eSBarry Smith 7482b1193eSBarry Smith PetscFunctionBegin; 759566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 769566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 772e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL)); 789566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL)); 799566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL)); 803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 814cb9d9b8SBarry Smith } 824cb9d9b8SBarry Smith 8380eb40d0SJacob Faibussowitsch static PetscErrorCode MatSetUp_MAIJ(Mat A) 84d71ae5a4SJacob Faibussowitsch { 85356c569eSBarry Smith PetscFunctionBegin; 86ce94432eSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices"); 87356c569eSBarry Smith } 88356c569eSBarry Smith 8980eb40d0SJacob Faibussowitsch static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) 90d71ae5a4SJacob Faibussowitsch { 910fd73130SBarry Smith Mat B; 920fd73130SBarry Smith 930fd73130SBarry Smith PetscFunctionBegin; 949566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 959566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 969566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 980fd73130SBarry Smith } 990fd73130SBarry Smith 10080eb40d0SJacob Faibussowitsch static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) 101d71ae5a4SJacob Faibussowitsch { 1020fd73130SBarry Smith Mat B; 1030fd73130SBarry Smith 1040fd73130SBarry Smith PetscFunctionBegin; 1059566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); 1069566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 1079566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1090fd73130SBarry Smith } 1100fd73130SBarry Smith 11180eb40d0SJacob Faibussowitsch static PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 112d71ae5a4SJacob Faibussowitsch { 1134cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 1144cb9d9b8SBarry Smith 1154cb9d9b8SBarry Smith PetscFunctionBegin; 1169566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 1179566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->OAIJ)); 1189566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->A)); 1199566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->ctx)); 1209566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->w)); 1219566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1222e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL)); 1239566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL)); 1249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL)); 1259566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 127b9b97703SBarry Smith } 128b9b97703SBarry Smith 1290bad9183SKris Buschelman /*MC 130fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1310bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 13211a5261eSBarry Smith The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices. 1330bad9183SKris Buschelman 1340bad9183SKris Buschelman Operations provided: 13567b8a455SSatish Balay .vb 13611a5261eSBarry Smith MatMult() 13711a5261eSBarry Smith MatMultTranspose() 13811a5261eSBarry Smith MatMultAdd() 13911a5261eSBarry Smith MatMultTransposeAdd() 14067b8a455SSatish Balay .ve 1410bad9183SKris Buschelman 1420bad9183SKris Buschelman Level: advanced 1430bad9183SKris Buschelman 1441cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()` 1450bad9183SKris Buschelman M*/ 1460bad9183SKris Buschelman 147d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 148d71ae5a4SJacob Faibussowitsch { 1494cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 15064b52464SHong Zhang PetscMPIInt size; 15182b1193eSBarry Smith 15282b1193eSBarry Smith PetscFunctionBegin; 1534dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&b)); 154b0a32e0cSBarry Smith A->data = (void *)b; 15526fbe8dcSKarl Rupp 1569566063dSJacob Faibussowitsch PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 15726fbe8dcSKarl Rupp 158356c569eSBarry Smith A->ops->setup = MatSetUp_MAIJ; 159f4a53059SBarry Smith 160f4259b30SLisandro Dalcin b->AIJ = NULL; 161cd3bbe55SBarry Smith b->dof = 0; 162f4259b30SLisandro Dalcin b->OAIJ = NULL; 163f4259b30SLisandro Dalcin b->ctx = NULL; 164f4259b30SLisandro Dalcin b->w = NULL; 1659566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 16664b52464SHong Zhang if (size == 1) { 1679566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ)); 16864b52464SHong Zhang } else { 1699566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ)); 17064b52464SHong Zhang } 17132e7c8b0SBarry Smith A->preallocated = PETSC_TRUE; 17232e7c8b0SBarry Smith A->assembled = PETSC_TRUE; 1733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17482b1193eSBarry Smith } 17582b1193eSBarry Smith 17680eb40d0SJacob Faibussowitsch #if PetscHasAttribute(always_inline) 17780eb40d0SJacob Faibussowitsch #define PETSC_FORCE_INLINE __attribute__((always_inline)) 17880eb40d0SJacob Faibussowitsch #else 17980eb40d0SJacob Faibussowitsch #define PETSC_FORCE_INLINE 18080eb40d0SJacob Faibussowitsch #endif 181bb5b24f5SJacob Faibussowitsch 1829927e305SJacob Faibussowitsch #if defined(__clang__) 183bb5b24f5SJacob Faibussowitsch #define PETSC_PRAGMA_UNROLL _Pragma("unroll") 184bb5b24f5SJacob Faibussowitsch #else 185bb5b24f5SJacob Faibussowitsch #define PETSC_PRAGMA_UNROLL 186bb5b24f5SJacob Faibussowitsch #endif 187bb5b24f5SJacob Faibussowitsch 18880eb40d0SJacob Faibussowitsch enum { 18980eb40d0SJacob Faibussowitsch MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18 19080eb40d0SJacob Faibussowitsch }; 19180eb40d0SJacob Faibussowitsch 19280eb40d0SJacob Faibussowitsch // try as hard as possible to get these "template"s inlined, GCC apparently does take 'inline' 19380eb40d0SJacob Faibussowitsch // keyword into account for these... 19480eb40d0SJacob Faibussowitsch PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 19580eb40d0SJacob Faibussowitsch { 19680eb40d0SJacob Faibussowitsch const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 19780eb40d0SJacob Faibussowitsch const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 19880eb40d0SJacob Faibussowitsch const Mat baij = b->AIJ; 19980eb40d0SJacob Faibussowitsch const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 20080eb40d0SJacob Faibussowitsch const PetscInt m = baij->rmap->n; 20180eb40d0SJacob Faibussowitsch const PetscInt nz = a->nz; 20280eb40d0SJacob Faibussowitsch const PetscInt *idx = a->j; 20380eb40d0SJacob Faibussowitsch const PetscInt *ii = a->i; 20480eb40d0SJacob Faibussowitsch const PetscScalar *v = a->a; 20580eb40d0SJacob Faibussowitsch PetscInt nonzerorow = 0; 20680eb40d0SJacob Faibussowitsch const PetscScalar *x; 20780eb40d0SJacob Faibussowitsch PetscScalar *z; 20880eb40d0SJacob Faibussowitsch 20980eb40d0SJacob Faibussowitsch PetscFunctionBegin; 21080eb40d0SJacob Faibussowitsch PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 21180eb40d0SJacob Faibussowitsch if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz)); 21280eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 21380eb40d0SJacob Faibussowitsch if (mult_add) { 21480eb40d0SJacob Faibussowitsch PetscCall(VecGetArray(zz, &z)); 21580eb40d0SJacob Faibussowitsch } else { 21680eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayWrite(zz, &z)); 21780eb40d0SJacob Faibussowitsch } 21880eb40d0SJacob Faibussowitsch 21980eb40d0SJacob Faibussowitsch for (PetscInt i = 0; i < m; ++i) { 22080eb40d0SJacob Faibussowitsch PetscInt jrow = ii[i]; 22180eb40d0SJacob Faibussowitsch const PetscInt n = ii[i + 1] - jrow; 22280eb40d0SJacob Faibussowitsch // leave a line so clang-format does not align these decls 22380eb40d0SJacob Faibussowitsch PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0}; 22480eb40d0SJacob Faibussowitsch 22580eb40d0SJacob Faibussowitsch nonzerorow += n > 0; 22680eb40d0SJacob Faibussowitsch for (PetscInt j = 0; j < n; ++j, ++jrow) { 22780eb40d0SJacob Faibussowitsch const PetscScalar v_jrow = v[jrow]; 22880eb40d0SJacob Faibussowitsch const PetscInt N_idx_jrow = N * idx[jrow]; 22980eb40d0SJacob Faibussowitsch 230bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL 23180eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k]; 23280eb40d0SJacob Faibussowitsch } 23380eb40d0SJacob Faibussowitsch 234bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL 23580eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) { 23680eb40d0SJacob Faibussowitsch const PetscInt z_idx = N * i + k; 23780eb40d0SJacob Faibussowitsch 23880eb40d0SJacob Faibussowitsch if (mult_add) { 23980eb40d0SJacob Faibussowitsch z[z_idx] += sum[k]; 24080eb40d0SJacob Faibussowitsch } else { 24180eb40d0SJacob Faibussowitsch z[z_idx] = sum[k]; 24280eb40d0SJacob Faibussowitsch } 24380eb40d0SJacob Faibussowitsch } 24480eb40d0SJacob Faibussowitsch } 24580eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow)))); 24680eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 24780eb40d0SJacob Faibussowitsch if (mult_add) { 24880eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &z)); 24980eb40d0SJacob Faibussowitsch } else { 25080eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(zz, &z)); 25180eb40d0SJacob Faibussowitsch } 25280eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25380eb40d0SJacob Faibussowitsch } 25480eb40d0SJacob Faibussowitsch 25580eb40d0SJacob Faibussowitsch PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 25680eb40d0SJacob Faibussowitsch { 25780eb40d0SJacob Faibussowitsch const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 25880eb40d0SJacob Faibussowitsch const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 25980eb40d0SJacob Faibussowitsch const Mat baij = b->AIJ; 26080eb40d0SJacob Faibussowitsch const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 26180eb40d0SJacob Faibussowitsch const PetscInt m = baij->rmap->n; 26280eb40d0SJacob Faibussowitsch const PetscInt nz = a->nz; 26380eb40d0SJacob Faibussowitsch const PetscInt *a_j = a->j; 26480eb40d0SJacob Faibussowitsch const PetscInt *a_i = a->i; 26580eb40d0SJacob Faibussowitsch const PetscScalar *a_a = a->a; 26680eb40d0SJacob Faibussowitsch const PetscScalar *x; 26780eb40d0SJacob Faibussowitsch PetscScalar *z; 26880eb40d0SJacob Faibussowitsch 26980eb40d0SJacob Faibussowitsch PetscFunctionBegin; 27080eb40d0SJacob Faibussowitsch PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 27180eb40d0SJacob Faibussowitsch if (mult_add) { 27280eb40d0SJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 27380eb40d0SJacob Faibussowitsch } else { 27480eb40d0SJacob Faibussowitsch PetscCall(VecSet(zz, 0.0)); 27580eb40d0SJacob Faibussowitsch } 27680eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 27780eb40d0SJacob Faibussowitsch PetscCall(VecGetArray(zz, &z)); 27880eb40d0SJacob Faibussowitsch 27980eb40d0SJacob Faibussowitsch for (PetscInt i = 0; i < m; i++) { 28080eb40d0SJacob Faibussowitsch const PetscInt a_ii = a_i[i]; 2818e3a54c0SPierre Jolivet const PetscInt *idx = PetscSafePointerPlusOffset(a_j, a_ii); 2828e3a54c0SPierre Jolivet const PetscScalar *v = PetscSafePointerPlusOffset(a_a, a_ii); 28380eb40d0SJacob Faibussowitsch const PetscInt n = a_i[i + 1] - a_ii; 28480eb40d0SJacob Faibussowitsch PetscScalar alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE]; 28580eb40d0SJacob Faibussowitsch 286bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL 28780eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k]; 28880eb40d0SJacob Faibussowitsch for (PetscInt j = 0; j < n; ++j) { 28980eb40d0SJacob Faibussowitsch const PetscInt N_idx_j = N * idx[j]; 29080eb40d0SJacob Faibussowitsch const PetscScalar v_j = v[j]; 29180eb40d0SJacob Faibussowitsch 292bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL 29380eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j; 29480eb40d0SJacob Faibussowitsch } 29580eb40d0SJacob Faibussowitsch } 29680eb40d0SJacob Faibussowitsch 29780eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2 * N * nz)); 29880eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 29980eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &z)); 30080eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 30180eb40d0SJacob Faibussowitsch } 30280eb40d0SJacob Faibussowitsch 3039927e305SJacob Faibussowitsch #define MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(N) \ 3049927e305SJacob Faibussowitsch static PetscErrorCode PetscConcat(MatMult_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \ 3059927e305SJacob Faibussowitsch { \ 3069927e305SJacob Faibussowitsch PetscFunctionBegin; \ 3079927e305SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \ 3089927e305SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); \ 3099927e305SJacob Faibussowitsch } \ 3109927e305SJacob Faibussowitsch static PetscErrorCode PetscConcat(MatMultTranspose_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \ 3119927e305SJacob Faibussowitsch { \ 3129927e305SJacob Faibussowitsch PetscFunctionBegin; \ 3139927e305SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \ 3149927e305SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); \ 3159927e305SJacob Faibussowitsch } \ 3169927e305SJacob Faibussowitsch static PetscErrorCode PetscConcat(MatMultAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \ 3179927e305SJacob Faibussowitsch { \ 3189927e305SJacob Faibussowitsch PetscFunctionBegin; \ 3199927e305SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \ 3209927e305SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); \ 3219927e305SJacob Faibussowitsch } \ 3229927e305SJacob Faibussowitsch static PetscErrorCode PetscConcat(MatMultTransposeAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \ 3239927e305SJacob Faibussowitsch { \ 3249927e305SJacob Faibussowitsch PetscFunctionBegin; \ 3259927e305SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \ 3269927e305SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); \ 32782b1193eSBarry Smith } 328bcc973b7SBarry Smith 3299927e305SJacob Faibussowitsch // clang-format off 3309927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2) 3319927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(3) 3329927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(4) 3339927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(5) 3349927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(6) 3359927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(7) 3369927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(8) 3379927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(9) 3389927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(10) 3399927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(11) 3409927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(16) 3419927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(18) 3429927e305SJacob Faibussowitsch // clang-format on 343bcc973b7SBarry Smith 3449927e305SJacob Faibussowitsch #undef MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE 345ed1c418cSBarry Smith 34680eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 347d71ae5a4SJacob Faibussowitsch { 3486861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 3496861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 3506861f193SBarry Smith const PetscScalar *x, *v; 3516861f193SBarry Smith PetscScalar *y, *sums; 3526861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 3536861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k; 3546861f193SBarry Smith 3556861f193SBarry Smith PetscFunctionBegin; 3569566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3579566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 3589566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 3596861f193SBarry Smith idx = a->j; 3606861f193SBarry Smith v = a->a; 3616861f193SBarry Smith ii = a->i; 3626861f193SBarry Smith 3636861f193SBarry Smith for (i = 0; i < m; i++) { 3646861f193SBarry Smith jrow = ii[i]; 3656861f193SBarry Smith n = ii[i + 1] - jrow; 3666861f193SBarry Smith sums = y + dof * i; 3676861f193SBarry Smith for (j = 0; j < n; j++) { 368ad540459SPierre Jolivet for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 3696861f193SBarry Smith jrow++; 3706861f193SBarry Smith } 3716861f193SBarry Smith } 3726861f193SBarry Smith 3739566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 3749566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3759566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 3763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3776861f193SBarry Smith } 3786861f193SBarry Smith 37980eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 380d71ae5a4SJacob Faibussowitsch { 3816861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 3826861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 3836861f193SBarry Smith const PetscScalar *x, *v; 3846861f193SBarry Smith PetscScalar *y, *sums; 3856861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 3866861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k; 3876861f193SBarry Smith 3886861f193SBarry Smith PetscFunctionBegin; 3899566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 3909566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3919566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 3926861f193SBarry Smith idx = a->j; 3936861f193SBarry Smith v = a->a; 3946861f193SBarry Smith ii = a->i; 3956861f193SBarry Smith 3966861f193SBarry Smith for (i = 0; i < m; i++) { 3976861f193SBarry Smith jrow = ii[i]; 3986861f193SBarry Smith n = ii[i + 1] - jrow; 3996861f193SBarry Smith sums = y + dof * i; 4006861f193SBarry Smith for (j = 0; j < n; j++) { 401ad540459SPierre Jolivet for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 4026861f193SBarry Smith jrow++; 4036861f193SBarry Smith } 4046861f193SBarry Smith } 4056861f193SBarry Smith 4069566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 4079566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4089566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 4093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4106861f193SBarry Smith } 4116861f193SBarry Smith 41280eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 413d71ae5a4SJacob Faibussowitsch { 4146861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 4156861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 4166861f193SBarry Smith const PetscScalar *x, *v, *alpha; 4176861f193SBarry Smith PetscScalar *y; 4186861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 4196861f193SBarry Smith PetscInt n, i, k; 4206861f193SBarry Smith 4216861f193SBarry Smith PetscFunctionBegin; 4229566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4239566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 4249566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 4256861f193SBarry Smith for (i = 0; i < m; i++) { 4268e3a54c0SPierre Jolivet idx = PetscSafePointerPlusOffset(a->j, a->i[i]); 4278e3a54c0SPierre Jolivet v = PetscSafePointerPlusOffset(a->a, a->i[i]); 4286861f193SBarry Smith n = a->i[i + 1] - a->i[i]; 4296861f193SBarry Smith alpha = x + dof * i; 4306861f193SBarry Smith while (n-- > 0) { 431ad540459SPierre Jolivet for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 4329371c9d4SSatish Balay idx++; 4339371c9d4SSatish Balay v++; 4346861f193SBarry Smith } 4356861f193SBarry Smith } 4369566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 4379566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4389566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 4393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4406861f193SBarry Smith } 4416861f193SBarry Smith 44280eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 443d71ae5a4SJacob Faibussowitsch { 4446861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 4456861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 4466861f193SBarry Smith const PetscScalar *x, *v, *alpha; 4476861f193SBarry Smith PetscScalar *y; 4486861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 4496861f193SBarry Smith PetscInt n, i, k; 4506861f193SBarry Smith 4516861f193SBarry Smith PetscFunctionBegin; 4529566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 4539566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4549566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 4556861f193SBarry Smith for (i = 0; i < m; i++) { 4566861f193SBarry Smith idx = a->j + a->i[i]; 4576861f193SBarry Smith v = a->a + a->i[i]; 4586861f193SBarry Smith n = a->i[i + 1] - a->i[i]; 4596861f193SBarry Smith alpha = x + dof * i; 4606861f193SBarry Smith while (n-- > 0) { 461ad540459SPierre Jolivet for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 4629371c9d4SSatish Balay idx++; 4639371c9d4SSatish Balay v++; 4646861f193SBarry Smith } 4656861f193SBarry Smith } 4669566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 4679566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4689566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 4693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4706861f193SBarry Smith } 4716861f193SBarry Smith 47280eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 473d71ae5a4SJacob Faibussowitsch { 4744cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 475f4a53059SBarry Smith 476b24ad042SBarry Smith PetscFunctionBegin; 477f4a53059SBarry Smith /* start the scatter */ 4789566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 4799566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy)); 4809566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 4819566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy)); 4823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 483f4a53059SBarry Smith } 484f4a53059SBarry Smith 48580eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 486d71ae5a4SJacob Faibussowitsch { 4874cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 488b24ad042SBarry Smith 4894cb9d9b8SBarry Smith PetscFunctionBegin; 4909566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 4919566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy)); 4929566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 4939566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 4943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4954cb9d9b8SBarry Smith } 4964cb9d9b8SBarry Smith 49780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 498d71ae5a4SJacob Faibussowitsch { 4994cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 5004cb9d9b8SBarry Smith 501b24ad042SBarry Smith PetscFunctionBegin; 5024cb9d9b8SBarry Smith /* start the scatter */ 5039566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 5049566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz)); 5059566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 5069566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 5073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5084cb9d9b8SBarry Smith } 5094cb9d9b8SBarry Smith 51080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 511d71ae5a4SJacob Faibussowitsch { 5124cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 513b24ad042SBarry Smith 5144cb9d9b8SBarry Smith PetscFunctionBegin; 5159566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 5169566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz)); 5179566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 5189566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 5193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5204cb9d9b8SBarry Smith } 5214cb9d9b8SBarry Smith 52280eb40d0SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) 523d71ae5a4SJacob Faibussowitsch { 5244222ddf1SHong Zhang Mat_Product *product = C->product; 5254222ddf1SHong Zhang 5264222ddf1SHong Zhang PetscFunctionBegin; 5274222ddf1SHong Zhang if (product->type == MATPRODUCT_PtAP) { 5284222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 52998921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]); 5303ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5314222ddf1SHong Zhang } 5324222ddf1SHong Zhang 53380eb40d0SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) 534d71ae5a4SJacob Faibussowitsch { 5354222ddf1SHong Zhang Mat_Product *product = C->product; 5364222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 5374222ddf1SHong Zhang Mat A = product->A, P = product->B; 5384222ddf1SHong Zhang PetscInt alg = 1; /* set default algorithm */ 5394222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 5404222ddf1SHong Zhang const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"}; 5414222ddf1SHong Zhang PetscInt nalg = 4; 5424222ddf1SHong Zhang #else 5434222ddf1SHong Zhang const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"}; 5444222ddf1SHong Zhang PetscInt nalg = 5; 5454222ddf1SHong Zhang #endif 5464222ddf1SHong Zhang 5474222ddf1SHong Zhang PetscFunctionBegin; 54808401ef6SPierre Jolivet PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]); 5494222ddf1SHong Zhang 5504222ddf1SHong Zhang /* PtAP */ 5514222ddf1SHong Zhang /* Check matrix local sizes */ 5529371c9d4SSatish Balay PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5539371c9d4SSatish Balay A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend); 5549371c9d4SSatish Balay PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5559371c9d4SSatish Balay A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend); 5564222ddf1SHong Zhang 5574222ddf1SHong Zhang /* Set the default algorithm */ 5589566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "default", &flg)); 55948a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 5604222ddf1SHong Zhang 5614222ddf1SHong Zhang /* Get runtime option */ 562d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 5639566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg)); 56448a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 565d0609cedSBarry Smith PetscOptionsEnd(); 5664222ddf1SHong Zhang 5679566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg)); 5684222ddf1SHong Zhang if (flg) { 5694222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 5703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5714222ddf1SHong Zhang } 5724222ddf1SHong Zhang 5739566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg)); 5744222ddf1SHong Zhang if (flg) { 5754222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 5763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5774222ddf1SHong Zhang } 5784222ddf1SHong Zhang 5794222ddf1SHong Zhang /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 5809566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 5819566063dSJacob Faibussowitsch PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P)); 5829566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(C)); 5833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5844222ddf1SHong Zhang } 5854222ddf1SHong Zhang 58680eb40d0SJacob Faibussowitsch static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) 58780eb40d0SJacob Faibussowitsch { 58880eb40d0SJacob Faibussowitsch /* This routine requires testing -- first draft only */ 58980eb40d0SJacob Faibussowitsch Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 59080eb40d0SJacob Faibussowitsch Mat P = pp->AIJ; 59180eb40d0SJacob Faibussowitsch Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 59280eb40d0SJacob Faibussowitsch Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data; 59380eb40d0SJacob Faibussowitsch Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 59480eb40d0SJacob Faibussowitsch const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj; 59580eb40d0SJacob Faibussowitsch const PetscInt *ci = c->i, *cj = c->j, *cjj; 59680eb40d0SJacob Faibussowitsch const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof; 59780eb40d0SJacob Faibussowitsch PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense; 59880eb40d0SJacob Faibussowitsch const MatScalar *aa = a->a, *pa = p->a, *pA, *paj; 59980eb40d0SJacob Faibussowitsch MatScalar *ca = c->a, *caj, *apa; 60080eb40d0SJacob Faibussowitsch 60180eb40d0SJacob Faibussowitsch PetscFunctionBegin; 60280eb40d0SJacob Faibussowitsch /* Allocate temporary array for storage of one row of A*P */ 60380eb40d0SJacob Faibussowitsch PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense)); 60480eb40d0SJacob Faibussowitsch 60580eb40d0SJacob Faibussowitsch /* Clear old values in C */ 60680eb40d0SJacob Faibussowitsch PetscCall(PetscArrayzero(ca, ci[cm])); 60780eb40d0SJacob Faibussowitsch 60880eb40d0SJacob Faibussowitsch for (i = 0; i < am; i++) { 60980eb40d0SJacob Faibussowitsch /* Form sparse row of A*P */ 61080eb40d0SJacob Faibussowitsch anzi = ai[i + 1] - ai[i]; 61180eb40d0SJacob Faibussowitsch apnzj = 0; 61280eb40d0SJacob Faibussowitsch for (j = 0; j < anzi; j++) { 61380eb40d0SJacob Faibussowitsch /* Get offset within block of P */ 61480eb40d0SJacob Faibussowitsch pshift = *aj % ppdof; 61580eb40d0SJacob Faibussowitsch /* Get block row of P */ 61680eb40d0SJacob Faibussowitsch prow = *aj++ / ppdof; /* integer division */ 61780eb40d0SJacob Faibussowitsch pnzj = pi[prow + 1] - pi[prow]; 61880eb40d0SJacob Faibussowitsch pjj = pj + pi[prow]; 61980eb40d0SJacob Faibussowitsch paj = pa + pi[prow]; 62080eb40d0SJacob Faibussowitsch for (k = 0; k < pnzj; k++) { 62180eb40d0SJacob Faibussowitsch poffset = pjj[k] * ppdof + pshift; 62280eb40d0SJacob Faibussowitsch if (!apjdense[poffset]) { 62380eb40d0SJacob Faibussowitsch apjdense[poffset] = -1; 62480eb40d0SJacob Faibussowitsch apj[apnzj++] = poffset; 62580eb40d0SJacob Faibussowitsch } 62680eb40d0SJacob Faibussowitsch apa[poffset] += (*aa) * paj[k]; 62780eb40d0SJacob Faibussowitsch } 62880eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * pnzj)); 62980eb40d0SJacob Faibussowitsch aa++; 63080eb40d0SJacob Faibussowitsch } 63180eb40d0SJacob Faibussowitsch 63280eb40d0SJacob Faibussowitsch /* Sort the j index array for quick sparse axpy. */ 63380eb40d0SJacob Faibussowitsch /* Note: a array does not need sorting as it is in dense storage locations. */ 63480eb40d0SJacob Faibussowitsch PetscCall(PetscSortInt(apnzj, apj)); 63580eb40d0SJacob Faibussowitsch 63680eb40d0SJacob Faibussowitsch /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 63780eb40d0SJacob Faibussowitsch prow = i / ppdof; /* integer division */ 63880eb40d0SJacob Faibussowitsch pshift = i % ppdof; 63980eb40d0SJacob Faibussowitsch poffset = pi[prow]; 64080eb40d0SJacob Faibussowitsch pnzi = pi[prow + 1] - poffset; 64180eb40d0SJacob Faibussowitsch /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 64280eb40d0SJacob Faibussowitsch pJ = pj + poffset; 64380eb40d0SJacob Faibussowitsch pA = pa + poffset; 64480eb40d0SJacob Faibussowitsch for (j = 0; j < pnzi; j++) { 64580eb40d0SJacob Faibussowitsch crow = (*pJ) * ppdof + pshift; 64680eb40d0SJacob Faibussowitsch cjj = cj + ci[crow]; 64780eb40d0SJacob Faibussowitsch caj = ca + ci[crow]; 64880eb40d0SJacob Faibussowitsch pJ++; 64980eb40d0SJacob Faibussowitsch /* Perform sparse axpy operation. Note cjj includes apj. */ 65080eb40d0SJacob Faibussowitsch for (k = 0, nextap = 0; nextap < apnzj; k++) { 65180eb40d0SJacob Faibussowitsch if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]]; 65280eb40d0SJacob Faibussowitsch } 65380eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * apnzj)); 65480eb40d0SJacob Faibussowitsch pA++; 65580eb40d0SJacob Faibussowitsch } 65680eb40d0SJacob Faibussowitsch 65780eb40d0SJacob Faibussowitsch /* Zero the current row info for A*P */ 65880eb40d0SJacob Faibussowitsch for (j = 0; j < apnzj; j++) { 65980eb40d0SJacob Faibussowitsch apa[apj[j]] = 0.; 66080eb40d0SJacob Faibussowitsch apjdense[apj[j]] = 0; 66180eb40d0SJacob Faibussowitsch } 66280eb40d0SJacob Faibussowitsch } 66380eb40d0SJacob Faibussowitsch 66480eb40d0SJacob Faibussowitsch /* Assemble the final matrix and clean up */ 66580eb40d0SJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 66680eb40d0SJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 66780eb40d0SJacob Faibussowitsch PetscCall(PetscFree3(apa, apj, apjdense)); 66880eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 66980eb40d0SJacob Faibussowitsch } 67080eb40d0SJacob Faibussowitsch 67180eb40d0SJacob Faibussowitsch static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) 672d71ae5a4SJacob Faibussowitsch { 6730298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 6747ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 6757ba1a0bfSKris Buschelman Mat P = pp->AIJ; 6767ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c; 677d2840e09SBarry Smith PetscInt *pti, *ptj, *ptJ; 6787ba1a0bfSKris Buschelman PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj; 679d2840e09SBarry Smith const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof; 680d2840e09SBarry Smith PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn; 6817ba1a0bfSKris Buschelman MatScalar *ca; 682d2840e09SBarry Smith const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj; 6837ba1a0bfSKris Buschelman 6847ba1a0bfSKris Buschelman PetscFunctionBegin; 6857ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 6869566063dSJacob Faibussowitsch PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 6877ba1a0bfSKris Buschelman 6887ba1a0bfSKris Buschelman cn = pn * ppdof; 6897ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 6907ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 6919566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(cn + 1, &ci)); 6927ba1a0bfSKris Buschelman ci[0] = 0; 6937ba1a0bfSKris Buschelman 6947ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 6959566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow)); 6969566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ptadenserow, an)); 6979566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(denserow, cn)); 6987ba1a0bfSKris Buschelman 6997ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 7007ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 7017ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 7029566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space)); 7037ba1a0bfSKris Buschelman current_space = free_space; 7047ba1a0bfSKris Buschelman 7057ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 7067ba1a0bfSKris Buschelman for (i = 0; i < pn; i++) { 7077ba1a0bfSKris Buschelman ptnzi = pti[i + 1] - pti[i]; 7087ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 7097ba1a0bfSKris Buschelman for (dof = 0; dof < ppdof; dof++) { 7107ba1a0bfSKris Buschelman ptanzi = 0; 7117ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 7127ba1a0bfSKris Buschelman for (j = 0; j < ptnzi; j++) { 7137ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 7147ba1a0bfSKris Buschelman arow = ptJ[j] * ppdof + dof; 7157ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 7167ba1a0bfSKris Buschelman anzj = ai[arow + 1] - ai[arow]; 7177ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 7187ba1a0bfSKris Buschelman for (k = 0; k < anzj; k++) { 7197ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 7207ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 7217ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 7227ba1a0bfSKris Buschelman } 7237ba1a0bfSKris Buschelman } 7247ba1a0bfSKris Buschelman } 7257ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 7267ba1a0bfSKris Buschelman ptaj = ptasparserow; 7277ba1a0bfSKris Buschelman cnzi = 0; 7287ba1a0bfSKris Buschelman for (j = 0; j < ptanzi; j++) { 7297ba1a0bfSKris Buschelman /* Get offset within block of P */ 7307ba1a0bfSKris Buschelman pshift = *ptaj % ppdof; 7317ba1a0bfSKris Buschelman /* Get block row of P */ 7327ba1a0bfSKris Buschelman prow = (*ptaj++) / ppdof; /* integer division */ 7337ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 7347ba1a0bfSKris Buschelman pnzj = pi[prow + 1] - pi[prow]; 7357ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 7367ba1a0bfSKris Buschelman for (k = 0; k < pnzj; k++) { 7377ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 7387ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 7397ba1a0bfSKris Buschelman if (!denserow[pjj[k] * ppdof + pshift]) { 7407ba1a0bfSKris Buschelman denserow[pjj[k] * ppdof + pshift] = -1; 7417ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k] * ppdof + pshift; 7427ba1a0bfSKris Buschelman } 7437ba1a0bfSKris Buschelman } 7447ba1a0bfSKris Buschelman } 7457ba1a0bfSKris Buschelman 7467ba1a0bfSKris Buschelman /* sort sparserow */ 7479566063dSJacob Faibussowitsch PetscCall(PetscSortInt(cnzi, sparserow)); 7487ba1a0bfSKris Buschelman 7497ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 7507ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 75148a46eb9SPierre Jolivet if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); 7527ba1a0bfSKris Buschelman 7537ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 7549566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi)); 75526fbe8dcSKarl Rupp 7567ba1a0bfSKris Buschelman current_space->array += cnzi; 7577ba1a0bfSKris Buschelman current_space->local_used += cnzi; 7587ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 7597ba1a0bfSKris Buschelman 76026fbe8dcSKarl Rupp for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 76126fbe8dcSKarl Rupp for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0; 76226fbe8dcSKarl Rupp 7637ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 7647ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 7657ba1a0bfSKris Buschelman ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi; 7667ba1a0bfSKris Buschelman } 7677ba1a0bfSKris Buschelman } 7687ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 7697ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 7707ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 7719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[cn] + 1, &cj)); 7729566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 7739566063dSJacob Faibussowitsch PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow)); 7747ba1a0bfSKris Buschelman 7757ba1a0bfSKris Buschelman /* Allocate space for ca */ 7769566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ci[cn] + 1, &ca)); 7777ba1a0bfSKris Buschelman 7787ba1a0bfSKris Buschelman /* put together the new matrix */ 7799566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C)); 7809566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(C, pp->dof)); 7817ba1a0bfSKris Buschelman 7827ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 7837ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 784*f4f49eeaSPierre Jolivet c = (Mat_SeqAIJ *)C->data; 785e6b907acSBarry Smith c->free_a = PETSC_TRUE; 786e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 7877ba1a0bfSKris Buschelman c->nonew = 0; 78826fbe8dcSKarl Rupp 7894222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 7904222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 7917ba1a0bfSKris Buschelman 7927ba1a0bfSKris Buschelman /* Clean up. */ 7939566063dSJacob Faibussowitsch PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 7943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7957ba1a0bfSKris Buschelman } 7967ba1a0bfSKris Buschelman 797d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) 798d71ae5a4SJacob Faibussowitsch { 7994222ddf1SHong Zhang Mat_Product *product = C->product; 8004222ddf1SHong Zhang Mat A = product->A, P = product->B; 8012121bac1SHong Zhang 8022121bac1SHong Zhang PetscFunctionBegin; 8039566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C)); 8043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8052121bac1SHong Zhang } 8062121bac1SHong Zhang 807bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat); 808bc8e477aSFande Kong 809d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) 810d71ae5a4SJacob Faibussowitsch { 811bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 812bc8e477aSFande Kong 813bc8e477aSFande Kong PetscFunctionBegin; 8149566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C)); 8153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 816bc8e477aSFande Kong } 817bc8e477aSFande Kong 8184222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat); 819bc8e477aSFande Kong 820d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) 821d71ae5a4SJacob Faibussowitsch { 822bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 823bc8e477aSFande Kong 824bc8e477aSFande Kong PetscFunctionBegin; 8259566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C)); 8264222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 8273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 828bc8e477aSFande Kong } 829bc8e477aSFande Kong 830bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat); 831bc8e477aSFande Kong 832d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) 833d71ae5a4SJacob Faibussowitsch { 834bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 835bc8e477aSFande Kong 836bc8e477aSFande Kong PetscFunctionBegin; 8379566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C)); 8383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 839bc8e477aSFande Kong } 840bc8e477aSFande Kong 8414222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat); 842bc8e477aSFande Kong 843d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) 844d71ae5a4SJacob Faibussowitsch { 845bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 846bc8e477aSFande Kong 847bc8e477aSFande Kong PetscFunctionBegin; 8489566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C)); 8494222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 8503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 851bc8e477aSFande Kong } 852bc8e477aSFande Kong 853d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) 854d71ae5a4SJacob Faibussowitsch { 8554222ddf1SHong Zhang Mat_Product *product = C->product; 8564222ddf1SHong Zhang Mat A = product->A, P = product->B; 8574222ddf1SHong Zhang PetscBool flg; 858bc8e477aSFande Kong 859bc8e477aSFande Kong PetscFunctionBegin; 8609566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce", &flg)); 8614222ddf1SHong Zhang if (flg) { 8629566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C)); 8634222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 8643ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 865bc8e477aSFande Kong } 86634bcad68SFande Kong 8679566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg)); 8684222ddf1SHong Zhang if (flg) { 8699566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C)); 8704222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 8713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8724222ddf1SHong Zhang } 87334bcad68SFande Kong 8744222ddf1SHong Zhang SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported"); 875bc8e477aSFande Kong } 876bc8e477aSFande Kong 877d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 878d71ae5a4SJacob Faibussowitsch { 8799c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 880ceb03754SKris Buschelman Mat a = b->AIJ, B; 8819c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data; 8820fd73130SBarry Smith PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof; 883ba8c8a56SBarry Smith PetscInt *cols; 884ba8c8a56SBarry Smith PetscScalar *vals; 8859c4fc4c7SBarry Smith 8869c4fc4c7SBarry Smith PetscFunctionBegin; 8879566063dSJacob Faibussowitsch PetscCall(MatGetSize(a, &m, &n)); 8889566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(dof * m, &ilen)); 8899c4fc4c7SBarry Smith for (i = 0; i < m; i++) { 8909c4fc4c7SBarry Smith nmax = PetscMax(nmax, aij->ilen[i]); 89126fbe8dcSKarl Rupp for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i]; 8929c4fc4c7SBarry Smith } 8939566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 8949566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n)); 8959566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 8969566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen)); 8979566063dSJacob Faibussowitsch PetscCall(PetscFree(ilen)); 8989566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nmax, &icols)); 8999c4fc4c7SBarry Smith ii = 0; 9009c4fc4c7SBarry Smith for (i = 0; i < m; i++) { 9019566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 9020fd73130SBarry Smith for (j = 0; j < dof; j++) { 90326fbe8dcSKarl Rupp for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j; 9049566063dSJacob Faibussowitsch PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 9059c4fc4c7SBarry Smith ii++; 9069c4fc4c7SBarry Smith } 9079566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 9089c4fc4c7SBarry Smith } 9099566063dSJacob Faibussowitsch PetscCall(PetscFree(icols)); 9109566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 9119566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 912ceb03754SKris Buschelman 913511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 9149566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 915c3d102feSKris Buschelman } else { 916ceb03754SKris Buschelman *newmat = B; 917c3d102feSKris Buschelman } 9183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9199c4fc4c7SBarry Smith } 9209c4fc4c7SBarry Smith 921c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 922be1d678aSKris Buschelman 923d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 924d71ae5a4SJacob Faibussowitsch { 9250fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data; 926ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B; 9270fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ; 9280fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data; 9290fd73130SBarry Smith Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data; 9300fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data; 9310298fd71SBarry Smith PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0; 9320298fd71SBarry Smith PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL; 9330fd73130SBarry Smith PetscInt rstart, cstart, *garray, ii, k; 9340fd73130SBarry Smith PetscScalar *vals, *ovals; 9350fd73130SBarry Smith 9360fd73130SBarry Smith PetscFunctionBegin; 9379566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz)); 938d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) { 9390fd73130SBarry Smith nmax = PetscMax(nmax, AIJ->ilen[i]); 9400fd73130SBarry Smith onmax = PetscMax(onmax, OAIJ->ilen[i]); 9410fd73130SBarry Smith for (j = 0; j < dof; j++) { 9420fd73130SBarry Smith dnz[dof * i + j] = AIJ->ilen[i]; 9430fd73130SBarry Smith onz[dof * i + j] = OAIJ->ilen[i]; 9440fd73130SBarry Smith } 9450fd73130SBarry Smith } 9469566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 9479566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 9489566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 9499566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz)); 9509566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, dof)); 9519566063dSJacob Faibussowitsch PetscCall(PetscFree2(dnz, onz)); 9520fd73130SBarry Smith 9539566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols)); 954d0f46423SBarry Smith rstart = dof * maij->A->rmap->rstart; 955d0f46423SBarry Smith cstart = dof * maij->A->cmap->rstart; 9560fd73130SBarry Smith garray = mpiaij->garray; 9570fd73130SBarry Smith 9580fd73130SBarry Smith ii = rstart; 959d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) { 9609566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 9619566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 9620fd73130SBarry Smith for (j = 0; j < dof; j++) { 963ad540459SPierre Jolivet for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j; 964ad540459SPierre Jolivet for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j; 9659566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 9669566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES)); 9670fd73130SBarry Smith ii++; 9680fd73130SBarry Smith } 9699566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 9709566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 9710fd73130SBarry Smith } 9729566063dSJacob Faibussowitsch PetscCall(PetscFree2(icols, oicols)); 9730fd73130SBarry Smith 9749566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 9759566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 976ceb03754SKris Buschelman 977511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 9787adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 9797adad957SLisandro Dalcin ((PetscObject)A)->refct = 1; 98026fbe8dcSKarl Rupp 9819566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 98226fbe8dcSKarl Rupp 9837adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 984c3d102feSKris Buschelman } else { 985ceb03754SKris Buschelman *newmat = B; 986c3d102feSKris Buschelman } 9873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9880fd73130SBarry Smith } 9890fd73130SBarry Smith 99080eb40d0SJacob Faibussowitsch static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) 991d71ae5a4SJacob Faibussowitsch { 9929e516c8fSBarry Smith Mat A; 9939e516c8fSBarry Smith 9949e516c8fSBarry Smith PetscFunctionBegin; 9959566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 9969566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 9979566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 9983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9999e516c8fSBarry Smith } 10000fd73130SBarry Smith 100180eb40d0SJacob Faibussowitsch static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) 1002d71ae5a4SJacob Faibussowitsch { 1003ec626240SStefano Zampini Mat A; 1004ec626240SStefano Zampini 1005ec626240SStefano Zampini PetscFunctionBegin; 10069566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 10079566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat)); 10089566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 10093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1010ec626240SStefano Zampini } 1011ec626240SStefano Zampini 1012480dffcfSJed Brown /*@ 10130bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 10140bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 101511a5261eSBarry Smith way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices, 101611a5261eSBarry Smith and `MATMPIAIJ` for distributed matrices. 10170bad9183SKris Buschelman 1018ff585edeSJed Brown Collective 1019ff585edeSJed Brown 1020ff585edeSJed Brown Input Parameters: 102111a5261eSBarry Smith + A - the `MATAIJ` matrix describing the action on blocks 1022ff585edeSJed Brown - dof - the block size (number of components per node) 1023ff585edeSJed Brown 1024ff585edeSJed Brown Output Parameter: 102511a5261eSBarry Smith . maij - the new `MATMAIJ` matrix 1026ff585edeSJed Brown 10272ef1f0ffSBarry Smith Level: advanced 10282ef1f0ffSBarry Smith 1029fe59aa6dSJacob Faibussowitsch .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()` 1030ff585edeSJed Brown @*/ 1031d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) 1032d71ae5a4SJacob Faibussowitsch { 1033b24ad042SBarry Smith PetscInt n; 103482b1193eSBarry Smith Mat B; 103590f67b22SBarry Smith PetscBool flg; 1036fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 1037fdc842d1SBarry Smith /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 1038fdc842d1SBarry Smith PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 1039fdc842d1SBarry Smith #endif 104082b1193eSBarry Smith 104182b1193eSBarry Smith PetscFunctionBegin; 1042fdc842d1SBarry Smith dof = PetscAbs(dof); 10439566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A)); 1044d72c5c08SBarry Smith 104526fbe8dcSKarl Rupp if (dof == 1) *maij = A; 104626fbe8dcSKarl Rupp else { 10479566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1048c248e2fdSStefano Zampini /* propagate vec type */ 10499566063dSJacob Faibussowitsch PetscCall(MatSetVecType(B, A->defaultvectype)); 10509566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N)); 10519566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, dof)); 10529566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, dof)); 10539566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 10549566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 105526fbe8dcSKarl Rupp 1056cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 1057d72c5c08SBarry Smith 105890f67b22SBarry Smith PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 105990f67b22SBarry Smith if (flg) { 1060feb9fe23SJed Brown Mat_SeqMAIJ *b; 1061feb9fe23SJed Brown 10629566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATSEQMAIJ)); 106326fbe8dcSKarl Rupp 10640298fd71SBarry Smith B->ops->setup = NULL; 10654cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 10660fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 10674222ddf1SHong Zhang 1068feb9fe23SJed Brown b = (Mat_SeqMAIJ *)B->data; 1069b9b97703SBarry Smith b->dof = dof; 10704cb9d9b8SBarry Smith b->AIJ = A; 107126fbe8dcSKarl Rupp 1072d72c5c08SBarry Smith if (dof == 2) { 1073cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 1074cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1075cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1076cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1077bcc973b7SBarry Smith } else if (dof == 3) { 1078bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 1079bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1080bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1081bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1082bcc973b7SBarry Smith } else if (dof == 4) { 1083bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 1084bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1085bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1086bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1087f9fae5adSBarry Smith } else if (dof == 5) { 1088f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 1089f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1090f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1091f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 10926cd98798SBarry Smith } else if (dof == 6) { 10936cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 10946cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 10956cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 10966cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1097ed8eea03SBarry Smith } else if (dof == 7) { 1098ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 1099ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 1100ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 1101ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 110266ed3db0SBarry Smith } else if (dof == 8) { 110366ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 110466ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 110566ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 110666ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 11072b692628SMatthew Knepley } else if (dof == 9) { 11082b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 11092b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 11102b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 11112b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 1112b9cda22cSBarry Smith } else if (dof == 10) { 1113b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 1114b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 1115b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 1116b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 1117dbdd7285SBarry Smith } else if (dof == 11) { 1118dbdd7285SBarry Smith B->ops->mult = MatMult_SeqMAIJ_11; 1119dbdd7285SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_11; 1120dbdd7285SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 1121dbdd7285SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 11222f7816d4SBarry Smith } else if (dof == 16) { 11232f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 11242f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 11252f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 11262f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 1127ed1c418cSBarry Smith } else if (dof == 18) { 1128ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18; 1129ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18; 1130ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 1131ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 113282b1193eSBarry Smith } else { 11336861f193SBarry Smith B->ops->mult = MatMult_SeqMAIJ_N; 11346861f193SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_N; 11356861f193SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 11366861f193SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 113782b1193eSBarry Smith } 1138fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 11399566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ)); 1140fdc842d1SBarry Smith #endif 11419566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ)); 11429566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 1143f4a53059SBarry Smith } else { 1144f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1145feb9fe23SJed Brown Mat_MPIMAIJ *b; 1146f4a53059SBarry Smith IS from, to; 1147f4a53059SBarry Smith Vec gvec; 1148f4a53059SBarry Smith 11499566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATMPIMAIJ)); 115026fbe8dcSKarl Rupp 11510298fd71SBarry Smith B->ops->setup = NULL; 1152d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 11530fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 115426fbe8dcSKarl Rupp 1155b9b97703SBarry Smith b = (Mat_MPIMAIJ *)B->data; 1156b9b97703SBarry Smith b->dof = dof; 1157b9b97703SBarry Smith b->A = A; 115826fbe8dcSKarl Rupp 11599566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ)); 11609566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ)); 11614cb9d9b8SBarry Smith 11629566063dSJacob Faibussowitsch PetscCall(VecGetSize(mpiaij->lvec, &n)); 11639566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_SELF, &b->w)); 11649566063dSJacob Faibussowitsch PetscCall(VecSetSizes(b->w, n * dof, n * dof)); 11659566063dSJacob Faibussowitsch PetscCall(VecSetBlockSize(b->w, dof)); 11669566063dSJacob Faibussowitsch PetscCall(VecSetType(b->w, VECSEQ)); 1167f4a53059SBarry Smith 1168f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 11699566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 11709566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to)); 1171f4a53059SBarry Smith 1172f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 11739566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec)); 1174f4a53059SBarry Smith 1175f4a53059SBarry Smith /* generate the scatter context */ 11769566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx)); 1177f4a53059SBarry Smith 11789566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 11799566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 11809566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 1181f4a53059SBarry Smith 1182f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 11834cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 11844cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 11854cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 118626fbe8dcSKarl Rupp 1187fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 11889566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ)); 1189fdc842d1SBarry Smith #endif 11909566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ)); 11919566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 1192f4a53059SBarry Smith } 11937dae84e0SHong Zhang B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 1194ec626240SStefano Zampini B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 11959566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 1196fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 1197fdc842d1SBarry Smith /* temporary until we have CUDA implementation of MAIJ */ 1198fdc842d1SBarry Smith { 1199fdc842d1SBarry Smith PetscBool flg; 1200fdc842d1SBarry Smith if (convert) { 12019566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "")); 120248a46eb9SPierre Jolivet if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B)); 1203fdc842d1SBarry Smith } 1204fdc842d1SBarry Smith } 1205fdc842d1SBarry Smith #endif 1206cd3bbe55SBarry Smith *maij = B; 1207d72c5c08SBarry Smith } 12083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 120982b1193eSBarry Smith } 1210