1be1d678aSKris Buschelman 2c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 3c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 482b1193eSBarry Smith 5b350b9acSSatish Balay /*@ 611a5261eSBarry Smith MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix 7ff585edeSJed Brown 811a5261eSBarry Smith Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 9ff585edeSJed Brown 10ff585edeSJed Brown Input Parameter: 1111a5261eSBarry Smith . A - the `MATMAIJ` matrix 12ff585edeSJed Brown 13ff585edeSJed Brown Output Parameter: 1411a5261eSBarry Smith . B - the `MATAIJ` matrix 15ff585edeSJed Brown 16ff585edeSJed Brown Level: advanced 17ff585edeSJed Brown 1811a5261eSBarry Smith Note: 1911a5261eSBarry Smith The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 20ff585edeSJed Brown 2111a5261eSBarry Smith .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()` 22ff585edeSJed Brown @*/ 23d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) 24d71ae5a4SJacob Faibussowitsch { 25ace3abfcSBarry Smith PetscBool ismpimaij, isseqmaij; 26b9b97703SBarry Smith 27b9b97703SBarry Smith PetscFunctionBegin; 289566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij)); 299566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij)); 30b9b97703SBarry Smith if (ismpimaij) { 31b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 32b9b97703SBarry Smith 33b9b97703SBarry Smith *B = b->A; 34b9b97703SBarry Smith } else if (isseqmaij) { 35b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 36b9b97703SBarry Smith 37b9b97703SBarry Smith *B = b->AIJ; 38b9b97703SBarry Smith } else { 39b9b97703SBarry Smith *B = A; 40b9b97703SBarry Smith } 413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 42b9b97703SBarry Smith } 43b9b97703SBarry Smith 44480dffcfSJed Brown /*@ 4511a5261eSBarry Smith MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size 46ff585edeSJed Brown 473f9fe445SBarry Smith Logically Collective 48ff585edeSJed Brown 49d8d19677SJose E. Roman Input Parameters: 5011a5261eSBarry Smith + A - the `MATMAIJ` matrix 51ff585edeSJed Brown - dof - the block size for the new matrix 52ff585edeSJed Brown 53ff585edeSJed Brown Output Parameter: 5411a5261eSBarry Smith . B - the new `MATMAIJ` matrix 55ff585edeSJed Brown 56ff585edeSJed Brown Level: advanced 57ff585edeSJed Brown 5811a5261eSBarry Smith .seealso: `MATMAIJ`, `MatCreateMAIJ()` 59ff585edeSJed Brown @*/ 60d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) 61d71ae5a4SJacob Faibussowitsch { 620298fd71SBarry Smith Mat Aij = NULL; 63b9b97703SBarry Smith 64b9b97703SBarry Smith PetscFunctionBegin; 65c5eb9154SBarry Smith PetscValidLogicalCollectiveInt(A, dof, 2); 669566063dSJacob Faibussowitsch PetscCall(MatMAIJGetAIJ(A, &Aij)); 679566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(Aij, dof, B)); 683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 69b9b97703SBarry Smith } 70b9b97703SBarry Smith 7180eb40d0SJacob Faibussowitsch static PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 72d71ae5a4SJacob Faibussowitsch { 734cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 7482b1193eSBarry Smith 7582b1193eSBarry Smith PetscFunctionBegin; 769566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 779566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 782e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL)); 799566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL)); 809566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL)); 813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 824cb9d9b8SBarry Smith } 834cb9d9b8SBarry Smith 8480eb40d0SJacob Faibussowitsch static PetscErrorCode MatSetUp_MAIJ(Mat A) 85d71ae5a4SJacob Faibussowitsch { 86356c569eSBarry Smith PetscFunctionBegin; 87ce94432eSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices"); 88356c569eSBarry Smith } 89356c569eSBarry Smith 9080eb40d0SJacob Faibussowitsch static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) 91d71ae5a4SJacob Faibussowitsch { 920fd73130SBarry Smith Mat B; 930fd73130SBarry Smith 940fd73130SBarry Smith PetscFunctionBegin; 959566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 969566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 979566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 990fd73130SBarry Smith } 1000fd73130SBarry Smith 10180eb40d0SJacob Faibussowitsch static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) 102d71ae5a4SJacob Faibussowitsch { 1030fd73130SBarry Smith Mat B; 1040fd73130SBarry Smith 1050fd73130SBarry Smith PetscFunctionBegin; 1069566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); 1079566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 1089566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1100fd73130SBarry Smith } 1110fd73130SBarry Smith 11280eb40d0SJacob Faibussowitsch static PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 113d71ae5a4SJacob Faibussowitsch { 1144cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 1154cb9d9b8SBarry Smith 1164cb9d9b8SBarry Smith PetscFunctionBegin; 1179566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 1189566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->OAIJ)); 1199566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->A)); 1209566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->ctx)); 1219566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->w)); 1229566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1232e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL)); 1249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL)); 1259566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL)); 1269566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 128b9b97703SBarry Smith } 129b9b97703SBarry Smith 1300bad9183SKris Buschelman /*MC 131fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1320bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 13311a5261eSBarry Smith The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices. 1340bad9183SKris Buschelman 1350bad9183SKris Buschelman Operations provided: 13667b8a455SSatish Balay .vb 13711a5261eSBarry Smith MatMult() 13811a5261eSBarry Smith MatMultTranspose() 13911a5261eSBarry Smith MatMultAdd() 14011a5261eSBarry Smith MatMultTransposeAdd() 14167b8a455SSatish Balay .ve 1420bad9183SKris Buschelman 1430bad9183SKris Buschelman Level: advanced 1440bad9183SKris Buschelman 14511a5261eSBarry Smith .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()` 1460bad9183SKris Buschelman M*/ 1470bad9183SKris Buschelman 148d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 149d71ae5a4SJacob Faibussowitsch { 1504cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 15164b52464SHong Zhang PetscMPIInt size; 15282b1193eSBarry Smith 15382b1193eSBarry Smith PetscFunctionBegin; 1544dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&b)); 155b0a32e0cSBarry Smith A->data = (void *)b; 15626fbe8dcSKarl Rupp 1579566063dSJacob Faibussowitsch PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 15826fbe8dcSKarl Rupp 159356c569eSBarry Smith A->ops->setup = MatSetUp_MAIJ; 160f4a53059SBarry Smith 161f4259b30SLisandro Dalcin b->AIJ = NULL; 162cd3bbe55SBarry Smith b->dof = 0; 163f4259b30SLisandro Dalcin b->OAIJ = NULL; 164f4259b30SLisandro Dalcin b->ctx = NULL; 165f4259b30SLisandro Dalcin b->w = NULL; 1669566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 16764b52464SHong Zhang if (size == 1) { 1689566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ)); 16964b52464SHong Zhang } else { 1709566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ)); 17164b52464SHong Zhang } 17232e7c8b0SBarry Smith A->preallocated = PETSC_TRUE; 17332e7c8b0SBarry Smith A->assembled = PETSC_TRUE; 1743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17582b1193eSBarry Smith } 17682b1193eSBarry Smith 17780eb40d0SJacob Faibussowitsch #if PetscHasAttribute(always_inline) 17880eb40d0SJacob Faibussowitsch #define PETSC_FORCE_INLINE __attribute__((always_inline)) 17980eb40d0SJacob Faibussowitsch #else 18080eb40d0SJacob Faibussowitsch #define PETSC_FORCE_INLINE 18180eb40d0SJacob Faibussowitsch #endif 182bb5b24f5SJacob Faibussowitsch 183*9927e305SJacob Faibussowitsch #if defined(__clang__) 184bb5b24f5SJacob Faibussowitsch #define PETSC_PRAGMA_UNROLL _Pragma("unroll") 185bb5b24f5SJacob Faibussowitsch #else 186bb5b24f5SJacob Faibussowitsch #define PETSC_PRAGMA_UNROLL 187bb5b24f5SJacob Faibussowitsch #endif 188bb5b24f5SJacob Faibussowitsch 18980eb40d0SJacob Faibussowitsch enum { 19080eb40d0SJacob Faibussowitsch MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18 19180eb40d0SJacob Faibussowitsch }; 19280eb40d0SJacob Faibussowitsch 19380eb40d0SJacob Faibussowitsch // try as hard as possible to get these "template"s inlined, GCC apparently does take 'inline' 19480eb40d0SJacob Faibussowitsch // keyword into account for these... 19580eb40d0SJacob Faibussowitsch PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 19680eb40d0SJacob Faibussowitsch { 19780eb40d0SJacob Faibussowitsch const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 19880eb40d0SJacob Faibussowitsch const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 19980eb40d0SJacob Faibussowitsch const Mat baij = b->AIJ; 20080eb40d0SJacob Faibussowitsch const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 20180eb40d0SJacob Faibussowitsch const PetscInt m = baij->rmap->n; 20280eb40d0SJacob Faibussowitsch const PetscInt nz = a->nz; 20380eb40d0SJacob Faibussowitsch const PetscInt *idx = a->j; 20480eb40d0SJacob Faibussowitsch const PetscInt *ii = a->i; 20580eb40d0SJacob Faibussowitsch const PetscScalar *v = a->a; 20680eb40d0SJacob Faibussowitsch PetscInt nonzerorow = 0; 20780eb40d0SJacob Faibussowitsch const PetscScalar *x; 20880eb40d0SJacob Faibussowitsch PetscScalar *z; 20980eb40d0SJacob Faibussowitsch 21080eb40d0SJacob Faibussowitsch PetscFunctionBegin; 21180eb40d0SJacob Faibussowitsch PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 21280eb40d0SJacob Faibussowitsch if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz)); 21380eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 21480eb40d0SJacob Faibussowitsch if (mult_add) { 21580eb40d0SJacob Faibussowitsch PetscCall(VecGetArray(zz, &z)); 21680eb40d0SJacob Faibussowitsch } else { 21780eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayWrite(zz, &z)); 21880eb40d0SJacob Faibussowitsch } 21980eb40d0SJacob Faibussowitsch 22080eb40d0SJacob Faibussowitsch for (PetscInt i = 0; i < m; ++i) { 22180eb40d0SJacob Faibussowitsch PetscInt jrow = ii[i]; 22280eb40d0SJacob Faibussowitsch const PetscInt n = ii[i + 1] - jrow; 22380eb40d0SJacob Faibussowitsch // leave a line so clang-format does not align these decls 22480eb40d0SJacob Faibussowitsch PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0}; 22580eb40d0SJacob Faibussowitsch 22680eb40d0SJacob Faibussowitsch nonzerorow += n > 0; 22780eb40d0SJacob Faibussowitsch for (PetscInt j = 0; j < n; ++j, ++jrow) { 22880eb40d0SJacob Faibussowitsch const PetscScalar v_jrow = v[jrow]; 22980eb40d0SJacob Faibussowitsch const PetscInt N_idx_jrow = N * idx[jrow]; 23080eb40d0SJacob Faibussowitsch 231bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL 23280eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k]; 23380eb40d0SJacob Faibussowitsch } 23480eb40d0SJacob Faibussowitsch 235bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL 23680eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) { 23780eb40d0SJacob Faibussowitsch const PetscInt z_idx = N * i + k; 23880eb40d0SJacob Faibussowitsch 23980eb40d0SJacob Faibussowitsch if (mult_add) { 24080eb40d0SJacob Faibussowitsch z[z_idx] += sum[k]; 24180eb40d0SJacob Faibussowitsch } else { 24280eb40d0SJacob Faibussowitsch z[z_idx] = sum[k]; 24380eb40d0SJacob Faibussowitsch } 24480eb40d0SJacob Faibussowitsch } 24580eb40d0SJacob Faibussowitsch } 24680eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow)))); 24780eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 24880eb40d0SJacob Faibussowitsch if (mult_add) { 24980eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &z)); 25080eb40d0SJacob Faibussowitsch } else { 25180eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(zz, &z)); 25280eb40d0SJacob Faibussowitsch } 25380eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25480eb40d0SJacob Faibussowitsch } 25580eb40d0SJacob Faibussowitsch 25680eb40d0SJacob Faibussowitsch PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 25780eb40d0SJacob Faibussowitsch { 25880eb40d0SJacob Faibussowitsch const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 25980eb40d0SJacob Faibussowitsch const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 26080eb40d0SJacob Faibussowitsch const Mat baij = b->AIJ; 26180eb40d0SJacob Faibussowitsch const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 26280eb40d0SJacob Faibussowitsch const PetscInt m = baij->rmap->n; 26380eb40d0SJacob Faibussowitsch const PetscInt nz = a->nz; 26480eb40d0SJacob Faibussowitsch const PetscInt *a_j = a->j; 26580eb40d0SJacob Faibussowitsch const PetscInt *a_i = a->i; 26680eb40d0SJacob Faibussowitsch const PetscScalar *a_a = a->a; 26780eb40d0SJacob Faibussowitsch const PetscScalar *x; 26880eb40d0SJacob Faibussowitsch PetscScalar *z; 26980eb40d0SJacob Faibussowitsch 27080eb40d0SJacob Faibussowitsch PetscFunctionBegin; 27180eb40d0SJacob Faibussowitsch PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 27280eb40d0SJacob Faibussowitsch if (mult_add) { 27380eb40d0SJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 27480eb40d0SJacob Faibussowitsch } else { 27580eb40d0SJacob Faibussowitsch PetscCall(VecSet(zz, 0.0)); 27680eb40d0SJacob Faibussowitsch } 27780eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 27880eb40d0SJacob Faibussowitsch PetscCall(VecGetArray(zz, &z)); 27980eb40d0SJacob Faibussowitsch 28080eb40d0SJacob Faibussowitsch for (PetscInt i = 0; i < m; i++) { 28180eb40d0SJacob Faibussowitsch const PetscInt a_ii = a_i[i]; 28280eb40d0SJacob Faibussowitsch const PetscInt *idx = a_j + a_ii; 28380eb40d0SJacob Faibussowitsch const PetscScalar *v = a_a + a_ii; 28480eb40d0SJacob Faibussowitsch const PetscInt n = a_i[i + 1] - a_ii; 28580eb40d0SJacob Faibussowitsch PetscScalar alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE]; 28680eb40d0SJacob Faibussowitsch 287bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL 28880eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k]; 28980eb40d0SJacob Faibussowitsch for (PetscInt j = 0; j < n; ++j) { 29080eb40d0SJacob Faibussowitsch const PetscInt N_idx_j = N * idx[j]; 29180eb40d0SJacob Faibussowitsch const PetscScalar v_j = v[j]; 29280eb40d0SJacob Faibussowitsch 293bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL 29480eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j; 29580eb40d0SJacob Faibussowitsch } 29680eb40d0SJacob Faibussowitsch } 29780eb40d0SJacob Faibussowitsch 29880eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2 * N * nz)); 29980eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 30080eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &z)); 30180eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 30280eb40d0SJacob Faibussowitsch } 30380eb40d0SJacob Faibussowitsch 304cd3bbe55SBarry Smith /* -------------------------------------------------------------------------------------- */ 30580eb40d0SJacob Faibussowitsch 306*9927e305SJacob Faibussowitsch #define MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(N) \ 307*9927e305SJacob Faibussowitsch static PetscErrorCode PetscConcat(MatMult_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \ 308*9927e305SJacob Faibussowitsch { \ 309*9927e305SJacob Faibussowitsch PetscFunctionBegin; \ 310*9927e305SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \ 311*9927e305SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); \ 312*9927e305SJacob Faibussowitsch } \ 313*9927e305SJacob Faibussowitsch static PetscErrorCode PetscConcat(MatMultTranspose_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \ 314*9927e305SJacob Faibussowitsch { \ 315*9927e305SJacob Faibussowitsch PetscFunctionBegin; \ 316*9927e305SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \ 317*9927e305SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); \ 318*9927e305SJacob Faibussowitsch } \ 319*9927e305SJacob Faibussowitsch static PetscErrorCode PetscConcat(MatMultAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \ 320*9927e305SJacob Faibussowitsch { \ 321*9927e305SJacob Faibussowitsch PetscFunctionBegin; \ 322*9927e305SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \ 323*9927e305SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); \ 324*9927e305SJacob Faibussowitsch } \ 325*9927e305SJacob Faibussowitsch static PetscErrorCode PetscConcat(MatMultTransposeAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \ 326*9927e305SJacob Faibussowitsch { \ 327*9927e305SJacob Faibussowitsch PetscFunctionBegin; \ 328*9927e305SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \ 329*9927e305SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); \ 33082b1193eSBarry Smith } 331bcc973b7SBarry Smith 332*9927e305SJacob Faibussowitsch // clang-format off 333*9927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2) 334*9927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(3) 335*9927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(4) 336*9927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(5) 337*9927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(6) 338*9927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(7) 339*9927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(8) 340*9927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(9) 341*9927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(10) 342*9927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(11) 343*9927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(16) 344*9927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(18) 345*9927e305SJacob Faibussowitsch // clang-format on 346bcc973b7SBarry Smith 347*9927e305SJacob Faibussowitsch #undef MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE 348ed1c418cSBarry Smith 34980eb40d0SJacob Faibussowitsch /*-------------------------------------------------------------------------------------------- */ 35080eb40d0SJacob Faibussowitsch 35180eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 352d71ae5a4SJacob Faibussowitsch { 3536861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 3546861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 3556861f193SBarry Smith const PetscScalar *x, *v; 3566861f193SBarry Smith PetscScalar *y, *sums; 3576861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 3586861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k; 3596861f193SBarry Smith 3606861f193SBarry Smith PetscFunctionBegin; 3619566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3629566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 3639566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 3646861f193SBarry Smith idx = a->j; 3656861f193SBarry Smith v = a->a; 3666861f193SBarry Smith ii = a->i; 3676861f193SBarry Smith 3686861f193SBarry Smith for (i = 0; i < m; i++) { 3696861f193SBarry Smith jrow = ii[i]; 3706861f193SBarry Smith n = ii[i + 1] - jrow; 3716861f193SBarry Smith sums = y + dof * i; 3726861f193SBarry Smith for (j = 0; j < n; j++) { 373ad540459SPierre Jolivet for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 3746861f193SBarry Smith jrow++; 3756861f193SBarry Smith } 3766861f193SBarry Smith } 3776861f193SBarry Smith 3789566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 3799566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3809566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 3813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3826861f193SBarry Smith } 3836861f193SBarry Smith 38480eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 385d71ae5a4SJacob Faibussowitsch { 3866861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 3876861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 3886861f193SBarry Smith const PetscScalar *x, *v; 3896861f193SBarry Smith PetscScalar *y, *sums; 3906861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 3916861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k; 3926861f193SBarry Smith 3936861f193SBarry Smith PetscFunctionBegin; 3949566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 3959566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3969566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 3976861f193SBarry Smith idx = a->j; 3986861f193SBarry Smith v = a->a; 3996861f193SBarry Smith ii = a->i; 4006861f193SBarry Smith 4016861f193SBarry Smith for (i = 0; i < m; i++) { 4026861f193SBarry Smith jrow = ii[i]; 4036861f193SBarry Smith n = ii[i + 1] - jrow; 4046861f193SBarry Smith sums = y + dof * i; 4056861f193SBarry Smith for (j = 0; j < n; j++) { 406ad540459SPierre Jolivet for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 4076861f193SBarry Smith jrow++; 4086861f193SBarry Smith } 4096861f193SBarry Smith } 4106861f193SBarry Smith 4119566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 4129566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4139566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 4143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4156861f193SBarry Smith } 4166861f193SBarry Smith 41780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 418d71ae5a4SJacob Faibussowitsch { 4196861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 4206861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 4216861f193SBarry Smith const PetscScalar *x, *v, *alpha; 4226861f193SBarry Smith PetscScalar *y; 4236861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 4246861f193SBarry Smith PetscInt n, i, k; 4256861f193SBarry Smith 4266861f193SBarry Smith PetscFunctionBegin; 4279566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4289566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 4299566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 4306861f193SBarry Smith for (i = 0; i < m; i++) { 4316861f193SBarry Smith idx = a->j + a->i[i]; 4326861f193SBarry Smith v = a->a + a->i[i]; 4336861f193SBarry Smith n = a->i[i + 1] - a->i[i]; 4346861f193SBarry Smith alpha = x + dof * i; 4356861f193SBarry Smith while (n-- > 0) { 436ad540459SPierre Jolivet for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 4379371c9d4SSatish Balay idx++; 4389371c9d4SSatish Balay v++; 4396861f193SBarry Smith } 4406861f193SBarry Smith } 4419566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 4429566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4439566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 4443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4456861f193SBarry Smith } 4466861f193SBarry Smith 44780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 448d71ae5a4SJacob Faibussowitsch { 4496861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 4506861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 4516861f193SBarry Smith const PetscScalar *x, *v, *alpha; 4526861f193SBarry Smith PetscScalar *y; 4536861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 4546861f193SBarry Smith PetscInt n, i, k; 4556861f193SBarry Smith 4566861f193SBarry Smith PetscFunctionBegin; 4579566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 4589566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4599566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 4606861f193SBarry Smith for (i = 0; i < m; i++) { 4616861f193SBarry Smith idx = a->j + a->i[i]; 4626861f193SBarry Smith v = a->a + a->i[i]; 4636861f193SBarry Smith n = a->i[i + 1] - a->i[i]; 4646861f193SBarry Smith alpha = x + dof * i; 4656861f193SBarry Smith while (n-- > 0) { 466ad540459SPierre Jolivet for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 4679371c9d4SSatish Balay idx++; 4689371c9d4SSatish Balay v++; 4696861f193SBarry Smith } 4706861f193SBarry Smith } 4719566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 4729566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4739566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 4743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4756861f193SBarry Smith } 4766861f193SBarry Smith 47780eb40d0SJacob Faibussowitsch /*-------------------------------------------------------------------------------------------- */ 47880eb40d0SJacob Faibussowitsch 47980eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 480d71ae5a4SJacob Faibussowitsch { 4814cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 482f4a53059SBarry Smith 483b24ad042SBarry Smith PetscFunctionBegin; 484f4a53059SBarry Smith /* start the scatter */ 4859566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 4869566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy)); 4879566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 4889566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy)); 4893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 490f4a53059SBarry Smith } 491f4a53059SBarry Smith 49280eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 493d71ae5a4SJacob Faibussowitsch { 4944cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 495b24ad042SBarry Smith 4964cb9d9b8SBarry Smith PetscFunctionBegin; 4979566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 4989566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy)); 4999566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 5009566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 5013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5024cb9d9b8SBarry Smith } 5034cb9d9b8SBarry Smith 50480eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 505d71ae5a4SJacob Faibussowitsch { 5064cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 5074cb9d9b8SBarry Smith 508b24ad042SBarry Smith PetscFunctionBegin; 5094cb9d9b8SBarry Smith /* start the scatter */ 5109566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 5119566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz)); 5129566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 5139566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 5143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5154cb9d9b8SBarry Smith } 5164cb9d9b8SBarry Smith 51780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 518d71ae5a4SJacob Faibussowitsch { 5194cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 520b24ad042SBarry Smith 5214cb9d9b8SBarry Smith PetscFunctionBegin; 5229566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 5239566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz)); 5249566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 5259566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 5263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5274cb9d9b8SBarry Smith } 5284cb9d9b8SBarry Smith 52995ddefa2SHong Zhang /* ---------------------------------------------------------------- */ 53080eb40d0SJacob Faibussowitsch 53180eb40d0SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) 532d71ae5a4SJacob Faibussowitsch { 5334222ddf1SHong Zhang Mat_Product *product = C->product; 5344222ddf1SHong Zhang 5354222ddf1SHong Zhang PetscFunctionBegin; 5364222ddf1SHong Zhang if (product->type == MATPRODUCT_PtAP) { 5374222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 53898921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]); 5393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5404222ddf1SHong Zhang } 5414222ddf1SHong Zhang 54280eb40d0SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) 543d71ae5a4SJacob Faibussowitsch { 5444222ddf1SHong Zhang Mat_Product *product = C->product; 5454222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 5464222ddf1SHong Zhang Mat A = product->A, P = product->B; 5474222ddf1SHong Zhang PetscInt alg = 1; /* set default algorithm */ 5484222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 5494222ddf1SHong Zhang const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"}; 5504222ddf1SHong Zhang PetscInt nalg = 4; 5514222ddf1SHong Zhang #else 5524222ddf1SHong Zhang const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"}; 5534222ddf1SHong Zhang PetscInt nalg = 5; 5544222ddf1SHong Zhang #endif 5554222ddf1SHong Zhang 5564222ddf1SHong Zhang PetscFunctionBegin; 55708401ef6SPierre Jolivet PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]); 5584222ddf1SHong Zhang 5594222ddf1SHong Zhang /* PtAP */ 5604222ddf1SHong Zhang /* Check matrix local sizes */ 5619371c9d4SSatish Balay PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5629371c9d4SSatish Balay A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend); 5639371c9d4SSatish Balay PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5649371c9d4SSatish Balay A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend); 5654222ddf1SHong Zhang 5664222ddf1SHong Zhang /* Set the default algorithm */ 5679566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "default", &flg)); 56848a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 5694222ddf1SHong Zhang 5704222ddf1SHong Zhang /* Get runtime option */ 571d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 5729566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg)); 57348a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 574d0609cedSBarry Smith PetscOptionsEnd(); 5754222ddf1SHong Zhang 5769566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg)); 5774222ddf1SHong Zhang if (flg) { 5784222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 5793ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5804222ddf1SHong Zhang } 5814222ddf1SHong Zhang 5829566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg)); 5834222ddf1SHong Zhang if (flg) { 5844222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 5853ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5864222ddf1SHong Zhang } 5874222ddf1SHong Zhang 5884222ddf1SHong Zhang /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 5899566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 5909566063dSJacob Faibussowitsch PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P)); 5919566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(C)); 5923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5934222ddf1SHong Zhang } 5944222ddf1SHong Zhang 5954222ddf1SHong Zhang /* ---------------------------------------------------------------- */ 59680eb40d0SJacob Faibussowitsch static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) 59780eb40d0SJacob Faibussowitsch { 59880eb40d0SJacob Faibussowitsch /* This routine requires testing -- first draft only */ 59980eb40d0SJacob Faibussowitsch Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 60080eb40d0SJacob Faibussowitsch Mat P = pp->AIJ; 60180eb40d0SJacob Faibussowitsch Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 60280eb40d0SJacob Faibussowitsch Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data; 60380eb40d0SJacob Faibussowitsch Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 60480eb40d0SJacob Faibussowitsch const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj; 60580eb40d0SJacob Faibussowitsch const PetscInt *ci = c->i, *cj = c->j, *cjj; 60680eb40d0SJacob Faibussowitsch const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof; 60780eb40d0SJacob Faibussowitsch PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense; 60880eb40d0SJacob Faibussowitsch const MatScalar *aa = a->a, *pa = p->a, *pA, *paj; 60980eb40d0SJacob Faibussowitsch MatScalar *ca = c->a, *caj, *apa; 61080eb40d0SJacob Faibussowitsch 61180eb40d0SJacob Faibussowitsch PetscFunctionBegin; 61280eb40d0SJacob Faibussowitsch /* Allocate temporary array for storage of one row of A*P */ 61380eb40d0SJacob Faibussowitsch PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense)); 61480eb40d0SJacob Faibussowitsch 61580eb40d0SJacob Faibussowitsch /* Clear old values in C */ 61680eb40d0SJacob Faibussowitsch PetscCall(PetscArrayzero(ca, ci[cm])); 61780eb40d0SJacob Faibussowitsch 61880eb40d0SJacob Faibussowitsch for (i = 0; i < am; i++) { 61980eb40d0SJacob Faibussowitsch /* Form sparse row of A*P */ 62080eb40d0SJacob Faibussowitsch anzi = ai[i + 1] - ai[i]; 62180eb40d0SJacob Faibussowitsch apnzj = 0; 62280eb40d0SJacob Faibussowitsch for (j = 0; j < anzi; j++) { 62380eb40d0SJacob Faibussowitsch /* Get offset within block of P */ 62480eb40d0SJacob Faibussowitsch pshift = *aj % ppdof; 62580eb40d0SJacob Faibussowitsch /* Get block row of P */ 62680eb40d0SJacob Faibussowitsch prow = *aj++ / ppdof; /* integer division */ 62780eb40d0SJacob Faibussowitsch pnzj = pi[prow + 1] - pi[prow]; 62880eb40d0SJacob Faibussowitsch pjj = pj + pi[prow]; 62980eb40d0SJacob Faibussowitsch paj = pa + pi[prow]; 63080eb40d0SJacob Faibussowitsch for (k = 0; k < pnzj; k++) { 63180eb40d0SJacob Faibussowitsch poffset = pjj[k] * ppdof + pshift; 63280eb40d0SJacob Faibussowitsch if (!apjdense[poffset]) { 63380eb40d0SJacob Faibussowitsch apjdense[poffset] = -1; 63480eb40d0SJacob Faibussowitsch apj[apnzj++] = poffset; 63580eb40d0SJacob Faibussowitsch } 63680eb40d0SJacob Faibussowitsch apa[poffset] += (*aa) * paj[k]; 63780eb40d0SJacob Faibussowitsch } 63880eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * pnzj)); 63980eb40d0SJacob Faibussowitsch aa++; 64080eb40d0SJacob Faibussowitsch } 64180eb40d0SJacob Faibussowitsch 64280eb40d0SJacob Faibussowitsch /* Sort the j index array for quick sparse axpy. */ 64380eb40d0SJacob Faibussowitsch /* Note: a array does not need sorting as it is in dense storage locations. */ 64480eb40d0SJacob Faibussowitsch PetscCall(PetscSortInt(apnzj, apj)); 64580eb40d0SJacob Faibussowitsch 64680eb40d0SJacob Faibussowitsch /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 64780eb40d0SJacob Faibussowitsch prow = i / ppdof; /* integer division */ 64880eb40d0SJacob Faibussowitsch pshift = i % ppdof; 64980eb40d0SJacob Faibussowitsch poffset = pi[prow]; 65080eb40d0SJacob Faibussowitsch pnzi = pi[prow + 1] - poffset; 65180eb40d0SJacob Faibussowitsch /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 65280eb40d0SJacob Faibussowitsch pJ = pj + poffset; 65380eb40d0SJacob Faibussowitsch pA = pa + poffset; 65480eb40d0SJacob Faibussowitsch for (j = 0; j < pnzi; j++) { 65580eb40d0SJacob Faibussowitsch crow = (*pJ) * ppdof + pshift; 65680eb40d0SJacob Faibussowitsch cjj = cj + ci[crow]; 65780eb40d0SJacob Faibussowitsch caj = ca + ci[crow]; 65880eb40d0SJacob Faibussowitsch pJ++; 65980eb40d0SJacob Faibussowitsch /* Perform sparse axpy operation. Note cjj includes apj. */ 66080eb40d0SJacob Faibussowitsch for (k = 0, nextap = 0; nextap < apnzj; k++) { 66180eb40d0SJacob Faibussowitsch if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]]; 66280eb40d0SJacob Faibussowitsch } 66380eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * apnzj)); 66480eb40d0SJacob Faibussowitsch pA++; 66580eb40d0SJacob Faibussowitsch } 66680eb40d0SJacob Faibussowitsch 66780eb40d0SJacob Faibussowitsch /* Zero the current row info for A*P */ 66880eb40d0SJacob Faibussowitsch for (j = 0; j < apnzj; j++) { 66980eb40d0SJacob Faibussowitsch apa[apj[j]] = 0.; 67080eb40d0SJacob Faibussowitsch apjdense[apj[j]] = 0; 67180eb40d0SJacob Faibussowitsch } 67280eb40d0SJacob Faibussowitsch } 67380eb40d0SJacob Faibussowitsch 67480eb40d0SJacob Faibussowitsch /* Assemble the final matrix and clean up */ 67580eb40d0SJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 67680eb40d0SJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 67780eb40d0SJacob Faibussowitsch PetscCall(PetscFree3(apa, apj, apjdense)); 67880eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 67980eb40d0SJacob Faibussowitsch } 68080eb40d0SJacob Faibussowitsch 68180eb40d0SJacob Faibussowitsch static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) 682d71ae5a4SJacob Faibussowitsch { 6830298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 6847ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 6857ba1a0bfSKris Buschelman Mat P = pp->AIJ; 6867ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c; 687d2840e09SBarry Smith PetscInt *pti, *ptj, *ptJ; 6887ba1a0bfSKris Buschelman PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj; 689d2840e09SBarry Smith const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof; 690d2840e09SBarry Smith PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn; 6917ba1a0bfSKris Buschelman MatScalar *ca; 692d2840e09SBarry Smith const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj; 6937ba1a0bfSKris Buschelman 6947ba1a0bfSKris Buschelman PetscFunctionBegin; 6957ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 6969566063dSJacob Faibussowitsch PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 6977ba1a0bfSKris Buschelman 6987ba1a0bfSKris Buschelman cn = pn * ppdof; 6997ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 7007ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 7019566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(cn + 1, &ci)); 7027ba1a0bfSKris Buschelman ci[0] = 0; 7037ba1a0bfSKris Buschelman 7047ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 7059566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow)); 7069566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ptadenserow, an)); 7079566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(denserow, cn)); 7087ba1a0bfSKris Buschelman 7097ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 7107ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 7117ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 7129566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space)); 7137ba1a0bfSKris Buschelman current_space = free_space; 7147ba1a0bfSKris Buschelman 7157ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 7167ba1a0bfSKris Buschelman for (i = 0; i < pn; i++) { 7177ba1a0bfSKris Buschelman ptnzi = pti[i + 1] - pti[i]; 7187ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 7197ba1a0bfSKris Buschelman for (dof = 0; dof < ppdof; dof++) { 7207ba1a0bfSKris Buschelman ptanzi = 0; 7217ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 7227ba1a0bfSKris Buschelman for (j = 0; j < ptnzi; j++) { 7237ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 7247ba1a0bfSKris Buschelman arow = ptJ[j] * ppdof + dof; 7257ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 7267ba1a0bfSKris Buschelman anzj = ai[arow + 1] - ai[arow]; 7277ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 7287ba1a0bfSKris Buschelman for (k = 0; k < anzj; k++) { 7297ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 7307ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 7317ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 7327ba1a0bfSKris Buschelman } 7337ba1a0bfSKris Buschelman } 7347ba1a0bfSKris Buschelman } 7357ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 7367ba1a0bfSKris Buschelman ptaj = ptasparserow; 7377ba1a0bfSKris Buschelman cnzi = 0; 7387ba1a0bfSKris Buschelman for (j = 0; j < ptanzi; j++) { 7397ba1a0bfSKris Buschelman /* Get offset within block of P */ 7407ba1a0bfSKris Buschelman pshift = *ptaj % ppdof; 7417ba1a0bfSKris Buschelman /* Get block row of P */ 7427ba1a0bfSKris Buschelman prow = (*ptaj++) / ppdof; /* integer division */ 7437ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 7447ba1a0bfSKris Buschelman pnzj = pi[prow + 1] - pi[prow]; 7457ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 7467ba1a0bfSKris Buschelman for (k = 0; k < pnzj; k++) { 7477ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 7487ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 7497ba1a0bfSKris Buschelman if (!denserow[pjj[k] * ppdof + pshift]) { 7507ba1a0bfSKris Buschelman denserow[pjj[k] * ppdof + pshift] = -1; 7517ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k] * ppdof + pshift; 7527ba1a0bfSKris Buschelman } 7537ba1a0bfSKris Buschelman } 7547ba1a0bfSKris Buschelman } 7557ba1a0bfSKris Buschelman 7567ba1a0bfSKris Buschelman /* sort sparserow */ 7579566063dSJacob Faibussowitsch PetscCall(PetscSortInt(cnzi, sparserow)); 7587ba1a0bfSKris Buschelman 7597ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 7607ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 76148a46eb9SPierre Jolivet if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); 7627ba1a0bfSKris Buschelman 7637ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 7649566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi)); 76526fbe8dcSKarl Rupp 7667ba1a0bfSKris Buschelman current_space->array += cnzi; 7677ba1a0bfSKris Buschelman current_space->local_used += cnzi; 7687ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 7697ba1a0bfSKris Buschelman 77026fbe8dcSKarl Rupp for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 77126fbe8dcSKarl Rupp for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0; 77226fbe8dcSKarl Rupp 7737ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 7747ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 7757ba1a0bfSKris Buschelman ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi; 7767ba1a0bfSKris Buschelman } 7777ba1a0bfSKris Buschelman } 7787ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 7797ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 7807ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 7819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[cn] + 1, &cj)); 7829566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 7839566063dSJacob Faibussowitsch PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow)); 7847ba1a0bfSKris Buschelman 7857ba1a0bfSKris Buschelman /* Allocate space for ca */ 7869566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ci[cn] + 1, &ca)); 7877ba1a0bfSKris Buschelman 7887ba1a0bfSKris Buschelman /* put together the new matrix */ 7899566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C)); 7909566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(C, pp->dof)); 7917ba1a0bfSKris Buschelman 7927ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 7937ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 7944222ddf1SHong Zhang c = (Mat_SeqAIJ *)(C->data); 795e6b907acSBarry Smith c->free_a = PETSC_TRUE; 796e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 7977ba1a0bfSKris Buschelman c->nonew = 0; 79826fbe8dcSKarl Rupp 7994222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 8004222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 8017ba1a0bfSKris Buschelman 8027ba1a0bfSKris Buschelman /* Clean up. */ 8039566063dSJacob Faibussowitsch PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 8043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8057ba1a0bfSKris Buschelman } 8067ba1a0bfSKris Buschelman 807d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) 808d71ae5a4SJacob Faibussowitsch { 8094222ddf1SHong Zhang Mat_Product *product = C->product; 8104222ddf1SHong Zhang Mat A = product->A, P = product->B; 8112121bac1SHong Zhang 8122121bac1SHong Zhang PetscFunctionBegin; 8139566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C)); 8143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8152121bac1SHong Zhang } 8162121bac1SHong Zhang 817bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat); 818bc8e477aSFande Kong 819d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) 820d71ae5a4SJacob Faibussowitsch { 821bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 822bc8e477aSFande Kong 823bc8e477aSFande Kong PetscFunctionBegin; 82434bcad68SFande Kong 8259566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C)); 8263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 827bc8e477aSFande Kong } 828bc8e477aSFande Kong 8294222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat); 830bc8e477aSFande Kong 831d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) 832d71ae5a4SJacob Faibussowitsch { 833bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 834bc8e477aSFande Kong 835bc8e477aSFande Kong PetscFunctionBegin; 8369566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C)); 8374222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 8383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 839bc8e477aSFande Kong } 840bc8e477aSFande Kong 841bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat); 842bc8e477aSFande Kong 843d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) 844d71ae5a4SJacob Faibussowitsch { 845bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 846bc8e477aSFande Kong 847bc8e477aSFande Kong PetscFunctionBegin; 84834bcad68SFande Kong 8499566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C)); 8503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 851bc8e477aSFande Kong } 852bc8e477aSFande Kong 8534222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat); 854bc8e477aSFande Kong 855d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) 856d71ae5a4SJacob Faibussowitsch { 857bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 858bc8e477aSFande Kong 859bc8e477aSFande Kong PetscFunctionBegin; 86034bcad68SFande Kong 8619566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C)); 8624222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 8633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 864bc8e477aSFande Kong } 865bc8e477aSFande Kong 866d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) 867d71ae5a4SJacob Faibussowitsch { 8684222ddf1SHong Zhang Mat_Product *product = C->product; 8694222ddf1SHong Zhang Mat A = product->A, P = product->B; 8704222ddf1SHong Zhang PetscBool flg; 871bc8e477aSFande Kong 872bc8e477aSFande Kong PetscFunctionBegin; 8739566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce", &flg)); 8744222ddf1SHong Zhang if (flg) { 8759566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C)); 8764222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 8773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 878bc8e477aSFande Kong } 87934bcad68SFande Kong 8809566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg)); 8814222ddf1SHong Zhang if (flg) { 8829566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C)); 8834222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 8843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8854222ddf1SHong Zhang } 88634bcad68SFande Kong 8874222ddf1SHong Zhang SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported"); 888bc8e477aSFande Kong } 889bc8e477aSFande Kong 890d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 891d71ae5a4SJacob Faibussowitsch { 8929c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 893ceb03754SKris Buschelman Mat a = b->AIJ, B; 8949c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data; 8950fd73130SBarry Smith PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof; 896ba8c8a56SBarry Smith PetscInt *cols; 897ba8c8a56SBarry Smith PetscScalar *vals; 8989c4fc4c7SBarry Smith 8999c4fc4c7SBarry Smith PetscFunctionBegin; 9009566063dSJacob Faibussowitsch PetscCall(MatGetSize(a, &m, &n)); 9019566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(dof * m, &ilen)); 9029c4fc4c7SBarry Smith for (i = 0; i < m; i++) { 9039c4fc4c7SBarry Smith nmax = PetscMax(nmax, aij->ilen[i]); 90426fbe8dcSKarl Rupp for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i]; 9059c4fc4c7SBarry Smith } 9069566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 9079566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n)); 9089566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 9099566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen)); 9109566063dSJacob Faibussowitsch PetscCall(PetscFree(ilen)); 9119566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nmax, &icols)); 9129c4fc4c7SBarry Smith ii = 0; 9139c4fc4c7SBarry Smith for (i = 0; i < m; i++) { 9149566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 9150fd73130SBarry Smith for (j = 0; j < dof; j++) { 91626fbe8dcSKarl Rupp for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j; 9179566063dSJacob Faibussowitsch PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 9189c4fc4c7SBarry Smith ii++; 9199c4fc4c7SBarry Smith } 9209566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 9219c4fc4c7SBarry Smith } 9229566063dSJacob Faibussowitsch PetscCall(PetscFree(icols)); 9239566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 9249566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 925ceb03754SKris Buschelman 926511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 9279566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 928c3d102feSKris Buschelman } else { 929ceb03754SKris Buschelman *newmat = B; 930c3d102feSKris Buschelman } 9313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9329c4fc4c7SBarry Smith } 9339c4fc4c7SBarry Smith 934c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 935be1d678aSKris Buschelman 936d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 937d71ae5a4SJacob Faibussowitsch { 9380fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data; 939ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B; 9400fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ; 9410fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data; 9420fd73130SBarry Smith Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data; 9430fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data; 9440298fd71SBarry Smith PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0; 9450298fd71SBarry Smith PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL; 9460fd73130SBarry Smith PetscInt rstart, cstart, *garray, ii, k; 9470fd73130SBarry Smith PetscScalar *vals, *ovals; 9480fd73130SBarry Smith 9490fd73130SBarry Smith PetscFunctionBegin; 9509566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz)); 951d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) { 9520fd73130SBarry Smith nmax = PetscMax(nmax, AIJ->ilen[i]); 9530fd73130SBarry Smith onmax = PetscMax(onmax, OAIJ->ilen[i]); 9540fd73130SBarry Smith for (j = 0; j < dof; j++) { 9550fd73130SBarry Smith dnz[dof * i + j] = AIJ->ilen[i]; 9560fd73130SBarry Smith onz[dof * i + j] = OAIJ->ilen[i]; 9570fd73130SBarry Smith } 9580fd73130SBarry Smith } 9599566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 9609566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 9619566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 9629566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz)); 9639566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, dof)); 9649566063dSJacob Faibussowitsch PetscCall(PetscFree2(dnz, onz)); 9650fd73130SBarry Smith 9669566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols)); 967d0f46423SBarry Smith rstart = dof * maij->A->rmap->rstart; 968d0f46423SBarry Smith cstart = dof * maij->A->cmap->rstart; 9690fd73130SBarry Smith garray = mpiaij->garray; 9700fd73130SBarry Smith 9710fd73130SBarry Smith ii = rstart; 972d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) { 9739566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 9749566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 9750fd73130SBarry Smith for (j = 0; j < dof; j++) { 976ad540459SPierre Jolivet for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j; 977ad540459SPierre Jolivet for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j; 9789566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 9799566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES)); 9800fd73130SBarry Smith ii++; 9810fd73130SBarry Smith } 9829566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 9839566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 9840fd73130SBarry Smith } 9859566063dSJacob Faibussowitsch PetscCall(PetscFree2(icols, oicols)); 9860fd73130SBarry Smith 9879566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 9889566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 989ceb03754SKris Buschelman 990511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 9917adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 9927adad957SLisandro Dalcin ((PetscObject)A)->refct = 1; 99326fbe8dcSKarl Rupp 9949566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 99526fbe8dcSKarl Rupp 9967adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 997c3d102feSKris Buschelman } else { 998ceb03754SKris Buschelman *newmat = B; 999c3d102feSKris Buschelman } 10003ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 10010fd73130SBarry Smith } 10020fd73130SBarry Smith 100380eb40d0SJacob Faibussowitsch static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) 1004d71ae5a4SJacob Faibussowitsch { 10059e516c8fSBarry Smith Mat A; 10069e516c8fSBarry Smith 10079e516c8fSBarry Smith PetscFunctionBegin; 10089566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 10099566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 10109566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 10113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 10129e516c8fSBarry Smith } 10130fd73130SBarry Smith 101480eb40d0SJacob Faibussowitsch static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) 1015d71ae5a4SJacob Faibussowitsch { 1016ec626240SStefano Zampini Mat A; 1017ec626240SStefano Zampini 1018ec626240SStefano Zampini PetscFunctionBegin; 10199566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 10209566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat)); 10219566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 10223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1023ec626240SStefano Zampini } 1024ec626240SStefano Zampini 1025bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 1026480dffcfSJed Brown /*@ 10270bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 10280bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 102911a5261eSBarry Smith way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices, 103011a5261eSBarry Smith and `MATMPIAIJ` for distributed matrices. 10310bad9183SKris Buschelman 1032ff585edeSJed Brown Collective 1033ff585edeSJed Brown 1034ff585edeSJed Brown Input Parameters: 103511a5261eSBarry Smith + A - the `MATAIJ` matrix describing the action on blocks 1036ff585edeSJed Brown - dof - the block size (number of components per node) 1037ff585edeSJed Brown 1038ff585edeSJed Brown Output Parameter: 103911a5261eSBarry Smith . maij - the new `MATMAIJ` matrix 1040ff585edeSJed Brown 10410bad9183SKris Buschelman Operations provided: 104267b8a455SSatish Balay .vb 104311a5261eSBarry Smith MatMult() 104411a5261eSBarry Smith MatMultTranspose() 104511a5261eSBarry Smith MatMultAdd() 104611a5261eSBarry Smith MatMultTransposeAdd() 104711a5261eSBarry Smith MatView() 104867b8a455SSatish Balay .ve 10490bad9183SKris Buschelman 10500bad9183SKris Buschelman Level: advanced 10510bad9183SKris Buschelman 105211a5261eSBarry Smith .seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ` 1053ff585edeSJed Brown @*/ 1054d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) 1055d71ae5a4SJacob Faibussowitsch { 1056b24ad042SBarry Smith PetscInt n; 105782b1193eSBarry Smith Mat B; 105890f67b22SBarry Smith PetscBool flg; 1059fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 1060fdc842d1SBarry Smith /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 1061fdc842d1SBarry Smith PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 1062fdc842d1SBarry Smith #endif 106382b1193eSBarry Smith 106482b1193eSBarry Smith PetscFunctionBegin; 1065fdc842d1SBarry Smith dof = PetscAbs(dof); 10669566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A)); 1067d72c5c08SBarry Smith 106826fbe8dcSKarl Rupp if (dof == 1) *maij = A; 106926fbe8dcSKarl Rupp else { 10709566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1071c248e2fdSStefano Zampini /* propagate vec type */ 10729566063dSJacob Faibussowitsch PetscCall(MatSetVecType(B, A->defaultvectype)); 10739566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N)); 10749566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, dof)); 10759566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, dof)); 10769566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 10779566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 107826fbe8dcSKarl Rupp 1079cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 1080d72c5c08SBarry Smith 108190f67b22SBarry Smith PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 108290f67b22SBarry Smith if (flg) { 1083feb9fe23SJed Brown Mat_SeqMAIJ *b; 1084feb9fe23SJed Brown 10859566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATSEQMAIJ)); 108626fbe8dcSKarl Rupp 10870298fd71SBarry Smith B->ops->setup = NULL; 10884cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 10890fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 10904222ddf1SHong Zhang 1091feb9fe23SJed Brown b = (Mat_SeqMAIJ *)B->data; 1092b9b97703SBarry Smith b->dof = dof; 10934cb9d9b8SBarry Smith b->AIJ = A; 109426fbe8dcSKarl Rupp 1095d72c5c08SBarry Smith if (dof == 2) { 1096cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 1097cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1098cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1099cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1100bcc973b7SBarry Smith } else if (dof == 3) { 1101bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 1102bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1103bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1104bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1105bcc973b7SBarry Smith } else if (dof == 4) { 1106bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 1107bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1108bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1109bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1110f9fae5adSBarry Smith } else if (dof == 5) { 1111f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 1112f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1113f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1114f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 11156cd98798SBarry Smith } else if (dof == 6) { 11166cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 11176cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 11186cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 11196cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1120ed8eea03SBarry Smith } else if (dof == 7) { 1121ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 1122ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 1123ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 1124ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 112566ed3db0SBarry Smith } else if (dof == 8) { 112666ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 112766ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 112866ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 112966ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 11302b692628SMatthew Knepley } else if (dof == 9) { 11312b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 11322b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 11332b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 11342b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 1135b9cda22cSBarry Smith } else if (dof == 10) { 1136b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 1137b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 1138b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 1139b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 1140dbdd7285SBarry Smith } else if (dof == 11) { 1141dbdd7285SBarry Smith B->ops->mult = MatMult_SeqMAIJ_11; 1142dbdd7285SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_11; 1143dbdd7285SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 1144dbdd7285SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 11452f7816d4SBarry Smith } else if (dof == 16) { 11462f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 11472f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 11482f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 11492f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 1150ed1c418cSBarry Smith } else if (dof == 18) { 1151ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18; 1152ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18; 1153ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 1154ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 115582b1193eSBarry Smith } else { 11566861f193SBarry Smith B->ops->mult = MatMult_SeqMAIJ_N; 11576861f193SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_N; 11586861f193SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 11596861f193SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 116082b1193eSBarry Smith } 1161fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 11629566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ)); 1163fdc842d1SBarry Smith #endif 11649566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ)); 11659566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 1166f4a53059SBarry Smith } else { 1167f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1168feb9fe23SJed Brown Mat_MPIMAIJ *b; 1169f4a53059SBarry Smith IS from, to; 1170f4a53059SBarry Smith Vec gvec; 1171f4a53059SBarry Smith 11729566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATMPIMAIJ)); 117326fbe8dcSKarl Rupp 11740298fd71SBarry Smith B->ops->setup = NULL; 1175d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 11760fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 117726fbe8dcSKarl Rupp 1178b9b97703SBarry Smith b = (Mat_MPIMAIJ *)B->data; 1179b9b97703SBarry Smith b->dof = dof; 1180b9b97703SBarry Smith b->A = A; 118126fbe8dcSKarl Rupp 11829566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ)); 11839566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ)); 11844cb9d9b8SBarry Smith 11859566063dSJacob Faibussowitsch PetscCall(VecGetSize(mpiaij->lvec, &n)); 11869566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_SELF, &b->w)); 11879566063dSJacob Faibussowitsch PetscCall(VecSetSizes(b->w, n * dof, n * dof)); 11889566063dSJacob Faibussowitsch PetscCall(VecSetBlockSize(b->w, dof)); 11899566063dSJacob Faibussowitsch PetscCall(VecSetType(b->w, VECSEQ)); 1190f4a53059SBarry Smith 1191f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 11929566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 11939566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to)); 1194f4a53059SBarry Smith 1195f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 11969566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec)); 1197f4a53059SBarry Smith 1198f4a53059SBarry Smith /* generate the scatter context */ 11999566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx)); 1200f4a53059SBarry Smith 12019566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 12029566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 12039566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 1204f4a53059SBarry Smith 1205f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 12064cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 12074cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 12084cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 120926fbe8dcSKarl Rupp 1210fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 12119566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ)); 1212fdc842d1SBarry Smith #endif 12139566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ)); 12149566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 1215f4a53059SBarry Smith } 12167dae84e0SHong Zhang B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 1217ec626240SStefano Zampini B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 12189566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 1219fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 1220fdc842d1SBarry Smith /* temporary until we have CUDA implementation of MAIJ */ 1221fdc842d1SBarry Smith { 1222fdc842d1SBarry Smith PetscBool flg; 1223fdc842d1SBarry Smith if (convert) { 12249566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "")); 122548a46eb9SPierre Jolivet if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B)); 1226fdc842d1SBarry Smith } 1227fdc842d1SBarry Smith } 1228fdc842d1SBarry Smith #endif 1229cd3bbe55SBarry Smith *maij = B; 1230d72c5c08SBarry Smith } 12313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 123282b1193eSBarry Smith } 1233