1be1d678aSKris Buschelman 2c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 3c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 482b1193eSBarry Smith 5b350b9acSSatish Balay /*@ 611a5261eSBarry Smith MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix 7ff585edeSJed Brown 811a5261eSBarry Smith Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 9ff585edeSJed Brown 10ff585edeSJed Brown Input Parameter: 1111a5261eSBarry Smith . A - the `MATMAIJ` matrix 12ff585edeSJed Brown 13ff585edeSJed Brown Output Parameter: 1411a5261eSBarry Smith . B - the `MATAIJ` matrix 15ff585edeSJed Brown 16ff585edeSJed Brown Level: advanced 17ff585edeSJed Brown 1811a5261eSBarry Smith Note: 1911a5261eSBarry Smith The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 20ff585edeSJed Brown 21*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()` 22ff585edeSJed Brown @*/ 23d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) 24d71ae5a4SJacob Faibussowitsch { 25ace3abfcSBarry Smith PetscBool ismpimaij, isseqmaij; 26b9b97703SBarry Smith 27b9b97703SBarry Smith PetscFunctionBegin; 289566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij)); 299566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij)); 30b9b97703SBarry Smith if (ismpimaij) { 31b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 32b9b97703SBarry Smith 33b9b97703SBarry Smith *B = b->A; 34b9b97703SBarry Smith } else if (isseqmaij) { 35b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 36b9b97703SBarry Smith 37b9b97703SBarry Smith *B = b->AIJ; 38b9b97703SBarry Smith } else { 39b9b97703SBarry Smith *B = A; 40b9b97703SBarry Smith } 413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 42b9b97703SBarry Smith } 43b9b97703SBarry Smith 44480dffcfSJed Brown /*@ 4511a5261eSBarry Smith MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size 46ff585edeSJed Brown 473f9fe445SBarry Smith Logically Collective 48ff585edeSJed Brown 49d8d19677SJose E. Roman Input Parameters: 5011a5261eSBarry Smith + A - the `MATMAIJ` matrix 51ff585edeSJed Brown - dof - the block size for the new matrix 52ff585edeSJed Brown 53ff585edeSJed Brown Output Parameter: 5411a5261eSBarry Smith . B - the new `MATMAIJ` matrix 55ff585edeSJed Brown 56ff585edeSJed Brown Level: advanced 57ff585edeSJed Brown 58*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MatCreateMAIJ()` 59ff585edeSJed Brown @*/ 60d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) 61d71ae5a4SJacob Faibussowitsch { 620298fd71SBarry Smith Mat Aij = NULL; 63b9b97703SBarry Smith 64b9b97703SBarry Smith PetscFunctionBegin; 65c5eb9154SBarry Smith PetscValidLogicalCollectiveInt(A, dof, 2); 669566063dSJacob Faibussowitsch PetscCall(MatMAIJGetAIJ(A, &Aij)); 679566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(Aij, dof, B)); 683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 69b9b97703SBarry Smith } 70b9b97703SBarry Smith 7180eb40d0SJacob Faibussowitsch static PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 72d71ae5a4SJacob Faibussowitsch { 734cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 7482b1193eSBarry Smith 7582b1193eSBarry Smith PetscFunctionBegin; 769566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 779566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 782e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL)); 799566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL)); 809566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL)); 813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 824cb9d9b8SBarry Smith } 834cb9d9b8SBarry Smith 8480eb40d0SJacob Faibussowitsch static PetscErrorCode MatSetUp_MAIJ(Mat A) 85d71ae5a4SJacob Faibussowitsch { 86356c569eSBarry Smith PetscFunctionBegin; 87ce94432eSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices"); 88356c569eSBarry Smith } 89356c569eSBarry Smith 9080eb40d0SJacob Faibussowitsch static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) 91d71ae5a4SJacob Faibussowitsch { 920fd73130SBarry Smith Mat B; 930fd73130SBarry Smith 940fd73130SBarry Smith PetscFunctionBegin; 959566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 969566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 979566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 990fd73130SBarry Smith } 1000fd73130SBarry Smith 10180eb40d0SJacob Faibussowitsch static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) 102d71ae5a4SJacob Faibussowitsch { 1030fd73130SBarry Smith Mat B; 1040fd73130SBarry Smith 1050fd73130SBarry Smith PetscFunctionBegin; 1069566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); 1079566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 1089566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1100fd73130SBarry Smith } 1110fd73130SBarry Smith 11280eb40d0SJacob Faibussowitsch static PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 113d71ae5a4SJacob Faibussowitsch { 1144cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 1154cb9d9b8SBarry Smith 1164cb9d9b8SBarry Smith PetscFunctionBegin; 1179566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 1189566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->OAIJ)); 1199566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->A)); 1209566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->ctx)); 1219566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->w)); 1229566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1232e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL)); 1249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL)); 1259566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL)); 1269566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 128b9b97703SBarry Smith } 129b9b97703SBarry Smith 1300bad9183SKris Buschelman /*MC 131fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1320bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 13311a5261eSBarry Smith The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices. 1340bad9183SKris Buschelman 1350bad9183SKris Buschelman Operations provided: 13667b8a455SSatish Balay .vb 13711a5261eSBarry Smith MatMult() 13811a5261eSBarry Smith MatMultTranspose() 13911a5261eSBarry Smith MatMultAdd() 14011a5261eSBarry Smith MatMultTransposeAdd() 14167b8a455SSatish Balay .ve 1420bad9183SKris Buschelman 1430bad9183SKris Buschelman Level: advanced 1440bad9183SKris Buschelman 145*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()` 1460bad9183SKris Buschelman M*/ 1470bad9183SKris Buschelman 148d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 149d71ae5a4SJacob Faibussowitsch { 1504cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 15164b52464SHong Zhang PetscMPIInt size; 15282b1193eSBarry Smith 15382b1193eSBarry Smith PetscFunctionBegin; 1544dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&b)); 155b0a32e0cSBarry Smith A->data = (void *)b; 15626fbe8dcSKarl Rupp 1579566063dSJacob Faibussowitsch PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 15826fbe8dcSKarl Rupp 159356c569eSBarry Smith A->ops->setup = MatSetUp_MAIJ; 160f4a53059SBarry Smith 161f4259b30SLisandro Dalcin b->AIJ = NULL; 162cd3bbe55SBarry Smith b->dof = 0; 163f4259b30SLisandro Dalcin b->OAIJ = NULL; 164f4259b30SLisandro Dalcin b->ctx = NULL; 165f4259b30SLisandro Dalcin b->w = NULL; 1669566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 16764b52464SHong Zhang if (size == 1) { 1689566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ)); 16964b52464SHong Zhang } else { 1709566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ)); 17164b52464SHong Zhang } 17232e7c8b0SBarry Smith A->preallocated = PETSC_TRUE; 17332e7c8b0SBarry Smith A->assembled = PETSC_TRUE; 1743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17582b1193eSBarry Smith } 17682b1193eSBarry Smith 17780eb40d0SJacob Faibussowitsch #if PetscHasAttribute(always_inline) 17880eb40d0SJacob Faibussowitsch #define PETSC_FORCE_INLINE __attribute__((always_inline)) 17980eb40d0SJacob Faibussowitsch #else 18080eb40d0SJacob Faibussowitsch #define PETSC_FORCE_INLINE 18180eb40d0SJacob Faibussowitsch #endif 182bb5b24f5SJacob Faibussowitsch 1839927e305SJacob Faibussowitsch #if defined(__clang__) 184bb5b24f5SJacob Faibussowitsch #define PETSC_PRAGMA_UNROLL _Pragma("unroll") 185bb5b24f5SJacob Faibussowitsch #else 186bb5b24f5SJacob Faibussowitsch #define PETSC_PRAGMA_UNROLL 187bb5b24f5SJacob Faibussowitsch #endif 188bb5b24f5SJacob Faibussowitsch 18980eb40d0SJacob Faibussowitsch enum { 19080eb40d0SJacob Faibussowitsch MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18 19180eb40d0SJacob Faibussowitsch }; 19280eb40d0SJacob Faibussowitsch 19380eb40d0SJacob Faibussowitsch // try as hard as possible to get these "template"s inlined, GCC apparently does take 'inline' 19480eb40d0SJacob Faibussowitsch // keyword into account for these... 19580eb40d0SJacob Faibussowitsch PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 19680eb40d0SJacob Faibussowitsch { 19780eb40d0SJacob Faibussowitsch const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 19880eb40d0SJacob Faibussowitsch const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 19980eb40d0SJacob Faibussowitsch const Mat baij = b->AIJ; 20080eb40d0SJacob Faibussowitsch const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 20180eb40d0SJacob Faibussowitsch const PetscInt m = baij->rmap->n; 20280eb40d0SJacob Faibussowitsch const PetscInt nz = a->nz; 20380eb40d0SJacob Faibussowitsch const PetscInt *idx = a->j; 20480eb40d0SJacob Faibussowitsch const PetscInt *ii = a->i; 20580eb40d0SJacob Faibussowitsch const PetscScalar *v = a->a; 20680eb40d0SJacob Faibussowitsch PetscInt nonzerorow = 0; 20780eb40d0SJacob Faibussowitsch const PetscScalar *x; 20880eb40d0SJacob Faibussowitsch PetscScalar *z; 20980eb40d0SJacob Faibussowitsch 21080eb40d0SJacob Faibussowitsch PetscFunctionBegin; 21180eb40d0SJacob Faibussowitsch PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 21280eb40d0SJacob Faibussowitsch if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz)); 21380eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 21480eb40d0SJacob Faibussowitsch if (mult_add) { 21580eb40d0SJacob Faibussowitsch PetscCall(VecGetArray(zz, &z)); 21680eb40d0SJacob Faibussowitsch } else { 21780eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayWrite(zz, &z)); 21880eb40d0SJacob Faibussowitsch } 21980eb40d0SJacob Faibussowitsch 22080eb40d0SJacob Faibussowitsch for (PetscInt i = 0; i < m; ++i) { 22180eb40d0SJacob Faibussowitsch PetscInt jrow = ii[i]; 22280eb40d0SJacob Faibussowitsch const PetscInt n = ii[i + 1] - jrow; 22380eb40d0SJacob Faibussowitsch // leave a line so clang-format does not align these decls 22480eb40d0SJacob Faibussowitsch PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0}; 22580eb40d0SJacob Faibussowitsch 22680eb40d0SJacob Faibussowitsch nonzerorow += n > 0; 22780eb40d0SJacob Faibussowitsch for (PetscInt j = 0; j < n; ++j, ++jrow) { 22880eb40d0SJacob Faibussowitsch const PetscScalar v_jrow = v[jrow]; 22980eb40d0SJacob Faibussowitsch const PetscInt N_idx_jrow = N * idx[jrow]; 23080eb40d0SJacob Faibussowitsch 231bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL 23280eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k]; 23380eb40d0SJacob Faibussowitsch } 23480eb40d0SJacob Faibussowitsch 235bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL 23680eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) { 23780eb40d0SJacob Faibussowitsch const PetscInt z_idx = N * i + k; 23880eb40d0SJacob Faibussowitsch 23980eb40d0SJacob Faibussowitsch if (mult_add) { 24080eb40d0SJacob Faibussowitsch z[z_idx] += sum[k]; 24180eb40d0SJacob Faibussowitsch } else { 24280eb40d0SJacob Faibussowitsch z[z_idx] = sum[k]; 24380eb40d0SJacob Faibussowitsch } 24480eb40d0SJacob Faibussowitsch } 24580eb40d0SJacob Faibussowitsch } 24680eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow)))); 24780eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 24880eb40d0SJacob Faibussowitsch if (mult_add) { 24980eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &z)); 25080eb40d0SJacob Faibussowitsch } else { 25180eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(zz, &z)); 25280eb40d0SJacob Faibussowitsch } 25380eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25480eb40d0SJacob Faibussowitsch } 25580eb40d0SJacob Faibussowitsch 25680eb40d0SJacob Faibussowitsch PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 25780eb40d0SJacob Faibussowitsch { 25880eb40d0SJacob Faibussowitsch const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 25980eb40d0SJacob Faibussowitsch const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 26080eb40d0SJacob Faibussowitsch const Mat baij = b->AIJ; 26180eb40d0SJacob Faibussowitsch const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 26280eb40d0SJacob Faibussowitsch const PetscInt m = baij->rmap->n; 26380eb40d0SJacob Faibussowitsch const PetscInt nz = a->nz; 26480eb40d0SJacob Faibussowitsch const PetscInt *a_j = a->j; 26580eb40d0SJacob Faibussowitsch const PetscInt *a_i = a->i; 26680eb40d0SJacob Faibussowitsch const PetscScalar *a_a = a->a; 26780eb40d0SJacob Faibussowitsch const PetscScalar *x; 26880eb40d0SJacob Faibussowitsch PetscScalar *z; 26980eb40d0SJacob Faibussowitsch 27080eb40d0SJacob Faibussowitsch PetscFunctionBegin; 27180eb40d0SJacob Faibussowitsch PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 27280eb40d0SJacob Faibussowitsch if (mult_add) { 27380eb40d0SJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 27480eb40d0SJacob Faibussowitsch } else { 27580eb40d0SJacob Faibussowitsch PetscCall(VecSet(zz, 0.0)); 27680eb40d0SJacob Faibussowitsch } 27780eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 27880eb40d0SJacob Faibussowitsch PetscCall(VecGetArray(zz, &z)); 27980eb40d0SJacob Faibussowitsch 28080eb40d0SJacob Faibussowitsch for (PetscInt i = 0; i < m; i++) { 28180eb40d0SJacob Faibussowitsch const PetscInt a_ii = a_i[i]; 28280eb40d0SJacob Faibussowitsch const PetscInt *idx = a_j + a_ii; 28380eb40d0SJacob Faibussowitsch const PetscScalar *v = a_a + a_ii; 28480eb40d0SJacob Faibussowitsch const PetscInt n = a_i[i + 1] - a_ii; 28580eb40d0SJacob Faibussowitsch PetscScalar alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE]; 28680eb40d0SJacob Faibussowitsch 287bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL 28880eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k]; 28980eb40d0SJacob Faibussowitsch for (PetscInt j = 0; j < n; ++j) { 29080eb40d0SJacob Faibussowitsch const PetscInt N_idx_j = N * idx[j]; 29180eb40d0SJacob Faibussowitsch const PetscScalar v_j = v[j]; 29280eb40d0SJacob Faibussowitsch 293bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL 29480eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j; 29580eb40d0SJacob Faibussowitsch } 29680eb40d0SJacob Faibussowitsch } 29780eb40d0SJacob Faibussowitsch 29880eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2 * N * nz)); 29980eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 30080eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &z)); 30180eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 30280eb40d0SJacob Faibussowitsch } 30380eb40d0SJacob Faibussowitsch 3049927e305SJacob Faibussowitsch #define MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(N) \ 3059927e305SJacob Faibussowitsch static PetscErrorCode PetscConcat(MatMult_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \ 3069927e305SJacob Faibussowitsch { \ 3079927e305SJacob Faibussowitsch PetscFunctionBegin; \ 3089927e305SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \ 3099927e305SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); \ 3109927e305SJacob Faibussowitsch } \ 3119927e305SJacob Faibussowitsch static PetscErrorCode PetscConcat(MatMultTranspose_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \ 3129927e305SJacob Faibussowitsch { \ 3139927e305SJacob Faibussowitsch PetscFunctionBegin; \ 3149927e305SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \ 3159927e305SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); \ 3169927e305SJacob Faibussowitsch } \ 3179927e305SJacob Faibussowitsch static PetscErrorCode PetscConcat(MatMultAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \ 3189927e305SJacob Faibussowitsch { \ 3199927e305SJacob Faibussowitsch PetscFunctionBegin; \ 3209927e305SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \ 3219927e305SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); \ 3229927e305SJacob Faibussowitsch } \ 3239927e305SJacob Faibussowitsch static PetscErrorCode PetscConcat(MatMultTransposeAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \ 3249927e305SJacob Faibussowitsch { \ 3259927e305SJacob Faibussowitsch PetscFunctionBegin; \ 3269927e305SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \ 3279927e305SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); \ 32882b1193eSBarry Smith } 329bcc973b7SBarry Smith 3309927e305SJacob Faibussowitsch // clang-format off 3319927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2) 3329927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(3) 3339927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(4) 3349927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(5) 3359927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(6) 3369927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(7) 3379927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(8) 3389927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(9) 3399927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(10) 3409927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(11) 3419927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(16) 3429927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(18) 3439927e305SJacob Faibussowitsch // clang-format on 344bcc973b7SBarry Smith 3459927e305SJacob Faibussowitsch #undef MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE 346ed1c418cSBarry Smith 34780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 348d71ae5a4SJacob Faibussowitsch { 3496861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 3506861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 3516861f193SBarry Smith const PetscScalar *x, *v; 3526861f193SBarry Smith PetscScalar *y, *sums; 3536861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 3546861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k; 3556861f193SBarry Smith 3566861f193SBarry Smith PetscFunctionBegin; 3579566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3589566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 3599566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 3606861f193SBarry Smith idx = a->j; 3616861f193SBarry Smith v = a->a; 3626861f193SBarry Smith ii = a->i; 3636861f193SBarry Smith 3646861f193SBarry Smith for (i = 0; i < m; i++) { 3656861f193SBarry Smith jrow = ii[i]; 3666861f193SBarry Smith n = ii[i + 1] - jrow; 3676861f193SBarry Smith sums = y + dof * i; 3686861f193SBarry Smith for (j = 0; j < n; j++) { 369ad540459SPierre Jolivet for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 3706861f193SBarry Smith jrow++; 3716861f193SBarry Smith } 3726861f193SBarry Smith } 3736861f193SBarry Smith 3749566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 3759566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3769566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 3773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3786861f193SBarry Smith } 3796861f193SBarry Smith 38080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 381d71ae5a4SJacob Faibussowitsch { 3826861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 3836861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 3846861f193SBarry Smith const PetscScalar *x, *v; 3856861f193SBarry Smith PetscScalar *y, *sums; 3866861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 3876861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k; 3886861f193SBarry Smith 3896861f193SBarry Smith PetscFunctionBegin; 3909566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 3919566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3929566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 3936861f193SBarry Smith idx = a->j; 3946861f193SBarry Smith v = a->a; 3956861f193SBarry Smith ii = a->i; 3966861f193SBarry Smith 3976861f193SBarry Smith for (i = 0; i < m; i++) { 3986861f193SBarry Smith jrow = ii[i]; 3996861f193SBarry Smith n = ii[i + 1] - jrow; 4006861f193SBarry Smith sums = y + dof * i; 4016861f193SBarry Smith for (j = 0; j < n; j++) { 402ad540459SPierre Jolivet for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 4036861f193SBarry Smith jrow++; 4046861f193SBarry Smith } 4056861f193SBarry Smith } 4066861f193SBarry Smith 4079566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 4089566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4099566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 4103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4116861f193SBarry Smith } 4126861f193SBarry Smith 41380eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 414d71ae5a4SJacob Faibussowitsch { 4156861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 4166861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 4176861f193SBarry Smith const PetscScalar *x, *v, *alpha; 4186861f193SBarry Smith PetscScalar *y; 4196861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 4206861f193SBarry Smith PetscInt n, i, k; 4216861f193SBarry Smith 4226861f193SBarry Smith PetscFunctionBegin; 4239566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4249566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 4259566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 4266861f193SBarry Smith for (i = 0; i < m; i++) { 4276861f193SBarry Smith idx = a->j + a->i[i]; 4286861f193SBarry Smith v = a->a + a->i[i]; 4296861f193SBarry Smith n = a->i[i + 1] - a->i[i]; 4306861f193SBarry Smith alpha = x + dof * i; 4316861f193SBarry Smith while (n-- > 0) { 432ad540459SPierre Jolivet for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 4339371c9d4SSatish Balay idx++; 4349371c9d4SSatish Balay v++; 4356861f193SBarry Smith } 4366861f193SBarry Smith } 4379566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 4389566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4399566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 4403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4416861f193SBarry Smith } 4426861f193SBarry Smith 44380eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 444d71ae5a4SJacob Faibussowitsch { 4456861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 4466861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 4476861f193SBarry Smith const PetscScalar *x, *v, *alpha; 4486861f193SBarry Smith PetscScalar *y; 4496861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 4506861f193SBarry Smith PetscInt n, i, k; 4516861f193SBarry Smith 4526861f193SBarry Smith PetscFunctionBegin; 4539566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 4549566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4559566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 4566861f193SBarry Smith for (i = 0; i < m; i++) { 4576861f193SBarry Smith idx = a->j + a->i[i]; 4586861f193SBarry Smith v = a->a + a->i[i]; 4596861f193SBarry Smith n = a->i[i + 1] - a->i[i]; 4606861f193SBarry Smith alpha = x + dof * i; 4616861f193SBarry Smith while (n-- > 0) { 462ad540459SPierre Jolivet for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 4639371c9d4SSatish Balay idx++; 4649371c9d4SSatish Balay v++; 4656861f193SBarry Smith } 4666861f193SBarry Smith } 4679566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 4689566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4699566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 4703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4716861f193SBarry Smith } 4726861f193SBarry Smith 47380eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 474d71ae5a4SJacob Faibussowitsch { 4754cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 476f4a53059SBarry Smith 477b24ad042SBarry Smith PetscFunctionBegin; 478f4a53059SBarry Smith /* start the scatter */ 4799566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 4809566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy)); 4819566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 4829566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy)); 4833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 484f4a53059SBarry Smith } 485f4a53059SBarry Smith 48680eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 487d71ae5a4SJacob Faibussowitsch { 4884cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 489b24ad042SBarry Smith 4904cb9d9b8SBarry Smith PetscFunctionBegin; 4919566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 4929566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy)); 4939566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 4949566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 4953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4964cb9d9b8SBarry Smith } 4974cb9d9b8SBarry Smith 49880eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 499d71ae5a4SJacob Faibussowitsch { 5004cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 5014cb9d9b8SBarry Smith 502b24ad042SBarry Smith PetscFunctionBegin; 5034cb9d9b8SBarry Smith /* start the scatter */ 5049566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 5059566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz)); 5069566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 5079566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 5083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5094cb9d9b8SBarry Smith } 5104cb9d9b8SBarry Smith 51180eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 512d71ae5a4SJacob Faibussowitsch { 5134cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 514b24ad042SBarry Smith 5154cb9d9b8SBarry Smith PetscFunctionBegin; 5169566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 5179566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz)); 5189566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 5199566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 5203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5214cb9d9b8SBarry Smith } 5224cb9d9b8SBarry Smith 52380eb40d0SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) 524d71ae5a4SJacob Faibussowitsch { 5254222ddf1SHong Zhang Mat_Product *product = C->product; 5264222ddf1SHong Zhang 5274222ddf1SHong Zhang PetscFunctionBegin; 5284222ddf1SHong Zhang if (product->type == MATPRODUCT_PtAP) { 5294222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 53098921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]); 5313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5324222ddf1SHong Zhang } 5334222ddf1SHong Zhang 53480eb40d0SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) 535d71ae5a4SJacob Faibussowitsch { 5364222ddf1SHong Zhang Mat_Product *product = C->product; 5374222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 5384222ddf1SHong Zhang Mat A = product->A, P = product->B; 5394222ddf1SHong Zhang PetscInt alg = 1; /* set default algorithm */ 5404222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 5414222ddf1SHong Zhang const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"}; 5424222ddf1SHong Zhang PetscInt nalg = 4; 5434222ddf1SHong Zhang #else 5444222ddf1SHong Zhang const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"}; 5454222ddf1SHong Zhang PetscInt nalg = 5; 5464222ddf1SHong Zhang #endif 5474222ddf1SHong Zhang 5484222ddf1SHong Zhang PetscFunctionBegin; 54908401ef6SPierre Jolivet PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]); 5504222ddf1SHong Zhang 5514222ddf1SHong Zhang /* PtAP */ 5524222ddf1SHong Zhang /* Check matrix local sizes */ 5539371c9d4SSatish Balay PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5549371c9d4SSatish Balay A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend); 5559371c9d4SSatish Balay PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5569371c9d4SSatish Balay A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend); 5574222ddf1SHong Zhang 5584222ddf1SHong Zhang /* Set the default algorithm */ 5599566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "default", &flg)); 56048a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 5614222ddf1SHong Zhang 5624222ddf1SHong Zhang /* Get runtime option */ 563d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 5649566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg)); 56548a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 566d0609cedSBarry Smith PetscOptionsEnd(); 5674222ddf1SHong Zhang 5689566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg)); 5694222ddf1SHong Zhang if (flg) { 5704222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 5713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5724222ddf1SHong Zhang } 5734222ddf1SHong Zhang 5749566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg)); 5754222ddf1SHong Zhang if (flg) { 5764222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 5773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5784222ddf1SHong Zhang } 5794222ddf1SHong Zhang 5804222ddf1SHong Zhang /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 5819566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 5829566063dSJacob Faibussowitsch PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P)); 5839566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(C)); 5843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5854222ddf1SHong Zhang } 5864222ddf1SHong Zhang 58780eb40d0SJacob Faibussowitsch static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) 58880eb40d0SJacob Faibussowitsch { 58980eb40d0SJacob Faibussowitsch /* This routine requires testing -- first draft only */ 59080eb40d0SJacob Faibussowitsch Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 59180eb40d0SJacob Faibussowitsch Mat P = pp->AIJ; 59280eb40d0SJacob Faibussowitsch Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 59380eb40d0SJacob Faibussowitsch Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data; 59480eb40d0SJacob Faibussowitsch Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 59580eb40d0SJacob Faibussowitsch const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj; 59680eb40d0SJacob Faibussowitsch const PetscInt *ci = c->i, *cj = c->j, *cjj; 59780eb40d0SJacob Faibussowitsch const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof; 59880eb40d0SJacob Faibussowitsch PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense; 59980eb40d0SJacob Faibussowitsch const MatScalar *aa = a->a, *pa = p->a, *pA, *paj; 60080eb40d0SJacob Faibussowitsch MatScalar *ca = c->a, *caj, *apa; 60180eb40d0SJacob Faibussowitsch 60280eb40d0SJacob Faibussowitsch PetscFunctionBegin; 60380eb40d0SJacob Faibussowitsch /* Allocate temporary array for storage of one row of A*P */ 60480eb40d0SJacob Faibussowitsch PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense)); 60580eb40d0SJacob Faibussowitsch 60680eb40d0SJacob Faibussowitsch /* Clear old values in C */ 60780eb40d0SJacob Faibussowitsch PetscCall(PetscArrayzero(ca, ci[cm])); 60880eb40d0SJacob Faibussowitsch 60980eb40d0SJacob Faibussowitsch for (i = 0; i < am; i++) { 61080eb40d0SJacob Faibussowitsch /* Form sparse row of A*P */ 61180eb40d0SJacob Faibussowitsch anzi = ai[i + 1] - ai[i]; 61280eb40d0SJacob Faibussowitsch apnzj = 0; 61380eb40d0SJacob Faibussowitsch for (j = 0; j < anzi; j++) { 61480eb40d0SJacob Faibussowitsch /* Get offset within block of P */ 61580eb40d0SJacob Faibussowitsch pshift = *aj % ppdof; 61680eb40d0SJacob Faibussowitsch /* Get block row of P */ 61780eb40d0SJacob Faibussowitsch prow = *aj++ / ppdof; /* integer division */ 61880eb40d0SJacob Faibussowitsch pnzj = pi[prow + 1] - pi[prow]; 61980eb40d0SJacob Faibussowitsch pjj = pj + pi[prow]; 62080eb40d0SJacob Faibussowitsch paj = pa + pi[prow]; 62180eb40d0SJacob Faibussowitsch for (k = 0; k < pnzj; k++) { 62280eb40d0SJacob Faibussowitsch poffset = pjj[k] * ppdof + pshift; 62380eb40d0SJacob Faibussowitsch if (!apjdense[poffset]) { 62480eb40d0SJacob Faibussowitsch apjdense[poffset] = -1; 62580eb40d0SJacob Faibussowitsch apj[apnzj++] = poffset; 62680eb40d0SJacob Faibussowitsch } 62780eb40d0SJacob Faibussowitsch apa[poffset] += (*aa) * paj[k]; 62880eb40d0SJacob Faibussowitsch } 62980eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * pnzj)); 63080eb40d0SJacob Faibussowitsch aa++; 63180eb40d0SJacob Faibussowitsch } 63280eb40d0SJacob Faibussowitsch 63380eb40d0SJacob Faibussowitsch /* Sort the j index array for quick sparse axpy. */ 63480eb40d0SJacob Faibussowitsch /* Note: a array does not need sorting as it is in dense storage locations. */ 63580eb40d0SJacob Faibussowitsch PetscCall(PetscSortInt(apnzj, apj)); 63680eb40d0SJacob Faibussowitsch 63780eb40d0SJacob Faibussowitsch /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 63880eb40d0SJacob Faibussowitsch prow = i / ppdof; /* integer division */ 63980eb40d0SJacob Faibussowitsch pshift = i % ppdof; 64080eb40d0SJacob Faibussowitsch poffset = pi[prow]; 64180eb40d0SJacob Faibussowitsch pnzi = pi[prow + 1] - poffset; 64280eb40d0SJacob Faibussowitsch /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 64380eb40d0SJacob Faibussowitsch pJ = pj + poffset; 64480eb40d0SJacob Faibussowitsch pA = pa + poffset; 64580eb40d0SJacob Faibussowitsch for (j = 0; j < pnzi; j++) { 64680eb40d0SJacob Faibussowitsch crow = (*pJ) * ppdof + pshift; 64780eb40d0SJacob Faibussowitsch cjj = cj + ci[crow]; 64880eb40d0SJacob Faibussowitsch caj = ca + ci[crow]; 64980eb40d0SJacob Faibussowitsch pJ++; 65080eb40d0SJacob Faibussowitsch /* Perform sparse axpy operation. Note cjj includes apj. */ 65180eb40d0SJacob Faibussowitsch for (k = 0, nextap = 0; nextap < apnzj; k++) { 65280eb40d0SJacob Faibussowitsch if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]]; 65380eb40d0SJacob Faibussowitsch } 65480eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * apnzj)); 65580eb40d0SJacob Faibussowitsch pA++; 65680eb40d0SJacob Faibussowitsch } 65780eb40d0SJacob Faibussowitsch 65880eb40d0SJacob Faibussowitsch /* Zero the current row info for A*P */ 65980eb40d0SJacob Faibussowitsch for (j = 0; j < apnzj; j++) { 66080eb40d0SJacob Faibussowitsch apa[apj[j]] = 0.; 66180eb40d0SJacob Faibussowitsch apjdense[apj[j]] = 0; 66280eb40d0SJacob Faibussowitsch } 66380eb40d0SJacob Faibussowitsch } 66480eb40d0SJacob Faibussowitsch 66580eb40d0SJacob Faibussowitsch /* Assemble the final matrix and clean up */ 66680eb40d0SJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 66780eb40d0SJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 66880eb40d0SJacob Faibussowitsch PetscCall(PetscFree3(apa, apj, apjdense)); 66980eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 67080eb40d0SJacob Faibussowitsch } 67180eb40d0SJacob Faibussowitsch 67280eb40d0SJacob Faibussowitsch static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) 673d71ae5a4SJacob Faibussowitsch { 6740298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 6757ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 6767ba1a0bfSKris Buschelman Mat P = pp->AIJ; 6777ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c; 678d2840e09SBarry Smith PetscInt *pti, *ptj, *ptJ; 6797ba1a0bfSKris Buschelman PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj; 680d2840e09SBarry Smith const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof; 681d2840e09SBarry Smith PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn; 6827ba1a0bfSKris Buschelman MatScalar *ca; 683d2840e09SBarry Smith const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj; 6847ba1a0bfSKris Buschelman 6857ba1a0bfSKris Buschelman PetscFunctionBegin; 6867ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 6879566063dSJacob Faibussowitsch PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 6887ba1a0bfSKris Buschelman 6897ba1a0bfSKris Buschelman cn = pn * ppdof; 6907ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 6917ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 6929566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(cn + 1, &ci)); 6937ba1a0bfSKris Buschelman ci[0] = 0; 6947ba1a0bfSKris Buschelman 6957ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 6969566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow)); 6979566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ptadenserow, an)); 6989566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(denserow, cn)); 6997ba1a0bfSKris Buschelman 7007ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 7017ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 7027ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 7039566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space)); 7047ba1a0bfSKris Buschelman current_space = free_space; 7057ba1a0bfSKris Buschelman 7067ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 7077ba1a0bfSKris Buschelman for (i = 0; i < pn; i++) { 7087ba1a0bfSKris Buschelman ptnzi = pti[i + 1] - pti[i]; 7097ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 7107ba1a0bfSKris Buschelman for (dof = 0; dof < ppdof; dof++) { 7117ba1a0bfSKris Buschelman ptanzi = 0; 7127ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 7137ba1a0bfSKris Buschelman for (j = 0; j < ptnzi; j++) { 7147ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 7157ba1a0bfSKris Buschelman arow = ptJ[j] * ppdof + dof; 7167ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 7177ba1a0bfSKris Buschelman anzj = ai[arow + 1] - ai[arow]; 7187ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 7197ba1a0bfSKris Buschelman for (k = 0; k < anzj; k++) { 7207ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 7217ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 7227ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 7237ba1a0bfSKris Buschelman } 7247ba1a0bfSKris Buschelman } 7257ba1a0bfSKris Buschelman } 7267ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 7277ba1a0bfSKris Buschelman ptaj = ptasparserow; 7287ba1a0bfSKris Buschelman cnzi = 0; 7297ba1a0bfSKris Buschelman for (j = 0; j < ptanzi; j++) { 7307ba1a0bfSKris Buschelman /* Get offset within block of P */ 7317ba1a0bfSKris Buschelman pshift = *ptaj % ppdof; 7327ba1a0bfSKris Buschelman /* Get block row of P */ 7337ba1a0bfSKris Buschelman prow = (*ptaj++) / ppdof; /* integer division */ 7347ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 7357ba1a0bfSKris Buschelman pnzj = pi[prow + 1] - pi[prow]; 7367ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 7377ba1a0bfSKris Buschelman for (k = 0; k < pnzj; k++) { 7387ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 7397ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 7407ba1a0bfSKris Buschelman if (!denserow[pjj[k] * ppdof + pshift]) { 7417ba1a0bfSKris Buschelman denserow[pjj[k] * ppdof + pshift] = -1; 7427ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k] * ppdof + pshift; 7437ba1a0bfSKris Buschelman } 7447ba1a0bfSKris Buschelman } 7457ba1a0bfSKris Buschelman } 7467ba1a0bfSKris Buschelman 7477ba1a0bfSKris Buschelman /* sort sparserow */ 7489566063dSJacob Faibussowitsch PetscCall(PetscSortInt(cnzi, sparserow)); 7497ba1a0bfSKris Buschelman 7507ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 7517ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 75248a46eb9SPierre Jolivet if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); 7537ba1a0bfSKris Buschelman 7547ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 7559566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi)); 75626fbe8dcSKarl Rupp 7577ba1a0bfSKris Buschelman current_space->array += cnzi; 7587ba1a0bfSKris Buschelman current_space->local_used += cnzi; 7597ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 7607ba1a0bfSKris Buschelman 76126fbe8dcSKarl Rupp for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 76226fbe8dcSKarl Rupp for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0; 76326fbe8dcSKarl Rupp 7647ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 7657ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 7667ba1a0bfSKris Buschelman ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi; 7677ba1a0bfSKris Buschelman } 7687ba1a0bfSKris Buschelman } 7697ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 7707ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 7717ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 7729566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[cn] + 1, &cj)); 7739566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 7749566063dSJacob Faibussowitsch PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow)); 7757ba1a0bfSKris Buschelman 7767ba1a0bfSKris Buschelman /* Allocate space for ca */ 7779566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ci[cn] + 1, &ca)); 7787ba1a0bfSKris Buschelman 7797ba1a0bfSKris Buschelman /* put together the new matrix */ 7809566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C)); 7819566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(C, pp->dof)); 7827ba1a0bfSKris Buschelman 7837ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 7847ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 7854222ddf1SHong Zhang c = (Mat_SeqAIJ *)(C->data); 786e6b907acSBarry Smith c->free_a = PETSC_TRUE; 787e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 7887ba1a0bfSKris Buschelman c->nonew = 0; 78926fbe8dcSKarl Rupp 7904222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 7914222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 7927ba1a0bfSKris Buschelman 7937ba1a0bfSKris Buschelman /* Clean up. */ 7949566063dSJacob Faibussowitsch PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 7953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7967ba1a0bfSKris Buschelman } 7977ba1a0bfSKris Buschelman 798d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) 799d71ae5a4SJacob Faibussowitsch { 8004222ddf1SHong Zhang Mat_Product *product = C->product; 8014222ddf1SHong Zhang Mat A = product->A, P = product->B; 8022121bac1SHong Zhang 8032121bac1SHong Zhang PetscFunctionBegin; 8049566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C)); 8053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8062121bac1SHong Zhang } 8072121bac1SHong Zhang 808bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat); 809bc8e477aSFande Kong 810d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) 811d71ae5a4SJacob Faibussowitsch { 812bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 813bc8e477aSFande Kong 814bc8e477aSFande Kong PetscFunctionBegin; 81534bcad68SFande Kong 8169566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C)); 8173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 818bc8e477aSFande Kong } 819bc8e477aSFande Kong 8204222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat); 821bc8e477aSFande Kong 822d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) 823d71ae5a4SJacob Faibussowitsch { 824bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 825bc8e477aSFande Kong 826bc8e477aSFande Kong PetscFunctionBegin; 8279566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C)); 8284222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 8293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 830bc8e477aSFande Kong } 831bc8e477aSFande Kong 832bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat); 833bc8e477aSFande Kong 834d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) 835d71ae5a4SJacob Faibussowitsch { 836bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 837bc8e477aSFande Kong 838bc8e477aSFande Kong PetscFunctionBegin; 83934bcad68SFande Kong 8409566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C)); 8413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 842bc8e477aSFande Kong } 843bc8e477aSFande Kong 8444222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat); 845bc8e477aSFande Kong 846d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) 847d71ae5a4SJacob Faibussowitsch { 848bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 849bc8e477aSFande Kong 850bc8e477aSFande Kong PetscFunctionBegin; 85134bcad68SFande Kong 8529566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C)); 8534222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 8543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 855bc8e477aSFande Kong } 856bc8e477aSFande Kong 857d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) 858d71ae5a4SJacob Faibussowitsch { 8594222ddf1SHong Zhang Mat_Product *product = C->product; 8604222ddf1SHong Zhang Mat A = product->A, P = product->B; 8614222ddf1SHong Zhang PetscBool flg; 862bc8e477aSFande Kong 863bc8e477aSFande Kong PetscFunctionBegin; 8649566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce", &flg)); 8654222ddf1SHong Zhang if (flg) { 8669566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C)); 8674222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 8683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 869bc8e477aSFande Kong } 87034bcad68SFande Kong 8719566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg)); 8724222ddf1SHong Zhang if (flg) { 8739566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C)); 8744222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 8753ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8764222ddf1SHong Zhang } 87734bcad68SFande Kong 8784222ddf1SHong Zhang SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported"); 879bc8e477aSFande Kong } 880bc8e477aSFande Kong 881d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 882d71ae5a4SJacob Faibussowitsch { 8839c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 884ceb03754SKris Buschelman Mat a = b->AIJ, B; 8859c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data; 8860fd73130SBarry Smith PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof; 887ba8c8a56SBarry Smith PetscInt *cols; 888ba8c8a56SBarry Smith PetscScalar *vals; 8899c4fc4c7SBarry Smith 8909c4fc4c7SBarry Smith PetscFunctionBegin; 8919566063dSJacob Faibussowitsch PetscCall(MatGetSize(a, &m, &n)); 8929566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(dof * m, &ilen)); 8939c4fc4c7SBarry Smith for (i = 0; i < m; i++) { 8949c4fc4c7SBarry Smith nmax = PetscMax(nmax, aij->ilen[i]); 89526fbe8dcSKarl Rupp for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i]; 8969c4fc4c7SBarry Smith } 8979566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 8989566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n)); 8999566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 9009566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen)); 9019566063dSJacob Faibussowitsch PetscCall(PetscFree(ilen)); 9029566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nmax, &icols)); 9039c4fc4c7SBarry Smith ii = 0; 9049c4fc4c7SBarry Smith for (i = 0; i < m; i++) { 9059566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 9060fd73130SBarry Smith for (j = 0; j < dof; j++) { 90726fbe8dcSKarl Rupp for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j; 9089566063dSJacob Faibussowitsch PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 9099c4fc4c7SBarry Smith ii++; 9109c4fc4c7SBarry Smith } 9119566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 9129c4fc4c7SBarry Smith } 9139566063dSJacob Faibussowitsch PetscCall(PetscFree(icols)); 9149566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 9159566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 916ceb03754SKris Buschelman 917511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 9189566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 919c3d102feSKris Buschelman } else { 920ceb03754SKris Buschelman *newmat = B; 921c3d102feSKris Buschelman } 9223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9239c4fc4c7SBarry Smith } 9249c4fc4c7SBarry Smith 925c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 926be1d678aSKris Buschelman 927d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 928d71ae5a4SJacob Faibussowitsch { 9290fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data; 930ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B; 9310fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ; 9320fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data; 9330fd73130SBarry Smith Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data; 9340fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data; 9350298fd71SBarry Smith PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0; 9360298fd71SBarry Smith PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL; 9370fd73130SBarry Smith PetscInt rstart, cstart, *garray, ii, k; 9380fd73130SBarry Smith PetscScalar *vals, *ovals; 9390fd73130SBarry Smith 9400fd73130SBarry Smith PetscFunctionBegin; 9419566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz)); 942d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) { 9430fd73130SBarry Smith nmax = PetscMax(nmax, AIJ->ilen[i]); 9440fd73130SBarry Smith onmax = PetscMax(onmax, OAIJ->ilen[i]); 9450fd73130SBarry Smith for (j = 0; j < dof; j++) { 9460fd73130SBarry Smith dnz[dof * i + j] = AIJ->ilen[i]; 9470fd73130SBarry Smith onz[dof * i + j] = OAIJ->ilen[i]; 9480fd73130SBarry Smith } 9490fd73130SBarry Smith } 9509566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 9519566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 9529566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 9539566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz)); 9549566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, dof)); 9559566063dSJacob Faibussowitsch PetscCall(PetscFree2(dnz, onz)); 9560fd73130SBarry Smith 9579566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols)); 958d0f46423SBarry Smith rstart = dof * maij->A->rmap->rstart; 959d0f46423SBarry Smith cstart = dof * maij->A->cmap->rstart; 9600fd73130SBarry Smith garray = mpiaij->garray; 9610fd73130SBarry Smith 9620fd73130SBarry Smith ii = rstart; 963d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) { 9649566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 9659566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 9660fd73130SBarry Smith for (j = 0; j < dof; j++) { 967ad540459SPierre Jolivet for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j; 968ad540459SPierre Jolivet for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j; 9699566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 9709566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES)); 9710fd73130SBarry Smith ii++; 9720fd73130SBarry Smith } 9739566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 9749566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 9750fd73130SBarry Smith } 9769566063dSJacob Faibussowitsch PetscCall(PetscFree2(icols, oicols)); 9770fd73130SBarry Smith 9789566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 9799566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 980ceb03754SKris Buschelman 981511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 9827adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 9837adad957SLisandro Dalcin ((PetscObject)A)->refct = 1; 98426fbe8dcSKarl Rupp 9859566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 98626fbe8dcSKarl Rupp 9877adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 988c3d102feSKris Buschelman } else { 989ceb03754SKris Buschelman *newmat = B; 990c3d102feSKris Buschelman } 9913ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9920fd73130SBarry Smith } 9930fd73130SBarry Smith 99480eb40d0SJacob Faibussowitsch static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) 995d71ae5a4SJacob Faibussowitsch { 9969e516c8fSBarry Smith Mat A; 9979e516c8fSBarry Smith 9989e516c8fSBarry Smith PetscFunctionBegin; 9999566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 10009566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 10019566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 10023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 10039e516c8fSBarry Smith } 10040fd73130SBarry Smith 100580eb40d0SJacob Faibussowitsch static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) 1006d71ae5a4SJacob Faibussowitsch { 1007ec626240SStefano Zampini Mat A; 1008ec626240SStefano Zampini 1009ec626240SStefano Zampini PetscFunctionBegin; 10109566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 10119566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat)); 10129566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 10133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1014ec626240SStefano Zampini } 1015ec626240SStefano Zampini 1016480dffcfSJed Brown /*@ 10170bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 10180bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 101911a5261eSBarry Smith way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices, 102011a5261eSBarry Smith and `MATMPIAIJ` for distributed matrices. 10210bad9183SKris Buschelman 1022ff585edeSJed Brown Collective 1023ff585edeSJed Brown 1024ff585edeSJed Brown Input Parameters: 102511a5261eSBarry Smith + A - the `MATAIJ` matrix describing the action on blocks 1026ff585edeSJed Brown - dof - the block size (number of components per node) 1027ff585edeSJed Brown 1028ff585edeSJed Brown Output Parameter: 102911a5261eSBarry Smith . maij - the new `MATMAIJ` matrix 1030ff585edeSJed Brown 10312ef1f0ffSBarry Smith Level: advanced 10322ef1f0ffSBarry Smith 10330bad9183SKris Buschelman Operations provided: 103467b8a455SSatish Balay .vb 103511a5261eSBarry Smith MatMult() 103611a5261eSBarry Smith MatMultTranspose() 103711a5261eSBarry Smith MatMultAdd() 103811a5261eSBarry Smith MatMultTransposeAdd() 103911a5261eSBarry Smith MatView() 104067b8a455SSatish Balay .ve 10410bad9183SKris Buschelman 1042*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ` 1043ff585edeSJed Brown @*/ 1044d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) 1045d71ae5a4SJacob Faibussowitsch { 1046b24ad042SBarry Smith PetscInt n; 104782b1193eSBarry Smith Mat B; 104890f67b22SBarry Smith PetscBool flg; 1049fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 1050fdc842d1SBarry Smith /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 1051fdc842d1SBarry Smith PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 1052fdc842d1SBarry Smith #endif 105382b1193eSBarry Smith 105482b1193eSBarry Smith PetscFunctionBegin; 1055fdc842d1SBarry Smith dof = PetscAbs(dof); 10569566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A)); 1057d72c5c08SBarry Smith 105826fbe8dcSKarl Rupp if (dof == 1) *maij = A; 105926fbe8dcSKarl Rupp else { 10609566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1061c248e2fdSStefano Zampini /* propagate vec type */ 10629566063dSJacob Faibussowitsch PetscCall(MatSetVecType(B, A->defaultvectype)); 10639566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N)); 10649566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, dof)); 10659566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, dof)); 10669566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 10679566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 106826fbe8dcSKarl Rupp 1069cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 1070d72c5c08SBarry Smith 107190f67b22SBarry Smith PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 107290f67b22SBarry Smith if (flg) { 1073feb9fe23SJed Brown Mat_SeqMAIJ *b; 1074feb9fe23SJed Brown 10759566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATSEQMAIJ)); 107626fbe8dcSKarl Rupp 10770298fd71SBarry Smith B->ops->setup = NULL; 10784cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 10790fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 10804222ddf1SHong Zhang 1081feb9fe23SJed Brown b = (Mat_SeqMAIJ *)B->data; 1082b9b97703SBarry Smith b->dof = dof; 10834cb9d9b8SBarry Smith b->AIJ = A; 108426fbe8dcSKarl Rupp 1085d72c5c08SBarry Smith if (dof == 2) { 1086cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 1087cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1088cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1089cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1090bcc973b7SBarry Smith } else if (dof == 3) { 1091bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 1092bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1093bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1094bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1095bcc973b7SBarry Smith } else if (dof == 4) { 1096bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 1097bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1098bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1099bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1100f9fae5adSBarry Smith } else if (dof == 5) { 1101f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 1102f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1103f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1104f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 11056cd98798SBarry Smith } else if (dof == 6) { 11066cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 11076cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 11086cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 11096cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1110ed8eea03SBarry Smith } else if (dof == 7) { 1111ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 1112ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 1113ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 1114ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 111566ed3db0SBarry Smith } else if (dof == 8) { 111666ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 111766ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 111866ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 111966ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 11202b692628SMatthew Knepley } else if (dof == 9) { 11212b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 11222b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 11232b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 11242b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 1125b9cda22cSBarry Smith } else if (dof == 10) { 1126b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 1127b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 1128b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 1129b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 1130dbdd7285SBarry Smith } else if (dof == 11) { 1131dbdd7285SBarry Smith B->ops->mult = MatMult_SeqMAIJ_11; 1132dbdd7285SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_11; 1133dbdd7285SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 1134dbdd7285SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 11352f7816d4SBarry Smith } else if (dof == 16) { 11362f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 11372f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 11382f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 11392f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 1140ed1c418cSBarry Smith } else if (dof == 18) { 1141ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18; 1142ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18; 1143ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 1144ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 114582b1193eSBarry Smith } else { 11466861f193SBarry Smith B->ops->mult = MatMult_SeqMAIJ_N; 11476861f193SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_N; 11486861f193SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 11496861f193SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 115082b1193eSBarry Smith } 1151fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 11529566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ)); 1153fdc842d1SBarry Smith #endif 11549566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ)); 11559566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 1156f4a53059SBarry Smith } else { 1157f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1158feb9fe23SJed Brown Mat_MPIMAIJ *b; 1159f4a53059SBarry Smith IS from, to; 1160f4a53059SBarry Smith Vec gvec; 1161f4a53059SBarry Smith 11629566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATMPIMAIJ)); 116326fbe8dcSKarl Rupp 11640298fd71SBarry Smith B->ops->setup = NULL; 1165d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 11660fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 116726fbe8dcSKarl Rupp 1168b9b97703SBarry Smith b = (Mat_MPIMAIJ *)B->data; 1169b9b97703SBarry Smith b->dof = dof; 1170b9b97703SBarry Smith b->A = A; 117126fbe8dcSKarl Rupp 11729566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ)); 11739566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ)); 11744cb9d9b8SBarry Smith 11759566063dSJacob Faibussowitsch PetscCall(VecGetSize(mpiaij->lvec, &n)); 11769566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_SELF, &b->w)); 11779566063dSJacob Faibussowitsch PetscCall(VecSetSizes(b->w, n * dof, n * dof)); 11789566063dSJacob Faibussowitsch PetscCall(VecSetBlockSize(b->w, dof)); 11799566063dSJacob Faibussowitsch PetscCall(VecSetType(b->w, VECSEQ)); 1180f4a53059SBarry Smith 1181f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 11829566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 11839566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to)); 1184f4a53059SBarry Smith 1185f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 11869566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec)); 1187f4a53059SBarry Smith 1188f4a53059SBarry Smith /* generate the scatter context */ 11899566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx)); 1190f4a53059SBarry Smith 11919566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 11929566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 11939566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 1194f4a53059SBarry Smith 1195f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 11964cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 11974cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 11984cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 119926fbe8dcSKarl Rupp 1200fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 12019566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ)); 1202fdc842d1SBarry Smith #endif 12039566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ)); 12049566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 1205f4a53059SBarry Smith } 12067dae84e0SHong Zhang B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 1207ec626240SStefano Zampini B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 12089566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 1209fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 1210fdc842d1SBarry Smith /* temporary until we have CUDA implementation of MAIJ */ 1211fdc842d1SBarry Smith { 1212fdc842d1SBarry Smith PetscBool flg; 1213fdc842d1SBarry Smith if (convert) { 12149566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "")); 121548a46eb9SPierre Jolivet if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B)); 1216fdc842d1SBarry Smith } 1217fdc842d1SBarry Smith } 1218fdc842d1SBarry Smith #endif 1219cd3bbe55SBarry Smith *maij = B; 1220d72c5c08SBarry Smith } 12213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 122282b1193eSBarry Smith } 1223