1be1d678aSKris Buschelman 2c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 3c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 482b1193eSBarry Smith 5b350b9acSSatish Balay /*@ 611a5261eSBarry Smith MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix 7ff585edeSJed Brown 811a5261eSBarry Smith Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 9ff585edeSJed Brown 10ff585edeSJed Brown Input Parameter: 1111a5261eSBarry Smith . A - the `MATMAIJ` matrix 12ff585edeSJed Brown 13ff585edeSJed Brown Output Parameter: 1411a5261eSBarry Smith . B - the `MATAIJ` matrix 15ff585edeSJed Brown 16ff585edeSJed Brown Level: advanced 17ff585edeSJed Brown 1811a5261eSBarry Smith Note: 1911a5261eSBarry Smith The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 20ff585edeSJed Brown 2111a5261eSBarry Smith .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()` 22ff585edeSJed Brown @*/ 23d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) 24d71ae5a4SJacob Faibussowitsch { 25ace3abfcSBarry Smith PetscBool ismpimaij, isseqmaij; 26b9b97703SBarry Smith 27b9b97703SBarry Smith PetscFunctionBegin; 289566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij)); 299566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij)); 30b9b97703SBarry Smith if (ismpimaij) { 31b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 32b9b97703SBarry Smith 33b9b97703SBarry Smith *B = b->A; 34b9b97703SBarry Smith } else if (isseqmaij) { 35b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 36b9b97703SBarry Smith 37b9b97703SBarry Smith *B = b->AIJ; 38b9b97703SBarry Smith } else { 39b9b97703SBarry Smith *B = A; 40b9b97703SBarry Smith } 413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 42b9b97703SBarry Smith } 43b9b97703SBarry Smith 44480dffcfSJed Brown /*@ 4511a5261eSBarry Smith MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size 46ff585edeSJed Brown 473f9fe445SBarry Smith Logically Collective 48ff585edeSJed Brown 49d8d19677SJose E. Roman Input Parameters: 5011a5261eSBarry Smith + A - the `MATMAIJ` matrix 51ff585edeSJed Brown - dof - the block size for the new matrix 52ff585edeSJed Brown 53ff585edeSJed Brown Output Parameter: 5411a5261eSBarry Smith . B - the new `MATMAIJ` matrix 55ff585edeSJed Brown 56ff585edeSJed Brown Level: advanced 57ff585edeSJed Brown 5811a5261eSBarry Smith .seealso: `MATMAIJ`, `MatCreateMAIJ()` 59ff585edeSJed Brown @*/ 60d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) 61d71ae5a4SJacob Faibussowitsch { 620298fd71SBarry Smith Mat Aij = NULL; 63b9b97703SBarry Smith 64b9b97703SBarry Smith PetscFunctionBegin; 65c5eb9154SBarry Smith PetscValidLogicalCollectiveInt(A, dof, 2); 669566063dSJacob Faibussowitsch PetscCall(MatMAIJGetAIJ(A, &Aij)); 679566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(Aij, dof, B)); 683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 69b9b97703SBarry Smith } 70b9b97703SBarry Smith 7180eb40d0SJacob Faibussowitsch static PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 72d71ae5a4SJacob Faibussowitsch { 734cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 7482b1193eSBarry Smith 7582b1193eSBarry Smith PetscFunctionBegin; 769566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 779566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 782e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL)); 799566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL)); 809566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL)); 813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 824cb9d9b8SBarry Smith } 834cb9d9b8SBarry Smith 8480eb40d0SJacob Faibussowitsch static PetscErrorCode MatSetUp_MAIJ(Mat A) 85d71ae5a4SJacob Faibussowitsch { 86356c569eSBarry Smith PetscFunctionBegin; 87ce94432eSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices"); 88356c569eSBarry Smith } 89356c569eSBarry Smith 9080eb40d0SJacob Faibussowitsch static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) 91d71ae5a4SJacob Faibussowitsch { 920fd73130SBarry Smith Mat B; 930fd73130SBarry Smith 940fd73130SBarry Smith PetscFunctionBegin; 959566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 969566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 979566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 990fd73130SBarry Smith } 1000fd73130SBarry Smith 10180eb40d0SJacob Faibussowitsch static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) 102d71ae5a4SJacob Faibussowitsch { 1030fd73130SBarry Smith Mat B; 1040fd73130SBarry Smith 1050fd73130SBarry Smith PetscFunctionBegin; 1069566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); 1079566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 1089566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1100fd73130SBarry Smith } 1110fd73130SBarry Smith 11280eb40d0SJacob Faibussowitsch static PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 113d71ae5a4SJacob Faibussowitsch { 1144cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 1154cb9d9b8SBarry Smith 1164cb9d9b8SBarry Smith PetscFunctionBegin; 1179566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 1189566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->OAIJ)); 1199566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->A)); 1209566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->ctx)); 1219566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->w)); 1229566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1232e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL)); 1249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL)); 1259566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL)); 1269566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 128b9b97703SBarry Smith } 129b9b97703SBarry Smith 1300bad9183SKris Buschelman /*MC 131fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1320bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 13311a5261eSBarry Smith The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices. 1340bad9183SKris Buschelman 1350bad9183SKris Buschelman Operations provided: 13667b8a455SSatish Balay .vb 13711a5261eSBarry Smith MatMult() 13811a5261eSBarry Smith MatMultTranspose() 13911a5261eSBarry Smith MatMultAdd() 14011a5261eSBarry Smith MatMultTransposeAdd() 14167b8a455SSatish Balay .ve 1420bad9183SKris Buschelman 1430bad9183SKris Buschelman Level: advanced 1440bad9183SKris Buschelman 14511a5261eSBarry Smith .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()` 1460bad9183SKris Buschelman M*/ 1470bad9183SKris Buschelman 148d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 149d71ae5a4SJacob Faibussowitsch { 1504cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 15164b52464SHong Zhang PetscMPIInt size; 15282b1193eSBarry Smith 15382b1193eSBarry Smith PetscFunctionBegin; 1544dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&b)); 155b0a32e0cSBarry Smith A->data = (void *)b; 15626fbe8dcSKarl Rupp 1579566063dSJacob Faibussowitsch PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 15826fbe8dcSKarl Rupp 159356c569eSBarry Smith A->ops->setup = MatSetUp_MAIJ; 160f4a53059SBarry Smith 161f4259b30SLisandro Dalcin b->AIJ = NULL; 162cd3bbe55SBarry Smith b->dof = 0; 163f4259b30SLisandro Dalcin b->OAIJ = NULL; 164f4259b30SLisandro Dalcin b->ctx = NULL; 165f4259b30SLisandro Dalcin b->w = NULL; 1669566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 16764b52464SHong Zhang if (size == 1) { 1689566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ)); 16964b52464SHong Zhang } else { 1709566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ)); 17164b52464SHong Zhang } 17232e7c8b0SBarry Smith A->preallocated = PETSC_TRUE; 17332e7c8b0SBarry Smith A->assembled = PETSC_TRUE; 1743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17582b1193eSBarry Smith } 17682b1193eSBarry Smith 17780eb40d0SJacob Faibussowitsch #if PetscHasAttribute(always_inline) 17880eb40d0SJacob Faibussowitsch #define PETSC_FORCE_INLINE __attribute__((always_inline)) 17980eb40d0SJacob Faibussowitsch #else 18080eb40d0SJacob Faibussowitsch #define PETSC_FORCE_INLINE 18180eb40d0SJacob Faibussowitsch #endif 182*bb5b24f5SJacob Faibussowitsch 183*bb5b24f5SJacob Faibussowitsch #if defined(__clang__) || defined(__INTEL_COMPILER) || defined(__ICC) 184*bb5b24f5SJacob Faibussowitsch #define PETSC_PRAGMA_UNROLL _Pragma("unroll") 185*bb5b24f5SJacob Faibussowitsch #else 186*bb5b24f5SJacob Faibussowitsch #define PETSC_PRAGMA_UNROLL 187*bb5b24f5SJacob Faibussowitsch #endif 188*bb5b24f5SJacob Faibussowitsch 18980eb40d0SJacob Faibussowitsch enum { 19080eb40d0SJacob Faibussowitsch MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18 19180eb40d0SJacob Faibussowitsch }; 19280eb40d0SJacob Faibussowitsch 19380eb40d0SJacob Faibussowitsch // try as hard as possible to get these "template"s inlined, GCC apparently does take 'inline' 19480eb40d0SJacob Faibussowitsch // keyword into account for these... 19580eb40d0SJacob Faibussowitsch PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 19680eb40d0SJacob Faibussowitsch { 19780eb40d0SJacob Faibussowitsch const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 19880eb40d0SJacob Faibussowitsch const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 19980eb40d0SJacob Faibussowitsch const Mat baij = b->AIJ; 20080eb40d0SJacob Faibussowitsch const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 20180eb40d0SJacob Faibussowitsch const PetscInt m = baij->rmap->n; 20280eb40d0SJacob Faibussowitsch const PetscInt nz = a->nz; 20380eb40d0SJacob Faibussowitsch const PetscInt *idx = a->j; 20480eb40d0SJacob Faibussowitsch const PetscInt *ii = a->i; 20580eb40d0SJacob Faibussowitsch const PetscScalar *v = a->a; 20680eb40d0SJacob Faibussowitsch PetscInt nonzerorow = 0; 20780eb40d0SJacob Faibussowitsch const PetscScalar *x; 20880eb40d0SJacob Faibussowitsch PetscScalar *z; 20980eb40d0SJacob Faibussowitsch 21080eb40d0SJacob Faibussowitsch PetscFunctionBegin; 21180eb40d0SJacob Faibussowitsch PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 21280eb40d0SJacob Faibussowitsch if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz)); 21380eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 21480eb40d0SJacob Faibussowitsch if (mult_add) { 21580eb40d0SJacob Faibussowitsch PetscCall(VecGetArray(zz, &z)); 21680eb40d0SJacob Faibussowitsch } else { 21780eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayWrite(zz, &z)); 21880eb40d0SJacob Faibussowitsch } 21980eb40d0SJacob Faibussowitsch 22080eb40d0SJacob Faibussowitsch for (PetscInt i = 0; i < m; ++i) { 22180eb40d0SJacob Faibussowitsch PetscInt jrow = ii[i]; 22280eb40d0SJacob Faibussowitsch const PetscInt n = ii[i + 1] - jrow; 22380eb40d0SJacob Faibussowitsch // leave a line so clang-format does not align these decls 22480eb40d0SJacob Faibussowitsch PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0}; 22580eb40d0SJacob Faibussowitsch 22680eb40d0SJacob Faibussowitsch nonzerorow += n > 0; 22780eb40d0SJacob Faibussowitsch for (PetscInt j = 0; j < n; ++j, ++jrow) { 22880eb40d0SJacob Faibussowitsch const PetscScalar v_jrow = v[jrow]; 22980eb40d0SJacob Faibussowitsch const PetscInt N_idx_jrow = N * idx[jrow]; 23080eb40d0SJacob Faibussowitsch 231*bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL 23280eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k]; 23380eb40d0SJacob Faibussowitsch } 23480eb40d0SJacob Faibussowitsch 235*bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL 23680eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) { 23780eb40d0SJacob Faibussowitsch const PetscInt z_idx = N * i + k; 23880eb40d0SJacob Faibussowitsch 23980eb40d0SJacob Faibussowitsch if (mult_add) { 24080eb40d0SJacob Faibussowitsch z[z_idx] += sum[k]; 24180eb40d0SJacob Faibussowitsch } else { 24280eb40d0SJacob Faibussowitsch z[z_idx] = sum[k]; 24380eb40d0SJacob Faibussowitsch } 24480eb40d0SJacob Faibussowitsch } 24580eb40d0SJacob Faibussowitsch } 24680eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow)))); 24780eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 24880eb40d0SJacob Faibussowitsch if (mult_add) { 24980eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &z)); 25080eb40d0SJacob Faibussowitsch } else { 25180eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(zz, &z)); 25280eb40d0SJacob Faibussowitsch } 25380eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25480eb40d0SJacob Faibussowitsch } 25580eb40d0SJacob Faibussowitsch 25680eb40d0SJacob Faibussowitsch PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 25780eb40d0SJacob Faibussowitsch { 25880eb40d0SJacob Faibussowitsch const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 25980eb40d0SJacob Faibussowitsch const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 26080eb40d0SJacob Faibussowitsch const Mat baij = b->AIJ; 26180eb40d0SJacob Faibussowitsch const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 26280eb40d0SJacob Faibussowitsch const PetscInt m = baij->rmap->n; 26380eb40d0SJacob Faibussowitsch const PetscInt nz = a->nz; 26480eb40d0SJacob Faibussowitsch const PetscInt *a_j = a->j; 26580eb40d0SJacob Faibussowitsch const PetscInt *a_i = a->i; 26680eb40d0SJacob Faibussowitsch const PetscScalar *a_a = a->a; 26780eb40d0SJacob Faibussowitsch const PetscScalar *x; 26880eb40d0SJacob Faibussowitsch PetscScalar *z; 26980eb40d0SJacob Faibussowitsch 27080eb40d0SJacob Faibussowitsch PetscFunctionBegin; 27180eb40d0SJacob Faibussowitsch PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 27280eb40d0SJacob Faibussowitsch if (mult_add) { 27380eb40d0SJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 27480eb40d0SJacob Faibussowitsch } else { 27580eb40d0SJacob Faibussowitsch PetscCall(VecSet(zz, 0.0)); 27680eb40d0SJacob Faibussowitsch } 27780eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 27880eb40d0SJacob Faibussowitsch PetscCall(VecGetArray(zz, &z)); 27980eb40d0SJacob Faibussowitsch 28080eb40d0SJacob Faibussowitsch for (PetscInt i = 0; i < m; i++) { 28180eb40d0SJacob Faibussowitsch const PetscInt a_ii = a_i[i]; 28280eb40d0SJacob Faibussowitsch const PetscInt *idx = a_j + a_ii; 28380eb40d0SJacob Faibussowitsch const PetscScalar *v = a_a + a_ii; 28480eb40d0SJacob Faibussowitsch const PetscInt n = a_i[i + 1] - a_ii; 28580eb40d0SJacob Faibussowitsch PetscScalar alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE]; 28680eb40d0SJacob Faibussowitsch 287*bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL 28880eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k]; 28980eb40d0SJacob Faibussowitsch for (PetscInt j = 0; j < n; ++j) { 29080eb40d0SJacob Faibussowitsch const PetscInt N_idx_j = N * idx[j]; 29180eb40d0SJacob Faibussowitsch const PetscScalar v_j = v[j]; 29280eb40d0SJacob Faibussowitsch 293*bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL 29480eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j; 29580eb40d0SJacob Faibussowitsch } 29680eb40d0SJacob Faibussowitsch } 29780eb40d0SJacob Faibussowitsch 29880eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2 * N * nz)); 29980eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 30080eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &z)); 30180eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 30280eb40d0SJacob Faibussowitsch } 30380eb40d0SJacob Faibussowitsch 304cd3bbe55SBarry Smith /* -------------------------------------------------------------------------------------- */ 30580eb40d0SJacob Faibussowitsch 30680eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy) 307d71ae5a4SJacob Faibussowitsch { 308bcc973b7SBarry Smith PetscFunctionBegin; 30980eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 2)); 3103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 31182b1193eSBarry Smith } 312bcc973b7SBarry Smith 31380eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy) 314d71ae5a4SJacob Faibussowitsch { 315bcc973b7SBarry Smith PetscFunctionBegin; 31680eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 2)); 3173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 31882b1193eSBarry Smith } 319bcc973b7SBarry Smith 32080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) 321d71ae5a4SJacob Faibussowitsch { 322bcc973b7SBarry Smith PetscFunctionBegin; 32380eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 2)); 3243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 32582b1193eSBarry Smith } 32680eb40d0SJacob Faibussowitsch 32780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) 328d71ae5a4SJacob Faibussowitsch { 329bcc973b7SBarry Smith PetscFunctionBegin; 33080eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 2)); 3313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 33282b1193eSBarry Smith } 33380eb40d0SJacob Faibussowitsch 334cd3bbe55SBarry Smith /* -------------------------------------------------------------------------------------- */ 33580eb40d0SJacob Faibussowitsch 33680eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy) 337d71ae5a4SJacob Faibussowitsch { 338bcc973b7SBarry Smith PetscFunctionBegin; 33980eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 3)); 3403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 341bcc973b7SBarry Smith } 342bcc973b7SBarry Smith 34380eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy) 344d71ae5a4SJacob Faibussowitsch { 345bcc973b7SBarry Smith PetscFunctionBegin; 34680eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 3)); 3473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 348bcc973b7SBarry Smith } 349bcc973b7SBarry Smith 35080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) 351d71ae5a4SJacob Faibussowitsch { 352bcc973b7SBarry Smith PetscFunctionBegin; 35380eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 3)); 3543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 355bcc973b7SBarry Smith } 356bcc973b7SBarry Smith 35780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) 358d71ae5a4SJacob Faibussowitsch { 359bcc973b7SBarry Smith PetscFunctionBegin; 36080eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 3)); 3613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3622b692628SMatthew Knepley } 3632b692628SMatthew Knepley 364b9cda22cSBarry Smith /* ------------------------------------------------------------------------------ */ 365b9cda22cSBarry Smith 36680eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy) 367d71ae5a4SJacob Faibussowitsch { 3682b692628SMatthew Knepley PetscFunctionBegin; 36980eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 4)); 3703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3712b692628SMatthew Knepley } 3722b692628SMatthew Knepley 37380eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy) 374d71ae5a4SJacob Faibussowitsch { 3752b692628SMatthew Knepley PetscFunctionBegin; 37680eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 4)); 3773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3782b692628SMatthew Knepley } 3792b692628SMatthew Knepley 38080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) 381d71ae5a4SJacob Faibussowitsch { 3822b692628SMatthew Knepley PetscFunctionBegin; 38380eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 4)); 3843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 385b9cda22cSBarry Smith } 386b9cda22cSBarry Smith 38780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) 388d71ae5a4SJacob Faibussowitsch { 389b9cda22cSBarry Smith PetscFunctionBegin; 39080eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 4)); 3913ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 392b9cda22cSBarry Smith } 393b9cda22cSBarry Smith 39480eb40d0SJacob Faibussowitsch /* ------------------------------------------------------------------------------ */ 39580eb40d0SJacob Faibussowitsch 39680eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy) 397d71ae5a4SJacob Faibussowitsch { 398b9cda22cSBarry Smith PetscFunctionBegin; 39980eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 5)); 4003ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 401b9cda22cSBarry Smith } 402b9cda22cSBarry Smith 40380eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy) 404d71ae5a4SJacob Faibussowitsch { 405b9cda22cSBarry Smith PetscFunctionBegin; 40680eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 5)); 40780eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 408b9cda22cSBarry Smith } 40980eb40d0SJacob Faibussowitsch 41080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) 41180eb40d0SJacob Faibussowitsch { 41280eb40d0SJacob Faibussowitsch PetscFunctionBegin; 41380eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 5)); 41480eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 415b9cda22cSBarry Smith } 41680eb40d0SJacob Faibussowitsch 41780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) 41880eb40d0SJacob Faibussowitsch { 41980eb40d0SJacob Faibussowitsch PetscFunctionBegin; 42080eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 5)); 42180eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 42280eb40d0SJacob Faibussowitsch } 42380eb40d0SJacob Faibussowitsch 42480eb40d0SJacob Faibussowitsch /* ------------------------------------------------------------------------------ */ 42580eb40d0SJacob Faibussowitsch 42680eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy) 42780eb40d0SJacob Faibussowitsch { 42880eb40d0SJacob Faibussowitsch PetscFunctionBegin; 42980eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 6)); 43080eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 43180eb40d0SJacob Faibussowitsch } 43280eb40d0SJacob Faibussowitsch 43380eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy) 43480eb40d0SJacob Faibussowitsch { 43580eb40d0SJacob Faibussowitsch PetscFunctionBegin; 43680eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 6)); 43780eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 43880eb40d0SJacob Faibussowitsch } 43980eb40d0SJacob Faibussowitsch 44080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) 44180eb40d0SJacob Faibussowitsch { 44280eb40d0SJacob Faibussowitsch PetscFunctionBegin; 44380eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 6)); 44480eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 44580eb40d0SJacob Faibussowitsch } 44680eb40d0SJacob Faibussowitsch 44780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) 44880eb40d0SJacob Faibussowitsch { 44980eb40d0SJacob Faibussowitsch PetscFunctionBegin; 45080eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 6)); 45180eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 45280eb40d0SJacob Faibussowitsch } 45380eb40d0SJacob Faibussowitsch 45480eb40d0SJacob Faibussowitsch /* ------------------------------------------------------------------------------ */ 45580eb40d0SJacob Faibussowitsch 45680eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy) 45780eb40d0SJacob Faibussowitsch { 45880eb40d0SJacob Faibussowitsch PetscFunctionBegin; 45980eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 7)); 46080eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 46180eb40d0SJacob Faibussowitsch } 46280eb40d0SJacob Faibussowitsch 46380eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy) 46480eb40d0SJacob Faibussowitsch { 46580eb40d0SJacob Faibussowitsch PetscFunctionBegin; 46680eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 7)); 46780eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 46880eb40d0SJacob Faibussowitsch } 46980eb40d0SJacob Faibussowitsch 47080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) 47180eb40d0SJacob Faibussowitsch { 47280eb40d0SJacob Faibussowitsch PetscFunctionBegin; 47380eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 7)); 47480eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 47580eb40d0SJacob Faibussowitsch } 47680eb40d0SJacob Faibussowitsch 47780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) 47880eb40d0SJacob Faibussowitsch { 47980eb40d0SJacob Faibussowitsch PetscFunctionBegin; 48080eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 7)); 48180eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 48280eb40d0SJacob Faibussowitsch } 48380eb40d0SJacob Faibussowitsch 48480eb40d0SJacob Faibussowitsch /* ------------------------------------------------------------------------------ */ 48580eb40d0SJacob Faibussowitsch 48680eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy) 48780eb40d0SJacob Faibussowitsch { 48880eb40d0SJacob Faibussowitsch PetscFunctionBegin; 48980eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 8)); 49080eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 49180eb40d0SJacob Faibussowitsch } 49280eb40d0SJacob Faibussowitsch 49380eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy) 49480eb40d0SJacob Faibussowitsch { 49580eb40d0SJacob Faibussowitsch PetscFunctionBegin; 49680eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 8)); 49780eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 49880eb40d0SJacob Faibussowitsch } 49980eb40d0SJacob Faibussowitsch 50080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) 50180eb40d0SJacob Faibussowitsch { 50280eb40d0SJacob Faibussowitsch PetscFunctionBegin; 50380eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 8)); 50480eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 50580eb40d0SJacob Faibussowitsch } 50680eb40d0SJacob Faibussowitsch 50780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) 50880eb40d0SJacob Faibussowitsch { 50980eb40d0SJacob Faibussowitsch PetscFunctionBegin; 51080eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 8)); 51180eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 51280eb40d0SJacob Faibussowitsch } 51380eb40d0SJacob Faibussowitsch 51480eb40d0SJacob Faibussowitsch /* ------------------------------------------------------------------------------ */ 51580eb40d0SJacob Faibussowitsch 51680eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy) 51780eb40d0SJacob Faibussowitsch { 51880eb40d0SJacob Faibussowitsch PetscFunctionBegin; 51980eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 9)); 52080eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 52180eb40d0SJacob Faibussowitsch } 52280eb40d0SJacob Faibussowitsch 52380eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy) 52480eb40d0SJacob Faibussowitsch { 52580eb40d0SJacob Faibussowitsch PetscFunctionBegin; 52680eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 9)); 52780eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 52880eb40d0SJacob Faibussowitsch } 52980eb40d0SJacob Faibussowitsch 53080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) 53180eb40d0SJacob Faibussowitsch { 53280eb40d0SJacob Faibussowitsch PetscFunctionBegin; 53380eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 9)); 53480eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 53580eb40d0SJacob Faibussowitsch } 53680eb40d0SJacob Faibussowitsch 53780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) 53880eb40d0SJacob Faibussowitsch { 53980eb40d0SJacob Faibussowitsch PetscFunctionBegin; 54080eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 9)); 54180eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 54280eb40d0SJacob Faibussowitsch } 54380eb40d0SJacob Faibussowitsch 54480eb40d0SJacob Faibussowitsch /* ------------------------------------------------------------------------------ */ 54580eb40d0SJacob Faibussowitsch 54680eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy) 54780eb40d0SJacob Faibussowitsch { 54880eb40d0SJacob Faibussowitsch PetscFunctionBegin; 54980eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 10)); 55080eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 55180eb40d0SJacob Faibussowitsch } 55280eb40d0SJacob Faibussowitsch 55380eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy) 55480eb40d0SJacob Faibussowitsch { 55580eb40d0SJacob Faibussowitsch PetscFunctionBegin; 55680eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 10)); 55780eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 55880eb40d0SJacob Faibussowitsch } 55980eb40d0SJacob Faibussowitsch 56080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) 56180eb40d0SJacob Faibussowitsch { 56280eb40d0SJacob Faibussowitsch PetscFunctionBegin; 56380eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 10)); 56480eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 56580eb40d0SJacob Faibussowitsch } 56680eb40d0SJacob Faibussowitsch 56780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) 56880eb40d0SJacob Faibussowitsch { 56980eb40d0SJacob Faibussowitsch PetscFunctionBegin; 57080eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 10)); 5713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 572b9cda22cSBarry Smith } 573b9cda22cSBarry Smith 5742f7816d4SBarry Smith /*-------------------------------------------------------------------------------------------- */ 57580eb40d0SJacob Faibussowitsch 57680eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy) 577d71ae5a4SJacob Faibussowitsch { 578dbdd7285SBarry Smith PetscFunctionBegin; 57980eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 11)); 5803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 581dbdd7285SBarry Smith } 582dbdd7285SBarry Smith 58380eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy) 584d71ae5a4SJacob Faibussowitsch { 585dbdd7285SBarry Smith PetscFunctionBegin; 58680eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 11)); 5873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 588dbdd7285SBarry Smith } 589dbdd7285SBarry Smith 59080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) 591d71ae5a4SJacob Faibussowitsch { 592dbdd7285SBarry Smith PetscFunctionBegin; 59380eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 11)); 5943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 595dbdd7285SBarry Smith } 596dbdd7285SBarry Smith 59780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) 598d71ae5a4SJacob Faibussowitsch { 599dbdd7285SBarry Smith PetscFunctionBegin; 60080eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 11)); 6013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 602dbdd7285SBarry Smith } 603dbdd7285SBarry Smith 604dbdd7285SBarry Smith /*-------------------------------------------------------------------------------------------- */ 60580eb40d0SJacob Faibussowitsch 60680eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy) 607d71ae5a4SJacob Faibussowitsch { 6082f7816d4SBarry Smith PetscFunctionBegin; 60980eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 16)); 6103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6112f7816d4SBarry Smith } 6122f7816d4SBarry Smith 61380eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy) 614d71ae5a4SJacob Faibussowitsch { 6152f7816d4SBarry Smith PetscFunctionBegin; 61680eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 16)); 6173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6182f7816d4SBarry Smith } 6192f7816d4SBarry Smith 62080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) 621d71ae5a4SJacob Faibussowitsch { 6222f7816d4SBarry Smith PetscFunctionBegin; 62380eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 16)); 6243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6252f7816d4SBarry Smith } 6262f7816d4SBarry Smith 62780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) 628d71ae5a4SJacob Faibussowitsch { 6292f7816d4SBarry Smith PetscFunctionBegin; 63080eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 16)); 6313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 63266ed3db0SBarry Smith } 63366ed3db0SBarry Smith 634ed1c418cSBarry Smith /*-------------------------------------------------------------------------------------------- */ 63580eb40d0SJacob Faibussowitsch 63680eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy) 637d71ae5a4SJacob Faibussowitsch { 638ed1c418cSBarry Smith PetscFunctionBegin; 63980eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 18)); 6403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 641ed1c418cSBarry Smith } 642ed1c418cSBarry Smith 64380eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy) 644d71ae5a4SJacob Faibussowitsch { 645ed1c418cSBarry Smith PetscFunctionBegin; 64680eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 18)); 6473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 648ed1c418cSBarry Smith } 649ed1c418cSBarry Smith 65080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) 651d71ae5a4SJacob Faibussowitsch { 652ed1c418cSBarry Smith PetscFunctionBegin; 65380eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 18)); 6543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 655ed1c418cSBarry Smith } 656ed1c418cSBarry Smith 65780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) 658d71ae5a4SJacob Faibussowitsch { 659ed1c418cSBarry Smith PetscFunctionBegin; 66080eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 18)); 6613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 662ed1c418cSBarry Smith } 663ed1c418cSBarry Smith 66480eb40d0SJacob Faibussowitsch /*-------------------------------------------------------------------------------------------- */ 66580eb40d0SJacob Faibussowitsch 66680eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 667d71ae5a4SJacob Faibussowitsch { 6686861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 6696861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 6706861f193SBarry Smith const PetscScalar *x, *v; 6716861f193SBarry Smith PetscScalar *y, *sums; 6726861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 6736861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k; 6746861f193SBarry Smith 6756861f193SBarry Smith PetscFunctionBegin; 6769566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 6779566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 6789566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 6796861f193SBarry Smith idx = a->j; 6806861f193SBarry Smith v = a->a; 6816861f193SBarry Smith ii = a->i; 6826861f193SBarry Smith 6836861f193SBarry Smith for (i = 0; i < m; i++) { 6846861f193SBarry Smith jrow = ii[i]; 6856861f193SBarry Smith n = ii[i + 1] - jrow; 6866861f193SBarry Smith sums = y + dof * i; 6876861f193SBarry Smith for (j = 0; j < n; j++) { 688ad540459SPierre Jolivet for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 6896861f193SBarry Smith jrow++; 6906861f193SBarry Smith } 6916861f193SBarry Smith } 6926861f193SBarry Smith 6939566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 6949566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 6959566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 6963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6976861f193SBarry Smith } 6986861f193SBarry Smith 69980eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 700d71ae5a4SJacob Faibussowitsch { 7016861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 7026861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 7036861f193SBarry Smith const PetscScalar *x, *v; 7046861f193SBarry Smith PetscScalar *y, *sums; 7056861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 7066861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k; 7076861f193SBarry Smith 7086861f193SBarry Smith PetscFunctionBegin; 7099566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 7109566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 7119566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 7126861f193SBarry Smith idx = a->j; 7136861f193SBarry Smith v = a->a; 7146861f193SBarry Smith ii = a->i; 7156861f193SBarry Smith 7166861f193SBarry Smith for (i = 0; i < m; i++) { 7176861f193SBarry Smith jrow = ii[i]; 7186861f193SBarry Smith n = ii[i + 1] - jrow; 7196861f193SBarry Smith sums = y + dof * i; 7206861f193SBarry Smith for (j = 0; j < n; j++) { 721ad540459SPierre Jolivet for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 7226861f193SBarry Smith jrow++; 7236861f193SBarry Smith } 7246861f193SBarry Smith } 7256861f193SBarry Smith 7269566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 7279566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 7289566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 7293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7306861f193SBarry Smith } 7316861f193SBarry Smith 73280eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 733d71ae5a4SJacob Faibussowitsch { 7346861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 7356861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 7366861f193SBarry Smith const PetscScalar *x, *v, *alpha; 7376861f193SBarry Smith PetscScalar *y; 7386861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 7396861f193SBarry Smith PetscInt n, i, k; 7406861f193SBarry Smith 7416861f193SBarry Smith PetscFunctionBegin; 7429566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 7439566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 7449566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 7456861f193SBarry Smith for (i = 0; i < m; i++) { 7466861f193SBarry Smith idx = a->j + a->i[i]; 7476861f193SBarry Smith v = a->a + a->i[i]; 7486861f193SBarry Smith n = a->i[i + 1] - a->i[i]; 7496861f193SBarry Smith alpha = x + dof * i; 7506861f193SBarry Smith while (n-- > 0) { 751ad540459SPierre Jolivet for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 7529371c9d4SSatish Balay idx++; 7539371c9d4SSatish Balay v++; 7546861f193SBarry Smith } 7556861f193SBarry Smith } 7569566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 7579566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 7589566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 7593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7606861f193SBarry Smith } 7616861f193SBarry Smith 76280eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 763d71ae5a4SJacob Faibussowitsch { 7646861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 7656861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 7666861f193SBarry Smith const PetscScalar *x, *v, *alpha; 7676861f193SBarry Smith PetscScalar *y; 7686861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 7696861f193SBarry Smith PetscInt n, i, k; 7706861f193SBarry Smith 7716861f193SBarry Smith PetscFunctionBegin; 7729566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 7739566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 7749566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 7756861f193SBarry Smith for (i = 0; i < m; i++) { 7766861f193SBarry Smith idx = a->j + a->i[i]; 7776861f193SBarry Smith v = a->a + a->i[i]; 7786861f193SBarry Smith n = a->i[i + 1] - a->i[i]; 7796861f193SBarry Smith alpha = x + dof * i; 7806861f193SBarry Smith while (n-- > 0) { 781ad540459SPierre Jolivet for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 7829371c9d4SSatish Balay idx++; 7839371c9d4SSatish Balay v++; 7846861f193SBarry Smith } 7856861f193SBarry Smith } 7869566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 7879566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 7889566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 7893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7906861f193SBarry Smith } 7916861f193SBarry Smith 79280eb40d0SJacob Faibussowitsch /*-------------------------------------------------------------------------------------------- */ 79380eb40d0SJacob Faibussowitsch 79480eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 795d71ae5a4SJacob Faibussowitsch { 7964cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 797f4a53059SBarry Smith 798b24ad042SBarry Smith PetscFunctionBegin; 799f4a53059SBarry Smith /* start the scatter */ 8009566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 8019566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy)); 8029566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 8039566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy)); 8043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 805f4a53059SBarry Smith } 806f4a53059SBarry Smith 80780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 808d71ae5a4SJacob Faibussowitsch { 8094cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 810b24ad042SBarry Smith 8114cb9d9b8SBarry Smith PetscFunctionBegin; 8129566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 8139566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy)); 8149566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 8159566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 8163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8174cb9d9b8SBarry Smith } 8184cb9d9b8SBarry Smith 81980eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 820d71ae5a4SJacob Faibussowitsch { 8214cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 8224cb9d9b8SBarry Smith 823b24ad042SBarry Smith PetscFunctionBegin; 8244cb9d9b8SBarry Smith /* start the scatter */ 8259566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 8269566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz)); 8279566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 8289566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 8293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8304cb9d9b8SBarry Smith } 8314cb9d9b8SBarry Smith 83280eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 833d71ae5a4SJacob Faibussowitsch { 8344cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 835b24ad042SBarry Smith 8364cb9d9b8SBarry Smith PetscFunctionBegin; 8379566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 8389566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz)); 8399566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 8409566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 8413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8424cb9d9b8SBarry Smith } 8434cb9d9b8SBarry Smith 84495ddefa2SHong Zhang /* ---------------------------------------------------------------- */ 84580eb40d0SJacob Faibussowitsch 84680eb40d0SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) 847d71ae5a4SJacob Faibussowitsch { 8484222ddf1SHong Zhang Mat_Product *product = C->product; 8494222ddf1SHong Zhang 8504222ddf1SHong Zhang PetscFunctionBegin; 8514222ddf1SHong Zhang if (product->type == MATPRODUCT_PtAP) { 8524222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 85398921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]); 8543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8554222ddf1SHong Zhang } 8564222ddf1SHong Zhang 85780eb40d0SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) 858d71ae5a4SJacob Faibussowitsch { 8594222ddf1SHong Zhang Mat_Product *product = C->product; 8604222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 8614222ddf1SHong Zhang Mat A = product->A, P = product->B; 8624222ddf1SHong Zhang PetscInt alg = 1; /* set default algorithm */ 8634222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 8644222ddf1SHong Zhang const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"}; 8654222ddf1SHong Zhang PetscInt nalg = 4; 8664222ddf1SHong Zhang #else 8674222ddf1SHong Zhang const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"}; 8684222ddf1SHong Zhang PetscInt nalg = 5; 8694222ddf1SHong Zhang #endif 8704222ddf1SHong Zhang 8714222ddf1SHong Zhang PetscFunctionBegin; 87208401ef6SPierre Jolivet PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]); 8734222ddf1SHong Zhang 8744222ddf1SHong Zhang /* PtAP */ 8754222ddf1SHong Zhang /* Check matrix local sizes */ 8769371c9d4SSatish Balay PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 8779371c9d4SSatish Balay A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend); 8789371c9d4SSatish Balay PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 8799371c9d4SSatish Balay A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend); 8804222ddf1SHong Zhang 8814222ddf1SHong Zhang /* Set the default algorithm */ 8829566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "default", &flg)); 88348a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 8844222ddf1SHong Zhang 8854222ddf1SHong Zhang /* Get runtime option */ 886d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 8879566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg)); 88848a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 889d0609cedSBarry Smith PetscOptionsEnd(); 8904222ddf1SHong Zhang 8919566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg)); 8924222ddf1SHong Zhang if (flg) { 8934222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 8943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8954222ddf1SHong Zhang } 8964222ddf1SHong Zhang 8979566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg)); 8984222ddf1SHong Zhang if (flg) { 8994222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 9003ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9014222ddf1SHong Zhang } 9024222ddf1SHong Zhang 9034222ddf1SHong Zhang /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 9049566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 9059566063dSJacob Faibussowitsch PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P)); 9069566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(C)); 9073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9084222ddf1SHong Zhang } 9094222ddf1SHong Zhang 9104222ddf1SHong Zhang /* ---------------------------------------------------------------- */ 91180eb40d0SJacob Faibussowitsch static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) 91280eb40d0SJacob Faibussowitsch { 91380eb40d0SJacob Faibussowitsch /* This routine requires testing -- first draft only */ 91480eb40d0SJacob Faibussowitsch Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 91580eb40d0SJacob Faibussowitsch Mat P = pp->AIJ; 91680eb40d0SJacob Faibussowitsch Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 91780eb40d0SJacob Faibussowitsch Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data; 91880eb40d0SJacob Faibussowitsch Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 91980eb40d0SJacob Faibussowitsch const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj; 92080eb40d0SJacob Faibussowitsch const PetscInt *ci = c->i, *cj = c->j, *cjj; 92180eb40d0SJacob Faibussowitsch const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof; 92280eb40d0SJacob Faibussowitsch PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense; 92380eb40d0SJacob Faibussowitsch const MatScalar *aa = a->a, *pa = p->a, *pA, *paj; 92480eb40d0SJacob Faibussowitsch MatScalar *ca = c->a, *caj, *apa; 92580eb40d0SJacob Faibussowitsch 92680eb40d0SJacob Faibussowitsch PetscFunctionBegin; 92780eb40d0SJacob Faibussowitsch /* Allocate temporary array for storage of one row of A*P */ 92880eb40d0SJacob Faibussowitsch PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense)); 92980eb40d0SJacob Faibussowitsch 93080eb40d0SJacob Faibussowitsch /* Clear old values in C */ 93180eb40d0SJacob Faibussowitsch PetscCall(PetscArrayzero(ca, ci[cm])); 93280eb40d0SJacob Faibussowitsch 93380eb40d0SJacob Faibussowitsch for (i = 0; i < am; i++) { 93480eb40d0SJacob Faibussowitsch /* Form sparse row of A*P */ 93580eb40d0SJacob Faibussowitsch anzi = ai[i + 1] - ai[i]; 93680eb40d0SJacob Faibussowitsch apnzj = 0; 93780eb40d0SJacob Faibussowitsch for (j = 0; j < anzi; j++) { 93880eb40d0SJacob Faibussowitsch /* Get offset within block of P */ 93980eb40d0SJacob Faibussowitsch pshift = *aj % ppdof; 94080eb40d0SJacob Faibussowitsch /* Get block row of P */ 94180eb40d0SJacob Faibussowitsch prow = *aj++ / ppdof; /* integer division */ 94280eb40d0SJacob Faibussowitsch pnzj = pi[prow + 1] - pi[prow]; 94380eb40d0SJacob Faibussowitsch pjj = pj + pi[prow]; 94480eb40d0SJacob Faibussowitsch paj = pa + pi[prow]; 94580eb40d0SJacob Faibussowitsch for (k = 0; k < pnzj; k++) { 94680eb40d0SJacob Faibussowitsch poffset = pjj[k] * ppdof + pshift; 94780eb40d0SJacob Faibussowitsch if (!apjdense[poffset]) { 94880eb40d0SJacob Faibussowitsch apjdense[poffset] = -1; 94980eb40d0SJacob Faibussowitsch apj[apnzj++] = poffset; 95080eb40d0SJacob Faibussowitsch } 95180eb40d0SJacob Faibussowitsch apa[poffset] += (*aa) * paj[k]; 95280eb40d0SJacob Faibussowitsch } 95380eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * pnzj)); 95480eb40d0SJacob Faibussowitsch aa++; 95580eb40d0SJacob Faibussowitsch } 95680eb40d0SJacob Faibussowitsch 95780eb40d0SJacob Faibussowitsch /* Sort the j index array for quick sparse axpy. */ 95880eb40d0SJacob Faibussowitsch /* Note: a array does not need sorting as it is in dense storage locations. */ 95980eb40d0SJacob Faibussowitsch PetscCall(PetscSortInt(apnzj, apj)); 96080eb40d0SJacob Faibussowitsch 96180eb40d0SJacob Faibussowitsch /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 96280eb40d0SJacob Faibussowitsch prow = i / ppdof; /* integer division */ 96380eb40d0SJacob Faibussowitsch pshift = i % ppdof; 96480eb40d0SJacob Faibussowitsch poffset = pi[prow]; 96580eb40d0SJacob Faibussowitsch pnzi = pi[prow + 1] - poffset; 96680eb40d0SJacob Faibussowitsch /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 96780eb40d0SJacob Faibussowitsch pJ = pj + poffset; 96880eb40d0SJacob Faibussowitsch pA = pa + poffset; 96980eb40d0SJacob Faibussowitsch for (j = 0; j < pnzi; j++) { 97080eb40d0SJacob Faibussowitsch crow = (*pJ) * ppdof + pshift; 97180eb40d0SJacob Faibussowitsch cjj = cj + ci[crow]; 97280eb40d0SJacob Faibussowitsch caj = ca + ci[crow]; 97380eb40d0SJacob Faibussowitsch pJ++; 97480eb40d0SJacob Faibussowitsch /* Perform sparse axpy operation. Note cjj includes apj. */ 97580eb40d0SJacob Faibussowitsch for (k = 0, nextap = 0; nextap < apnzj; k++) { 97680eb40d0SJacob Faibussowitsch if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]]; 97780eb40d0SJacob Faibussowitsch } 97880eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * apnzj)); 97980eb40d0SJacob Faibussowitsch pA++; 98080eb40d0SJacob Faibussowitsch } 98180eb40d0SJacob Faibussowitsch 98280eb40d0SJacob Faibussowitsch /* Zero the current row info for A*P */ 98380eb40d0SJacob Faibussowitsch for (j = 0; j < apnzj; j++) { 98480eb40d0SJacob Faibussowitsch apa[apj[j]] = 0.; 98580eb40d0SJacob Faibussowitsch apjdense[apj[j]] = 0; 98680eb40d0SJacob Faibussowitsch } 98780eb40d0SJacob Faibussowitsch } 98880eb40d0SJacob Faibussowitsch 98980eb40d0SJacob Faibussowitsch /* Assemble the final matrix and clean up */ 99080eb40d0SJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 99180eb40d0SJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 99280eb40d0SJacob Faibussowitsch PetscCall(PetscFree3(apa, apj, apjdense)); 99380eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 99480eb40d0SJacob Faibussowitsch } 99580eb40d0SJacob Faibussowitsch 99680eb40d0SJacob Faibussowitsch static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) 997d71ae5a4SJacob Faibussowitsch { 9980298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 9997ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 10007ba1a0bfSKris Buschelman Mat P = pp->AIJ; 10017ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c; 1002d2840e09SBarry Smith PetscInt *pti, *ptj, *ptJ; 10037ba1a0bfSKris Buschelman PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj; 1004d2840e09SBarry Smith const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof; 1005d2840e09SBarry Smith PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn; 10067ba1a0bfSKris Buschelman MatScalar *ca; 1007d2840e09SBarry Smith const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj; 10087ba1a0bfSKris Buschelman 10097ba1a0bfSKris Buschelman PetscFunctionBegin; 10107ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 10119566063dSJacob Faibussowitsch PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 10127ba1a0bfSKris Buschelman 10137ba1a0bfSKris Buschelman cn = pn * ppdof; 10147ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 10157ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 10169566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(cn + 1, &ci)); 10177ba1a0bfSKris Buschelman ci[0] = 0; 10187ba1a0bfSKris Buschelman 10197ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 10209566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow)); 10219566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ptadenserow, an)); 10229566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(denserow, cn)); 10237ba1a0bfSKris Buschelman 10247ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 10257ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 10267ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 10279566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space)); 10287ba1a0bfSKris Buschelman current_space = free_space; 10297ba1a0bfSKris Buschelman 10307ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 10317ba1a0bfSKris Buschelman for (i = 0; i < pn; i++) { 10327ba1a0bfSKris Buschelman ptnzi = pti[i + 1] - pti[i]; 10337ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 10347ba1a0bfSKris Buschelman for (dof = 0; dof < ppdof; dof++) { 10357ba1a0bfSKris Buschelman ptanzi = 0; 10367ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 10377ba1a0bfSKris Buschelman for (j = 0; j < ptnzi; j++) { 10387ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 10397ba1a0bfSKris Buschelman arow = ptJ[j] * ppdof + dof; 10407ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 10417ba1a0bfSKris Buschelman anzj = ai[arow + 1] - ai[arow]; 10427ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 10437ba1a0bfSKris Buschelman for (k = 0; k < anzj; k++) { 10447ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 10457ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 10467ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 10477ba1a0bfSKris Buschelman } 10487ba1a0bfSKris Buschelman } 10497ba1a0bfSKris Buschelman } 10507ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 10517ba1a0bfSKris Buschelman ptaj = ptasparserow; 10527ba1a0bfSKris Buschelman cnzi = 0; 10537ba1a0bfSKris Buschelman for (j = 0; j < ptanzi; j++) { 10547ba1a0bfSKris Buschelman /* Get offset within block of P */ 10557ba1a0bfSKris Buschelman pshift = *ptaj % ppdof; 10567ba1a0bfSKris Buschelman /* Get block row of P */ 10577ba1a0bfSKris Buschelman prow = (*ptaj++) / ppdof; /* integer division */ 10587ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 10597ba1a0bfSKris Buschelman pnzj = pi[prow + 1] - pi[prow]; 10607ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 10617ba1a0bfSKris Buschelman for (k = 0; k < pnzj; k++) { 10627ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 10637ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 10647ba1a0bfSKris Buschelman if (!denserow[pjj[k] * ppdof + pshift]) { 10657ba1a0bfSKris Buschelman denserow[pjj[k] * ppdof + pshift] = -1; 10667ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k] * ppdof + pshift; 10677ba1a0bfSKris Buschelman } 10687ba1a0bfSKris Buschelman } 10697ba1a0bfSKris Buschelman } 10707ba1a0bfSKris Buschelman 10717ba1a0bfSKris Buschelman /* sort sparserow */ 10729566063dSJacob Faibussowitsch PetscCall(PetscSortInt(cnzi, sparserow)); 10737ba1a0bfSKris Buschelman 10747ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 10757ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 107648a46eb9SPierre Jolivet if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); 10777ba1a0bfSKris Buschelman 10787ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 10799566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi)); 108026fbe8dcSKarl Rupp 10817ba1a0bfSKris Buschelman current_space->array += cnzi; 10827ba1a0bfSKris Buschelman current_space->local_used += cnzi; 10837ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 10847ba1a0bfSKris Buschelman 108526fbe8dcSKarl Rupp for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 108626fbe8dcSKarl Rupp for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0; 108726fbe8dcSKarl Rupp 10887ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 10897ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 10907ba1a0bfSKris Buschelman ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi; 10917ba1a0bfSKris Buschelman } 10927ba1a0bfSKris Buschelman } 10937ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 10947ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 10957ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 10969566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[cn] + 1, &cj)); 10979566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 10989566063dSJacob Faibussowitsch PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow)); 10997ba1a0bfSKris Buschelman 11007ba1a0bfSKris Buschelman /* Allocate space for ca */ 11019566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ci[cn] + 1, &ca)); 11027ba1a0bfSKris Buschelman 11037ba1a0bfSKris Buschelman /* put together the new matrix */ 11049566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C)); 11059566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(C, pp->dof)); 11067ba1a0bfSKris Buschelman 11077ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 11087ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 11094222ddf1SHong Zhang c = (Mat_SeqAIJ *)(C->data); 1110e6b907acSBarry Smith c->free_a = PETSC_TRUE; 1111e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 11127ba1a0bfSKris Buschelman c->nonew = 0; 111326fbe8dcSKarl Rupp 11144222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 11154222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 11167ba1a0bfSKris Buschelman 11177ba1a0bfSKris Buschelman /* Clean up. */ 11189566063dSJacob Faibussowitsch PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 11193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 11207ba1a0bfSKris Buschelman } 11217ba1a0bfSKris Buschelman 1122d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) 1123d71ae5a4SJacob Faibussowitsch { 11244222ddf1SHong Zhang Mat_Product *product = C->product; 11254222ddf1SHong Zhang Mat A = product->A, P = product->B; 11262121bac1SHong Zhang 11272121bac1SHong Zhang PetscFunctionBegin; 11289566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C)); 11293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 11302121bac1SHong Zhang } 11312121bac1SHong Zhang 1132bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat); 1133bc8e477aSFande Kong 1134d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) 1135d71ae5a4SJacob Faibussowitsch { 1136bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 1137bc8e477aSFande Kong 1138bc8e477aSFande Kong PetscFunctionBegin; 113934bcad68SFande Kong 11409566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C)); 11413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1142bc8e477aSFande Kong } 1143bc8e477aSFande Kong 11444222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat); 1145bc8e477aSFande Kong 1146d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) 1147d71ae5a4SJacob Faibussowitsch { 1148bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 1149bc8e477aSFande Kong 1150bc8e477aSFande Kong PetscFunctionBegin; 11519566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C)); 11524222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 11533ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1154bc8e477aSFande Kong } 1155bc8e477aSFande Kong 1156bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat); 1157bc8e477aSFande Kong 1158d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) 1159d71ae5a4SJacob Faibussowitsch { 1160bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 1161bc8e477aSFande Kong 1162bc8e477aSFande Kong PetscFunctionBegin; 116334bcad68SFande Kong 11649566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C)); 11653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1166bc8e477aSFande Kong } 1167bc8e477aSFande Kong 11684222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat); 1169bc8e477aSFande Kong 1170d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) 1171d71ae5a4SJacob Faibussowitsch { 1172bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 1173bc8e477aSFande Kong 1174bc8e477aSFande Kong PetscFunctionBegin; 117534bcad68SFande Kong 11769566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C)); 11774222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 11783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1179bc8e477aSFande Kong } 1180bc8e477aSFande Kong 1181d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) 1182d71ae5a4SJacob Faibussowitsch { 11834222ddf1SHong Zhang Mat_Product *product = C->product; 11844222ddf1SHong Zhang Mat A = product->A, P = product->B; 11854222ddf1SHong Zhang PetscBool flg; 1186bc8e477aSFande Kong 1187bc8e477aSFande Kong PetscFunctionBegin; 11889566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce", &flg)); 11894222ddf1SHong Zhang if (flg) { 11909566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C)); 11914222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 11923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1193bc8e477aSFande Kong } 119434bcad68SFande Kong 11959566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg)); 11964222ddf1SHong Zhang if (flg) { 11979566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C)); 11984222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 11993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 12004222ddf1SHong Zhang } 120134bcad68SFande Kong 12024222ddf1SHong Zhang SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported"); 1203bc8e477aSFande Kong } 1204bc8e477aSFande Kong 1205d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1206d71ae5a4SJacob Faibussowitsch { 12079c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1208ceb03754SKris Buschelman Mat a = b->AIJ, B; 12099c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data; 12100fd73130SBarry Smith PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof; 1211ba8c8a56SBarry Smith PetscInt *cols; 1212ba8c8a56SBarry Smith PetscScalar *vals; 12139c4fc4c7SBarry Smith 12149c4fc4c7SBarry Smith PetscFunctionBegin; 12159566063dSJacob Faibussowitsch PetscCall(MatGetSize(a, &m, &n)); 12169566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(dof * m, &ilen)); 12179c4fc4c7SBarry Smith for (i = 0; i < m; i++) { 12189c4fc4c7SBarry Smith nmax = PetscMax(nmax, aij->ilen[i]); 121926fbe8dcSKarl Rupp for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i]; 12209c4fc4c7SBarry Smith } 12219566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 12229566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n)); 12239566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 12249566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen)); 12259566063dSJacob Faibussowitsch PetscCall(PetscFree(ilen)); 12269566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nmax, &icols)); 12279c4fc4c7SBarry Smith ii = 0; 12289c4fc4c7SBarry Smith for (i = 0; i < m; i++) { 12299566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 12300fd73130SBarry Smith for (j = 0; j < dof; j++) { 123126fbe8dcSKarl Rupp for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j; 12329566063dSJacob Faibussowitsch PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 12339c4fc4c7SBarry Smith ii++; 12349c4fc4c7SBarry Smith } 12359566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 12369c4fc4c7SBarry Smith } 12379566063dSJacob Faibussowitsch PetscCall(PetscFree(icols)); 12389566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 12399566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1240ceb03754SKris Buschelman 1241511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 12429566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 1243c3d102feSKris Buschelman } else { 1244ceb03754SKris Buschelman *newmat = B; 1245c3d102feSKris Buschelman } 12463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 12479c4fc4c7SBarry Smith } 12489c4fc4c7SBarry Smith 1249c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 1250be1d678aSKris Buschelman 1251d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1252d71ae5a4SJacob Faibussowitsch { 12530fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data; 1254ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B; 12550fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ; 12560fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data; 12570fd73130SBarry Smith Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data; 12580fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data; 12590298fd71SBarry Smith PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0; 12600298fd71SBarry Smith PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL; 12610fd73130SBarry Smith PetscInt rstart, cstart, *garray, ii, k; 12620fd73130SBarry Smith PetscScalar *vals, *ovals; 12630fd73130SBarry Smith 12640fd73130SBarry Smith PetscFunctionBegin; 12659566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz)); 1266d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) { 12670fd73130SBarry Smith nmax = PetscMax(nmax, AIJ->ilen[i]); 12680fd73130SBarry Smith onmax = PetscMax(onmax, OAIJ->ilen[i]); 12690fd73130SBarry Smith for (j = 0; j < dof; j++) { 12700fd73130SBarry Smith dnz[dof * i + j] = AIJ->ilen[i]; 12710fd73130SBarry Smith onz[dof * i + j] = OAIJ->ilen[i]; 12720fd73130SBarry Smith } 12730fd73130SBarry Smith } 12749566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 12759566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 12769566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 12779566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz)); 12789566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, dof)); 12799566063dSJacob Faibussowitsch PetscCall(PetscFree2(dnz, onz)); 12800fd73130SBarry Smith 12819566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols)); 1282d0f46423SBarry Smith rstart = dof * maij->A->rmap->rstart; 1283d0f46423SBarry Smith cstart = dof * maij->A->cmap->rstart; 12840fd73130SBarry Smith garray = mpiaij->garray; 12850fd73130SBarry Smith 12860fd73130SBarry Smith ii = rstart; 1287d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) { 12889566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 12899566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 12900fd73130SBarry Smith for (j = 0; j < dof; j++) { 1291ad540459SPierre Jolivet for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j; 1292ad540459SPierre Jolivet for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j; 12939566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 12949566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES)); 12950fd73130SBarry Smith ii++; 12960fd73130SBarry Smith } 12979566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 12989566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 12990fd73130SBarry Smith } 13009566063dSJacob Faibussowitsch PetscCall(PetscFree2(icols, oicols)); 13010fd73130SBarry Smith 13029566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 13039566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1304ceb03754SKris Buschelman 1305511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 13067adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 13077adad957SLisandro Dalcin ((PetscObject)A)->refct = 1; 130826fbe8dcSKarl Rupp 13099566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 131026fbe8dcSKarl Rupp 13117adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 1312c3d102feSKris Buschelman } else { 1313ceb03754SKris Buschelman *newmat = B; 1314c3d102feSKris Buschelman } 13153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13160fd73130SBarry Smith } 13170fd73130SBarry Smith 131880eb40d0SJacob Faibussowitsch static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) 1319d71ae5a4SJacob Faibussowitsch { 13209e516c8fSBarry Smith Mat A; 13219e516c8fSBarry Smith 13229e516c8fSBarry Smith PetscFunctionBegin; 13239566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 13249566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 13259566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 13263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13279e516c8fSBarry Smith } 13280fd73130SBarry Smith 132980eb40d0SJacob Faibussowitsch static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) 1330d71ae5a4SJacob Faibussowitsch { 1331ec626240SStefano Zampini Mat A; 1332ec626240SStefano Zampini 1333ec626240SStefano Zampini PetscFunctionBegin; 13349566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 13359566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat)); 13369566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 13373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1338ec626240SStefano Zampini } 1339ec626240SStefano Zampini 1340bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 1341480dffcfSJed Brown /*@ 13420bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 13430bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 134411a5261eSBarry Smith way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices, 134511a5261eSBarry Smith and `MATMPIAIJ` for distributed matrices. 13460bad9183SKris Buschelman 1347ff585edeSJed Brown Collective 1348ff585edeSJed Brown 1349ff585edeSJed Brown Input Parameters: 135011a5261eSBarry Smith + A - the `MATAIJ` matrix describing the action on blocks 1351ff585edeSJed Brown - dof - the block size (number of components per node) 1352ff585edeSJed Brown 1353ff585edeSJed Brown Output Parameter: 135411a5261eSBarry Smith . maij - the new `MATMAIJ` matrix 1355ff585edeSJed Brown 13560bad9183SKris Buschelman Operations provided: 135767b8a455SSatish Balay .vb 135811a5261eSBarry Smith MatMult() 135911a5261eSBarry Smith MatMultTranspose() 136011a5261eSBarry Smith MatMultAdd() 136111a5261eSBarry Smith MatMultTransposeAdd() 136211a5261eSBarry Smith MatView() 136367b8a455SSatish Balay .ve 13640bad9183SKris Buschelman 13650bad9183SKris Buschelman Level: advanced 13660bad9183SKris Buschelman 136711a5261eSBarry Smith .seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ` 1368ff585edeSJed Brown @*/ 1369d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) 1370d71ae5a4SJacob Faibussowitsch { 1371b24ad042SBarry Smith PetscInt n; 137282b1193eSBarry Smith Mat B; 137390f67b22SBarry Smith PetscBool flg; 1374fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 1375fdc842d1SBarry Smith /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 1376fdc842d1SBarry Smith PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 1377fdc842d1SBarry Smith #endif 137882b1193eSBarry Smith 137982b1193eSBarry Smith PetscFunctionBegin; 1380fdc842d1SBarry Smith dof = PetscAbs(dof); 13819566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A)); 1382d72c5c08SBarry Smith 138326fbe8dcSKarl Rupp if (dof == 1) *maij = A; 138426fbe8dcSKarl Rupp else { 13859566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1386c248e2fdSStefano Zampini /* propagate vec type */ 13879566063dSJacob Faibussowitsch PetscCall(MatSetVecType(B, A->defaultvectype)); 13889566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N)); 13899566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, dof)); 13909566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, dof)); 13919566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 13929566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 139326fbe8dcSKarl Rupp 1394cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 1395d72c5c08SBarry Smith 139690f67b22SBarry Smith PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 139790f67b22SBarry Smith if (flg) { 1398feb9fe23SJed Brown Mat_SeqMAIJ *b; 1399feb9fe23SJed Brown 14009566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATSEQMAIJ)); 140126fbe8dcSKarl Rupp 14020298fd71SBarry Smith B->ops->setup = NULL; 14034cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 14040fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 14054222ddf1SHong Zhang 1406feb9fe23SJed Brown b = (Mat_SeqMAIJ *)B->data; 1407b9b97703SBarry Smith b->dof = dof; 14084cb9d9b8SBarry Smith b->AIJ = A; 140926fbe8dcSKarl Rupp 1410d72c5c08SBarry Smith if (dof == 2) { 1411cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 1412cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1413cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1414cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1415bcc973b7SBarry Smith } else if (dof == 3) { 1416bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 1417bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1418bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1419bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1420bcc973b7SBarry Smith } else if (dof == 4) { 1421bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 1422bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1423bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1424bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1425f9fae5adSBarry Smith } else if (dof == 5) { 1426f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 1427f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1428f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1429f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 14306cd98798SBarry Smith } else if (dof == 6) { 14316cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 14326cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 14336cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 14346cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1435ed8eea03SBarry Smith } else if (dof == 7) { 1436ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 1437ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 1438ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 1439ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 144066ed3db0SBarry Smith } else if (dof == 8) { 144166ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 144266ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 144366ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 144466ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 14452b692628SMatthew Knepley } else if (dof == 9) { 14462b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 14472b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 14482b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 14492b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 1450b9cda22cSBarry Smith } else if (dof == 10) { 1451b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 1452b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 1453b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 1454b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 1455dbdd7285SBarry Smith } else if (dof == 11) { 1456dbdd7285SBarry Smith B->ops->mult = MatMult_SeqMAIJ_11; 1457dbdd7285SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_11; 1458dbdd7285SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 1459dbdd7285SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 14602f7816d4SBarry Smith } else if (dof == 16) { 14612f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 14622f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 14632f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 14642f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 1465ed1c418cSBarry Smith } else if (dof == 18) { 1466ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18; 1467ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18; 1468ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 1469ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 147082b1193eSBarry Smith } else { 14716861f193SBarry Smith B->ops->mult = MatMult_SeqMAIJ_N; 14726861f193SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_N; 14736861f193SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 14746861f193SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 147582b1193eSBarry Smith } 1476fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 14779566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ)); 1478fdc842d1SBarry Smith #endif 14799566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ)); 14809566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 1481f4a53059SBarry Smith } else { 1482f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1483feb9fe23SJed Brown Mat_MPIMAIJ *b; 1484f4a53059SBarry Smith IS from, to; 1485f4a53059SBarry Smith Vec gvec; 1486f4a53059SBarry Smith 14879566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATMPIMAIJ)); 148826fbe8dcSKarl Rupp 14890298fd71SBarry Smith B->ops->setup = NULL; 1490d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 14910fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 149226fbe8dcSKarl Rupp 1493b9b97703SBarry Smith b = (Mat_MPIMAIJ *)B->data; 1494b9b97703SBarry Smith b->dof = dof; 1495b9b97703SBarry Smith b->A = A; 149626fbe8dcSKarl Rupp 14979566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ)); 14989566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ)); 14994cb9d9b8SBarry Smith 15009566063dSJacob Faibussowitsch PetscCall(VecGetSize(mpiaij->lvec, &n)); 15019566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_SELF, &b->w)); 15029566063dSJacob Faibussowitsch PetscCall(VecSetSizes(b->w, n * dof, n * dof)); 15039566063dSJacob Faibussowitsch PetscCall(VecSetBlockSize(b->w, dof)); 15049566063dSJacob Faibussowitsch PetscCall(VecSetType(b->w, VECSEQ)); 1505f4a53059SBarry Smith 1506f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 15079566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 15089566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to)); 1509f4a53059SBarry Smith 1510f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 15119566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec)); 1512f4a53059SBarry Smith 1513f4a53059SBarry Smith /* generate the scatter context */ 15149566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx)); 1515f4a53059SBarry Smith 15169566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 15179566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 15189566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 1519f4a53059SBarry Smith 1520f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 15214cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 15224cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 15234cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 152426fbe8dcSKarl Rupp 1525fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 15269566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ)); 1527fdc842d1SBarry Smith #endif 15289566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ)); 15299566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 1530f4a53059SBarry Smith } 15317dae84e0SHong Zhang B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 1532ec626240SStefano Zampini B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 15339566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 1534fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 1535fdc842d1SBarry Smith /* temporary until we have CUDA implementation of MAIJ */ 1536fdc842d1SBarry Smith { 1537fdc842d1SBarry Smith PetscBool flg; 1538fdc842d1SBarry Smith if (convert) { 15399566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "")); 154048a46eb9SPierre Jolivet if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B)); 1541fdc842d1SBarry Smith } 1542fdc842d1SBarry Smith } 1543fdc842d1SBarry Smith #endif 1544cd3bbe55SBarry Smith *maij = B; 1545d72c5c08SBarry Smith } 15463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 154782b1193eSBarry Smith } 1548