1be1d678aSKris Buschelman 2c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 3c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 482b1193eSBarry Smith 5b350b9acSSatish Balay /*@ 611a5261eSBarry Smith MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix 7ff585edeSJed Brown 811a5261eSBarry Smith Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 9ff585edeSJed Brown 10ff585edeSJed Brown Input Parameter: 1111a5261eSBarry Smith . A - the `MATMAIJ` matrix 12ff585edeSJed Brown 13ff585edeSJed Brown Output Parameter: 1411a5261eSBarry Smith . B - the `MATAIJ` matrix 15ff585edeSJed Brown 16ff585edeSJed Brown Level: advanced 17ff585edeSJed Brown 1811a5261eSBarry Smith Note: 1911a5261eSBarry Smith The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 20ff585edeSJed Brown 2111a5261eSBarry Smith .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()` 22ff585edeSJed Brown @*/ 23d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) 24d71ae5a4SJacob Faibussowitsch { 25ace3abfcSBarry Smith PetscBool ismpimaij, isseqmaij; 26b9b97703SBarry Smith 27b9b97703SBarry Smith PetscFunctionBegin; 289566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij)); 299566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij)); 30b9b97703SBarry Smith if (ismpimaij) { 31b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 32b9b97703SBarry Smith 33b9b97703SBarry Smith *B = b->A; 34b9b97703SBarry Smith } else if (isseqmaij) { 35b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 36b9b97703SBarry Smith 37b9b97703SBarry Smith *B = b->AIJ; 38b9b97703SBarry Smith } else { 39b9b97703SBarry Smith *B = A; 40b9b97703SBarry Smith } 413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 42b9b97703SBarry Smith } 43b9b97703SBarry Smith 44480dffcfSJed Brown /*@ 4511a5261eSBarry Smith MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size 46ff585edeSJed Brown 473f9fe445SBarry Smith Logically Collective 48ff585edeSJed Brown 49d8d19677SJose E. Roman Input Parameters: 5011a5261eSBarry Smith + A - the `MATMAIJ` matrix 51ff585edeSJed Brown - dof - the block size for the new matrix 52ff585edeSJed Brown 53ff585edeSJed Brown Output Parameter: 5411a5261eSBarry Smith . B - the new `MATMAIJ` matrix 55ff585edeSJed Brown 56ff585edeSJed Brown Level: advanced 57ff585edeSJed Brown 5811a5261eSBarry Smith .seealso: `MATMAIJ`, `MatCreateMAIJ()` 59ff585edeSJed Brown @*/ 60d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) 61d71ae5a4SJacob Faibussowitsch { 620298fd71SBarry Smith Mat Aij = NULL; 63b9b97703SBarry Smith 64b9b97703SBarry Smith PetscFunctionBegin; 65c5eb9154SBarry Smith PetscValidLogicalCollectiveInt(A, dof, 2); 669566063dSJacob Faibussowitsch PetscCall(MatMAIJGetAIJ(A, &Aij)); 679566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(Aij, dof, B)); 683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 69b9b97703SBarry Smith } 70b9b97703SBarry Smith 71*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 72d71ae5a4SJacob Faibussowitsch { 734cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 7482b1193eSBarry Smith 7582b1193eSBarry Smith PetscFunctionBegin; 769566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 779566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 782e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL)); 799566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL)); 809566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL)); 813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 824cb9d9b8SBarry Smith } 834cb9d9b8SBarry Smith 84*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatSetUp_MAIJ(Mat A) 85d71ae5a4SJacob Faibussowitsch { 86356c569eSBarry Smith PetscFunctionBegin; 87ce94432eSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices"); 88356c569eSBarry Smith } 89356c569eSBarry Smith 90*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) 91d71ae5a4SJacob Faibussowitsch { 920fd73130SBarry Smith Mat B; 930fd73130SBarry Smith 940fd73130SBarry Smith PetscFunctionBegin; 959566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 969566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 979566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 990fd73130SBarry Smith } 1000fd73130SBarry Smith 101*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) 102d71ae5a4SJacob Faibussowitsch { 1030fd73130SBarry Smith Mat B; 1040fd73130SBarry Smith 1050fd73130SBarry Smith PetscFunctionBegin; 1069566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); 1079566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer)); 1089566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1100fd73130SBarry Smith } 1110fd73130SBarry Smith 112*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 113d71ae5a4SJacob Faibussowitsch { 1144cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 1154cb9d9b8SBarry Smith 1164cb9d9b8SBarry Smith PetscFunctionBegin; 1179566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ)); 1189566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->OAIJ)); 1199566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->A)); 1209566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->ctx)); 1219566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->w)); 1229566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1232e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL)); 1249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL)); 1259566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL)); 1269566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 128b9b97703SBarry Smith } 129b9b97703SBarry Smith 1300bad9183SKris Buschelman /*MC 131fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 1320bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently. 13311a5261eSBarry Smith The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices. 1340bad9183SKris Buschelman 1350bad9183SKris Buschelman Operations provided: 13667b8a455SSatish Balay .vb 13711a5261eSBarry Smith MatMult() 13811a5261eSBarry Smith MatMultTranspose() 13911a5261eSBarry Smith MatMultAdd() 14011a5261eSBarry Smith MatMultTransposeAdd() 14167b8a455SSatish Balay .ve 1420bad9183SKris Buschelman 1430bad9183SKris Buschelman Level: advanced 1440bad9183SKris Buschelman 14511a5261eSBarry Smith .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()` 1460bad9183SKris Buschelman M*/ 1470bad9183SKris Buschelman 148d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 149d71ae5a4SJacob Faibussowitsch { 1504cb9d9b8SBarry Smith Mat_MPIMAIJ *b; 15164b52464SHong Zhang PetscMPIInt size; 15282b1193eSBarry Smith 15382b1193eSBarry Smith PetscFunctionBegin; 1544dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&b)); 155b0a32e0cSBarry Smith A->data = (void *)b; 15626fbe8dcSKarl Rupp 1579566063dSJacob Faibussowitsch PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 15826fbe8dcSKarl Rupp 159356c569eSBarry Smith A->ops->setup = MatSetUp_MAIJ; 160f4a53059SBarry Smith 161f4259b30SLisandro Dalcin b->AIJ = NULL; 162cd3bbe55SBarry Smith b->dof = 0; 163f4259b30SLisandro Dalcin b->OAIJ = NULL; 164f4259b30SLisandro Dalcin b->ctx = NULL; 165f4259b30SLisandro Dalcin b->w = NULL; 1669566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 16764b52464SHong Zhang if (size == 1) { 1689566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ)); 16964b52464SHong Zhang } else { 1709566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ)); 17164b52464SHong Zhang } 17232e7c8b0SBarry Smith A->preallocated = PETSC_TRUE; 17332e7c8b0SBarry Smith A->assembled = PETSC_TRUE; 1743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17582b1193eSBarry Smith } 17682b1193eSBarry Smith 177*80eb40d0SJacob Faibussowitsch #if PetscHasAttribute(always_inline) 178*80eb40d0SJacob Faibussowitsch #define PETSC_FORCE_INLINE __attribute__((always_inline)) 179*80eb40d0SJacob Faibussowitsch #else 180*80eb40d0SJacob Faibussowitsch #define PETSC_FORCE_INLINE 181*80eb40d0SJacob Faibussowitsch #endif 182*80eb40d0SJacob Faibussowitsch enum { 183*80eb40d0SJacob Faibussowitsch MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18 184*80eb40d0SJacob Faibussowitsch }; 185*80eb40d0SJacob Faibussowitsch 186*80eb40d0SJacob Faibussowitsch // try as hard as possible to get these "template"s inlined, GCC apparently does take 'inline' 187*80eb40d0SJacob Faibussowitsch // keyword into account for these... 188*80eb40d0SJacob Faibussowitsch PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 189*80eb40d0SJacob Faibussowitsch { 190*80eb40d0SJacob Faibussowitsch const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 191*80eb40d0SJacob Faibussowitsch const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 192*80eb40d0SJacob Faibussowitsch const Mat baij = b->AIJ; 193*80eb40d0SJacob Faibussowitsch const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 194*80eb40d0SJacob Faibussowitsch const PetscInt m = baij->rmap->n; 195*80eb40d0SJacob Faibussowitsch const PetscInt nz = a->nz; 196*80eb40d0SJacob Faibussowitsch const PetscInt *idx = a->j; 197*80eb40d0SJacob Faibussowitsch const PetscInt *ii = a->i; 198*80eb40d0SJacob Faibussowitsch const PetscScalar *v = a->a; 199*80eb40d0SJacob Faibussowitsch PetscInt nonzerorow = 0; 200*80eb40d0SJacob Faibussowitsch const PetscScalar *x; 201*80eb40d0SJacob Faibussowitsch PetscScalar *z; 202*80eb40d0SJacob Faibussowitsch 203*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 204*80eb40d0SJacob Faibussowitsch PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 205*80eb40d0SJacob Faibussowitsch if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz)); 206*80eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 207*80eb40d0SJacob Faibussowitsch if (mult_add) { 208*80eb40d0SJacob Faibussowitsch PetscCall(VecGetArray(zz, &z)); 209*80eb40d0SJacob Faibussowitsch } else { 210*80eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayWrite(zz, &z)); 211*80eb40d0SJacob Faibussowitsch } 212*80eb40d0SJacob Faibussowitsch 213*80eb40d0SJacob Faibussowitsch for (PetscInt i = 0; i < m; ++i) { 214*80eb40d0SJacob Faibussowitsch PetscInt jrow = ii[i]; 215*80eb40d0SJacob Faibussowitsch const PetscInt n = ii[i + 1] - jrow; 216*80eb40d0SJacob Faibussowitsch // leave a line so clang-format does not align these decls 217*80eb40d0SJacob Faibussowitsch PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0}; 218*80eb40d0SJacob Faibussowitsch 219*80eb40d0SJacob Faibussowitsch nonzerorow += n > 0; 220*80eb40d0SJacob Faibussowitsch for (PetscInt j = 0; j < n; ++j, ++jrow) { 221*80eb40d0SJacob Faibussowitsch const PetscScalar v_jrow = v[jrow]; 222*80eb40d0SJacob Faibussowitsch const PetscInt N_idx_jrow = N * idx[jrow]; 223*80eb40d0SJacob Faibussowitsch 224*80eb40d0SJacob Faibussowitsch #pragma unroll 225*80eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k]; 226*80eb40d0SJacob Faibussowitsch } 227*80eb40d0SJacob Faibussowitsch 228*80eb40d0SJacob Faibussowitsch #pragma unroll 229*80eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) { 230*80eb40d0SJacob Faibussowitsch const PetscInt z_idx = N * i + k; 231*80eb40d0SJacob Faibussowitsch 232*80eb40d0SJacob Faibussowitsch if (mult_add) { 233*80eb40d0SJacob Faibussowitsch z[z_idx] += sum[k]; 234*80eb40d0SJacob Faibussowitsch } else { 235*80eb40d0SJacob Faibussowitsch z[z_idx] = sum[k]; 236*80eb40d0SJacob Faibussowitsch } 237*80eb40d0SJacob Faibussowitsch } 238*80eb40d0SJacob Faibussowitsch } 239*80eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow)))); 240*80eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 241*80eb40d0SJacob Faibussowitsch if (mult_add) { 242*80eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &z)); 243*80eb40d0SJacob Faibussowitsch } else { 244*80eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(zz, &z)); 245*80eb40d0SJacob Faibussowitsch } 246*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 247*80eb40d0SJacob Faibussowitsch } 248*80eb40d0SJacob Faibussowitsch 249*80eb40d0SJacob Faibussowitsch PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 250*80eb40d0SJacob Faibussowitsch { 251*80eb40d0SJacob Faibussowitsch const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 252*80eb40d0SJacob Faibussowitsch const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 253*80eb40d0SJacob Faibussowitsch const Mat baij = b->AIJ; 254*80eb40d0SJacob Faibussowitsch const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 255*80eb40d0SJacob Faibussowitsch const PetscInt m = baij->rmap->n; 256*80eb40d0SJacob Faibussowitsch const PetscInt nz = a->nz; 257*80eb40d0SJacob Faibussowitsch const PetscInt *a_j = a->j; 258*80eb40d0SJacob Faibussowitsch const PetscInt *a_i = a->i; 259*80eb40d0SJacob Faibussowitsch const PetscScalar *a_a = a->a; 260*80eb40d0SJacob Faibussowitsch const PetscScalar *x; 261*80eb40d0SJacob Faibussowitsch PetscScalar *z; 262*80eb40d0SJacob Faibussowitsch 263*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 264*80eb40d0SJacob Faibussowitsch PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 265*80eb40d0SJacob Faibussowitsch if (mult_add) { 266*80eb40d0SJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 267*80eb40d0SJacob Faibussowitsch } else { 268*80eb40d0SJacob Faibussowitsch PetscCall(VecSet(zz, 0.0)); 269*80eb40d0SJacob Faibussowitsch } 270*80eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 271*80eb40d0SJacob Faibussowitsch PetscCall(VecGetArray(zz, &z)); 272*80eb40d0SJacob Faibussowitsch 273*80eb40d0SJacob Faibussowitsch for (PetscInt i = 0; i < m; i++) { 274*80eb40d0SJacob Faibussowitsch const PetscInt a_ii = a_i[i]; 275*80eb40d0SJacob Faibussowitsch const PetscInt *idx = a_j + a_ii; 276*80eb40d0SJacob Faibussowitsch const PetscScalar *v = a_a + a_ii; 277*80eb40d0SJacob Faibussowitsch const PetscInt n = a_i[i + 1] - a_ii; 278*80eb40d0SJacob Faibussowitsch PetscScalar alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE]; 279*80eb40d0SJacob Faibussowitsch 280*80eb40d0SJacob Faibussowitsch #pragma unroll 281*80eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k]; 282*80eb40d0SJacob Faibussowitsch for (PetscInt j = 0; j < n; ++j) { 283*80eb40d0SJacob Faibussowitsch const PetscInt N_idx_j = N * idx[j]; 284*80eb40d0SJacob Faibussowitsch const PetscScalar v_j = v[j]; 285*80eb40d0SJacob Faibussowitsch 286*80eb40d0SJacob Faibussowitsch #pragma unroll 287*80eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j; 288*80eb40d0SJacob Faibussowitsch } 289*80eb40d0SJacob Faibussowitsch } 290*80eb40d0SJacob Faibussowitsch 291*80eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2 * N * nz)); 292*80eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 293*80eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &z)); 294*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 295*80eb40d0SJacob Faibussowitsch } 296*80eb40d0SJacob Faibussowitsch 297cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 298*80eb40d0SJacob Faibussowitsch 299*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy) 300d71ae5a4SJacob Faibussowitsch { 301bcc973b7SBarry Smith PetscFunctionBegin; 302*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 2)); 3033ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 30482b1193eSBarry Smith } 305bcc973b7SBarry Smith 306*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy) 307d71ae5a4SJacob Faibussowitsch { 308bcc973b7SBarry Smith PetscFunctionBegin; 309*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 2)); 3103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 31182b1193eSBarry Smith } 312bcc973b7SBarry Smith 313*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) 314d71ae5a4SJacob Faibussowitsch { 315bcc973b7SBarry Smith PetscFunctionBegin; 316*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 2)); 3173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 31882b1193eSBarry Smith } 319*80eb40d0SJacob Faibussowitsch 320*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) 321d71ae5a4SJacob Faibussowitsch { 322bcc973b7SBarry Smith PetscFunctionBegin; 323*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 2)); 3243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 32582b1193eSBarry Smith } 326*80eb40d0SJacob Faibussowitsch 327cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/ 328*80eb40d0SJacob Faibussowitsch 329*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy) 330d71ae5a4SJacob Faibussowitsch { 331bcc973b7SBarry Smith PetscFunctionBegin; 332*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 3)); 3333ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 334bcc973b7SBarry Smith } 335bcc973b7SBarry Smith 336*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy) 337d71ae5a4SJacob Faibussowitsch { 338bcc973b7SBarry Smith PetscFunctionBegin; 339*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 3)); 3403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 341bcc973b7SBarry Smith } 342bcc973b7SBarry Smith 343*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) 344d71ae5a4SJacob Faibussowitsch { 345bcc973b7SBarry Smith PetscFunctionBegin; 346*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 3)); 3473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 348bcc973b7SBarry Smith } 349bcc973b7SBarry Smith 350*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) 351d71ae5a4SJacob Faibussowitsch { 352bcc973b7SBarry Smith PetscFunctionBegin; 353*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 3)); 3543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3552b692628SMatthew Knepley } 3562b692628SMatthew Knepley 357b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/ 358b9cda22cSBarry Smith 359*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy) 360d71ae5a4SJacob Faibussowitsch { 3612b692628SMatthew Knepley PetscFunctionBegin; 362*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 4)); 3633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3642b692628SMatthew Knepley } 3652b692628SMatthew Knepley 366*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy) 367d71ae5a4SJacob Faibussowitsch { 3682b692628SMatthew Knepley PetscFunctionBegin; 369*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 4)); 3703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3712b692628SMatthew Knepley } 3722b692628SMatthew Knepley 373*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) 374d71ae5a4SJacob Faibussowitsch { 3752b692628SMatthew Knepley PetscFunctionBegin; 376*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 4)); 3773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 378b9cda22cSBarry Smith } 379b9cda22cSBarry Smith 380*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) 381d71ae5a4SJacob Faibussowitsch { 382b9cda22cSBarry Smith PetscFunctionBegin; 383*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 4)); 3843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 385b9cda22cSBarry Smith } 386b9cda22cSBarry Smith 387*80eb40d0SJacob Faibussowitsch /* ------------------------------------------------------------------------------*/ 388*80eb40d0SJacob Faibussowitsch 389*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy) 390d71ae5a4SJacob Faibussowitsch { 391b9cda22cSBarry Smith PetscFunctionBegin; 392*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 5)); 3933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 394b9cda22cSBarry Smith } 395b9cda22cSBarry Smith 396*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy) 397d71ae5a4SJacob Faibussowitsch { 398b9cda22cSBarry Smith PetscFunctionBegin; 399*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 5)); 400*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 401b9cda22cSBarry Smith } 402*80eb40d0SJacob Faibussowitsch 403*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) 404*80eb40d0SJacob Faibussowitsch { 405*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 406*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 5)); 407*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 408b9cda22cSBarry Smith } 409*80eb40d0SJacob Faibussowitsch 410*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) 411*80eb40d0SJacob Faibussowitsch { 412*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 413*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 5)); 414*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 415*80eb40d0SJacob Faibussowitsch } 416*80eb40d0SJacob Faibussowitsch 417*80eb40d0SJacob Faibussowitsch /* ------------------------------------------------------------------------------*/ 418*80eb40d0SJacob Faibussowitsch 419*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy) 420*80eb40d0SJacob Faibussowitsch { 421*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 422*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 6)); 423*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 424*80eb40d0SJacob Faibussowitsch } 425*80eb40d0SJacob Faibussowitsch 426*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy) 427*80eb40d0SJacob Faibussowitsch { 428*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 429*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 6)); 430*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 431*80eb40d0SJacob Faibussowitsch } 432*80eb40d0SJacob Faibussowitsch 433*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) 434*80eb40d0SJacob Faibussowitsch { 435*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 436*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 6)); 437*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 438*80eb40d0SJacob Faibussowitsch } 439*80eb40d0SJacob Faibussowitsch 440*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) 441*80eb40d0SJacob Faibussowitsch { 442*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 443*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 6)); 444*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 445*80eb40d0SJacob Faibussowitsch } 446*80eb40d0SJacob Faibussowitsch 447*80eb40d0SJacob Faibussowitsch /* ------------------------------------------------------------------------------*/ 448*80eb40d0SJacob Faibussowitsch 449*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy) 450*80eb40d0SJacob Faibussowitsch { 451*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 452*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 7)); 453*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 454*80eb40d0SJacob Faibussowitsch } 455*80eb40d0SJacob Faibussowitsch 456*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy) 457*80eb40d0SJacob Faibussowitsch { 458*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 459*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 7)); 460*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 461*80eb40d0SJacob Faibussowitsch } 462*80eb40d0SJacob Faibussowitsch 463*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) 464*80eb40d0SJacob Faibussowitsch { 465*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 466*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 7)); 467*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 468*80eb40d0SJacob Faibussowitsch } 469*80eb40d0SJacob Faibussowitsch 470*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) 471*80eb40d0SJacob Faibussowitsch { 472*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 473*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 7)); 474*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 475*80eb40d0SJacob Faibussowitsch } 476*80eb40d0SJacob Faibussowitsch 477*80eb40d0SJacob Faibussowitsch /* ------------------------------------------------------------------------------*/ 478*80eb40d0SJacob Faibussowitsch 479*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy) 480*80eb40d0SJacob Faibussowitsch { 481*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 482*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 8)); 483*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 484*80eb40d0SJacob Faibussowitsch } 485*80eb40d0SJacob Faibussowitsch 486*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy) 487*80eb40d0SJacob Faibussowitsch { 488*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 489*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 8)); 490*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 491*80eb40d0SJacob Faibussowitsch } 492*80eb40d0SJacob Faibussowitsch 493*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) 494*80eb40d0SJacob Faibussowitsch { 495*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 496*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 8)); 497*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 498*80eb40d0SJacob Faibussowitsch } 499*80eb40d0SJacob Faibussowitsch 500*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) 501*80eb40d0SJacob Faibussowitsch { 502*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 503*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 8)); 504*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 505*80eb40d0SJacob Faibussowitsch } 506*80eb40d0SJacob Faibussowitsch 507*80eb40d0SJacob Faibussowitsch /* ------------------------------------------------------------------------------*/ 508*80eb40d0SJacob Faibussowitsch 509*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy) 510*80eb40d0SJacob Faibussowitsch { 511*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 512*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 9)); 513*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 514*80eb40d0SJacob Faibussowitsch } 515*80eb40d0SJacob Faibussowitsch 516*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy) 517*80eb40d0SJacob Faibussowitsch { 518*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 519*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 9)); 520*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 521*80eb40d0SJacob Faibussowitsch } 522*80eb40d0SJacob Faibussowitsch 523*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) 524*80eb40d0SJacob Faibussowitsch { 525*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 526*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 9)); 527*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 528*80eb40d0SJacob Faibussowitsch } 529*80eb40d0SJacob Faibussowitsch 530*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) 531*80eb40d0SJacob Faibussowitsch { 532*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 533*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 9)); 534*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 535*80eb40d0SJacob Faibussowitsch } 536*80eb40d0SJacob Faibussowitsch 537*80eb40d0SJacob Faibussowitsch /* ------------------------------------------------------------------------------*/ 538*80eb40d0SJacob Faibussowitsch 539*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy) 540*80eb40d0SJacob Faibussowitsch { 541*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 542*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 10)); 543*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 544*80eb40d0SJacob Faibussowitsch } 545*80eb40d0SJacob Faibussowitsch 546*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy) 547*80eb40d0SJacob Faibussowitsch { 548*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 549*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 10)); 550*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 551*80eb40d0SJacob Faibussowitsch } 552*80eb40d0SJacob Faibussowitsch 553*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) 554*80eb40d0SJacob Faibussowitsch { 555*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 556*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 10)); 557*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 558*80eb40d0SJacob Faibussowitsch } 559*80eb40d0SJacob Faibussowitsch 560*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) 561*80eb40d0SJacob Faibussowitsch { 562*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 563*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 10)); 5643ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 565b9cda22cSBarry Smith } 566b9cda22cSBarry Smith 5672f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/ 568*80eb40d0SJacob Faibussowitsch 569*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy) 570d71ae5a4SJacob Faibussowitsch { 571dbdd7285SBarry Smith PetscFunctionBegin; 572*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 11)); 5733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 574dbdd7285SBarry Smith } 575dbdd7285SBarry Smith 576*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy) 577d71ae5a4SJacob Faibussowitsch { 578dbdd7285SBarry Smith PetscFunctionBegin; 579*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 11)); 5803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 581dbdd7285SBarry Smith } 582dbdd7285SBarry Smith 583*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) 584d71ae5a4SJacob Faibussowitsch { 585dbdd7285SBarry Smith PetscFunctionBegin; 586*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 11)); 5873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 588dbdd7285SBarry Smith } 589dbdd7285SBarry Smith 590*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) 591d71ae5a4SJacob Faibussowitsch { 592dbdd7285SBarry Smith PetscFunctionBegin; 593*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 11)); 5943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 595dbdd7285SBarry Smith } 596dbdd7285SBarry Smith 597dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/ 598*80eb40d0SJacob Faibussowitsch 599*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy) 600d71ae5a4SJacob Faibussowitsch { 6012f7816d4SBarry Smith PetscFunctionBegin; 602*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 16)); 6033ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6042f7816d4SBarry Smith } 6052f7816d4SBarry Smith 606*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy) 607d71ae5a4SJacob Faibussowitsch { 6082f7816d4SBarry Smith PetscFunctionBegin; 609*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 16)); 6103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6112f7816d4SBarry Smith } 6122f7816d4SBarry Smith 613*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) 614d71ae5a4SJacob Faibussowitsch { 6152f7816d4SBarry Smith PetscFunctionBegin; 616*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 16)); 6173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6182f7816d4SBarry Smith } 6192f7816d4SBarry Smith 620*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) 621d71ae5a4SJacob Faibussowitsch { 6222f7816d4SBarry Smith PetscFunctionBegin; 623*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 16)); 6243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 62566ed3db0SBarry Smith } 62666ed3db0SBarry Smith 627ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/ 628*80eb40d0SJacob Faibussowitsch 629*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy) 630d71ae5a4SJacob Faibussowitsch { 631ed1c418cSBarry Smith PetscFunctionBegin; 632*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 18)); 6333ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 634ed1c418cSBarry Smith } 635ed1c418cSBarry Smith 636*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy) 637d71ae5a4SJacob Faibussowitsch { 638ed1c418cSBarry Smith PetscFunctionBegin; 639*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 18)); 6403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 641ed1c418cSBarry Smith } 642ed1c418cSBarry Smith 643*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) 644d71ae5a4SJacob Faibussowitsch { 645ed1c418cSBarry Smith PetscFunctionBegin; 646*80eb40d0SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 18)); 6473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 648ed1c418cSBarry Smith } 649ed1c418cSBarry Smith 650*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) 651d71ae5a4SJacob Faibussowitsch { 652ed1c418cSBarry Smith PetscFunctionBegin; 653*80eb40d0SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 18)); 6543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 655ed1c418cSBarry Smith } 656ed1c418cSBarry Smith 657*80eb40d0SJacob Faibussowitsch /*--------------------------------------------------------------------------------------------*/ 658*80eb40d0SJacob Faibussowitsch 659*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 660d71ae5a4SJacob Faibussowitsch { 6616861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 6626861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 6636861f193SBarry Smith const PetscScalar *x, *v; 6646861f193SBarry Smith PetscScalar *y, *sums; 6656861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 6666861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k; 6676861f193SBarry Smith 6686861f193SBarry Smith PetscFunctionBegin; 6699566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 6709566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 6719566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 6726861f193SBarry Smith idx = a->j; 6736861f193SBarry Smith v = a->a; 6746861f193SBarry Smith ii = a->i; 6756861f193SBarry Smith 6766861f193SBarry Smith for (i = 0; i < m; i++) { 6776861f193SBarry Smith jrow = ii[i]; 6786861f193SBarry Smith n = ii[i + 1] - jrow; 6796861f193SBarry Smith sums = y + dof * i; 6806861f193SBarry Smith for (j = 0; j < n; j++) { 681ad540459SPierre Jolivet for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 6826861f193SBarry Smith jrow++; 6836861f193SBarry Smith } 6846861f193SBarry Smith } 6856861f193SBarry Smith 6869566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 6879566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 6889566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 6893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6906861f193SBarry Smith } 6916861f193SBarry Smith 692*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 693d71ae5a4SJacob Faibussowitsch { 6946861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 6956861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 6966861f193SBarry Smith const PetscScalar *x, *v; 6976861f193SBarry Smith PetscScalar *y, *sums; 6986861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 6996861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k; 7006861f193SBarry Smith 7016861f193SBarry Smith PetscFunctionBegin; 7029566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 7039566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 7049566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 7056861f193SBarry Smith idx = a->j; 7066861f193SBarry Smith v = a->a; 7076861f193SBarry Smith ii = a->i; 7086861f193SBarry Smith 7096861f193SBarry Smith for (i = 0; i < m; i++) { 7106861f193SBarry Smith jrow = ii[i]; 7116861f193SBarry Smith n = ii[i + 1] - jrow; 7126861f193SBarry Smith sums = y + dof * i; 7136861f193SBarry Smith for (j = 0; j < n; j++) { 714ad540459SPierre Jolivet for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 7156861f193SBarry Smith jrow++; 7166861f193SBarry Smith } 7176861f193SBarry Smith } 7186861f193SBarry Smith 7199566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 7209566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 7219566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 7223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7236861f193SBarry Smith } 7246861f193SBarry Smith 725*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 726d71ae5a4SJacob Faibussowitsch { 7276861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 7286861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 7296861f193SBarry Smith const PetscScalar *x, *v, *alpha; 7306861f193SBarry Smith PetscScalar *y; 7316861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 7326861f193SBarry Smith PetscInt n, i, k; 7336861f193SBarry Smith 7346861f193SBarry Smith PetscFunctionBegin; 7359566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 7369566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 7379566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 7386861f193SBarry Smith for (i = 0; i < m; i++) { 7396861f193SBarry Smith idx = a->j + a->i[i]; 7406861f193SBarry Smith v = a->a + a->i[i]; 7416861f193SBarry Smith n = a->i[i + 1] - a->i[i]; 7426861f193SBarry Smith alpha = x + dof * i; 7436861f193SBarry Smith while (n-- > 0) { 744ad540459SPierre Jolivet for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 7459371c9d4SSatish Balay idx++; 7469371c9d4SSatish Balay v++; 7476861f193SBarry Smith } 7486861f193SBarry Smith } 7499566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 7509566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 7519566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 7523ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7536861f193SBarry Smith } 7546861f193SBarry Smith 755*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 756d71ae5a4SJacob Faibussowitsch { 7576861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 7586861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 7596861f193SBarry Smith const PetscScalar *x, *v, *alpha; 7606861f193SBarry Smith PetscScalar *y; 7616861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 7626861f193SBarry Smith PetscInt n, i, k; 7636861f193SBarry Smith 7646861f193SBarry Smith PetscFunctionBegin; 7659566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz)); 7669566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 7679566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y)); 7686861f193SBarry Smith for (i = 0; i < m; i++) { 7696861f193SBarry Smith idx = a->j + a->i[i]; 7706861f193SBarry Smith v = a->a + a->i[i]; 7716861f193SBarry Smith n = a->i[i + 1] - a->i[i]; 7726861f193SBarry Smith alpha = x + dof * i; 7736861f193SBarry Smith while (n-- > 0) { 774ad540459SPierre Jolivet for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 7759371c9d4SSatish Balay idx++; 7769371c9d4SSatish Balay v++; 7776861f193SBarry Smith } 7786861f193SBarry Smith } 7799566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 7809566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 7819566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y)); 7823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7836861f193SBarry Smith } 7846861f193SBarry Smith 785*80eb40d0SJacob Faibussowitsch /*--------------------------------------------------------------------------------------------*/ 786*80eb40d0SJacob Faibussowitsch 787*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 788d71ae5a4SJacob Faibussowitsch { 7894cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 790f4a53059SBarry Smith 791b24ad042SBarry Smith PetscFunctionBegin; 792f4a53059SBarry Smith /* start the scatter */ 7939566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 7949566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy)); 7959566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 7969566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy)); 7973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 798f4a53059SBarry Smith } 799f4a53059SBarry Smith 800*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 801d71ae5a4SJacob Faibussowitsch { 8024cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 803b24ad042SBarry Smith 8044cb9d9b8SBarry Smith PetscFunctionBegin; 8059566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 8069566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy)); 8079566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 8089566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 8093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8104cb9d9b8SBarry Smith } 8114cb9d9b8SBarry Smith 812*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 813d71ae5a4SJacob Faibussowitsch { 8144cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 8154cb9d9b8SBarry Smith 816b24ad042SBarry Smith PetscFunctionBegin; 8174cb9d9b8SBarry Smith /* start the scatter */ 8189566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 8199566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz)); 8209566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 8219566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 8223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8234cb9d9b8SBarry Smith } 8244cb9d9b8SBarry Smith 825*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 826d71ae5a4SJacob Faibussowitsch { 8274cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 828b24ad042SBarry Smith 8294cb9d9b8SBarry Smith PetscFunctionBegin; 8309566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 8319566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz)); 8329566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 8339566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 8343ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8354cb9d9b8SBarry Smith } 8364cb9d9b8SBarry Smith 83795ddefa2SHong Zhang /* ----------------------------------------------------------------*/ 838*80eb40d0SJacob Faibussowitsch 839*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) 840d71ae5a4SJacob Faibussowitsch { 8414222ddf1SHong Zhang Mat_Product *product = C->product; 8424222ddf1SHong Zhang 8434222ddf1SHong Zhang PetscFunctionBegin; 8444222ddf1SHong Zhang if (product->type == MATPRODUCT_PtAP) { 8454222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 84698921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]); 8473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8484222ddf1SHong Zhang } 8494222ddf1SHong Zhang 850*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) 851d71ae5a4SJacob Faibussowitsch { 8524222ddf1SHong Zhang Mat_Product *product = C->product; 8534222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 8544222ddf1SHong Zhang Mat A = product->A, P = product->B; 8554222ddf1SHong Zhang PetscInt alg = 1; /* set default algorithm */ 8564222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 8574222ddf1SHong Zhang const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"}; 8584222ddf1SHong Zhang PetscInt nalg = 4; 8594222ddf1SHong Zhang #else 8604222ddf1SHong Zhang const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"}; 8614222ddf1SHong Zhang PetscInt nalg = 5; 8624222ddf1SHong Zhang #endif 8634222ddf1SHong Zhang 8644222ddf1SHong Zhang PetscFunctionBegin; 86508401ef6SPierre Jolivet PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]); 8664222ddf1SHong Zhang 8674222ddf1SHong Zhang /* PtAP */ 8684222ddf1SHong Zhang /* Check matrix local sizes */ 8699371c9d4SSatish Balay PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 8709371c9d4SSatish Balay A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend); 8719371c9d4SSatish Balay PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 8729371c9d4SSatish Balay A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend); 8734222ddf1SHong Zhang 8744222ddf1SHong Zhang /* Set the default algorithm */ 8759566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "default", &flg)); 87648a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 8774222ddf1SHong Zhang 8784222ddf1SHong Zhang /* Get runtime option */ 879d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 8809566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg)); 88148a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 882d0609cedSBarry Smith PetscOptionsEnd(); 8834222ddf1SHong Zhang 8849566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg)); 8854222ddf1SHong Zhang if (flg) { 8864222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 8873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8884222ddf1SHong Zhang } 8894222ddf1SHong Zhang 8909566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg)); 8914222ddf1SHong Zhang if (flg) { 8924222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 8933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8944222ddf1SHong Zhang } 8954222ddf1SHong Zhang 8964222ddf1SHong Zhang /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 8979566063dSJacob Faibussowitsch PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 8989566063dSJacob Faibussowitsch PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P)); 8999566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(C)); 9003ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9014222ddf1SHong Zhang } 9024222ddf1SHong Zhang 9034222ddf1SHong Zhang /* ----------------------------------------------------------------*/ 904*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) 905*80eb40d0SJacob Faibussowitsch { 906*80eb40d0SJacob Faibussowitsch /* This routine requires testing -- first draft only */ 907*80eb40d0SJacob Faibussowitsch Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 908*80eb40d0SJacob Faibussowitsch Mat P = pp->AIJ; 909*80eb40d0SJacob Faibussowitsch Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 910*80eb40d0SJacob Faibussowitsch Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data; 911*80eb40d0SJacob Faibussowitsch Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 912*80eb40d0SJacob Faibussowitsch const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj; 913*80eb40d0SJacob Faibussowitsch const PetscInt *ci = c->i, *cj = c->j, *cjj; 914*80eb40d0SJacob Faibussowitsch const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof; 915*80eb40d0SJacob Faibussowitsch PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense; 916*80eb40d0SJacob Faibussowitsch const MatScalar *aa = a->a, *pa = p->a, *pA, *paj; 917*80eb40d0SJacob Faibussowitsch MatScalar *ca = c->a, *caj, *apa; 918*80eb40d0SJacob Faibussowitsch 919*80eb40d0SJacob Faibussowitsch PetscFunctionBegin; 920*80eb40d0SJacob Faibussowitsch /* Allocate temporary array for storage of one row of A*P */ 921*80eb40d0SJacob Faibussowitsch PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense)); 922*80eb40d0SJacob Faibussowitsch 923*80eb40d0SJacob Faibussowitsch /* Clear old values in C */ 924*80eb40d0SJacob Faibussowitsch PetscCall(PetscArrayzero(ca, ci[cm])); 925*80eb40d0SJacob Faibussowitsch 926*80eb40d0SJacob Faibussowitsch for (i = 0; i < am; i++) { 927*80eb40d0SJacob Faibussowitsch /* Form sparse row of A*P */ 928*80eb40d0SJacob Faibussowitsch anzi = ai[i + 1] - ai[i]; 929*80eb40d0SJacob Faibussowitsch apnzj = 0; 930*80eb40d0SJacob Faibussowitsch for (j = 0; j < anzi; j++) { 931*80eb40d0SJacob Faibussowitsch /* Get offset within block of P */ 932*80eb40d0SJacob Faibussowitsch pshift = *aj % ppdof; 933*80eb40d0SJacob Faibussowitsch /* Get block row of P */ 934*80eb40d0SJacob Faibussowitsch prow = *aj++ / ppdof; /* integer division */ 935*80eb40d0SJacob Faibussowitsch pnzj = pi[prow + 1] - pi[prow]; 936*80eb40d0SJacob Faibussowitsch pjj = pj + pi[prow]; 937*80eb40d0SJacob Faibussowitsch paj = pa + pi[prow]; 938*80eb40d0SJacob Faibussowitsch for (k = 0; k < pnzj; k++) { 939*80eb40d0SJacob Faibussowitsch poffset = pjj[k] * ppdof + pshift; 940*80eb40d0SJacob Faibussowitsch if (!apjdense[poffset]) { 941*80eb40d0SJacob Faibussowitsch apjdense[poffset] = -1; 942*80eb40d0SJacob Faibussowitsch apj[apnzj++] = poffset; 943*80eb40d0SJacob Faibussowitsch } 944*80eb40d0SJacob Faibussowitsch apa[poffset] += (*aa) * paj[k]; 945*80eb40d0SJacob Faibussowitsch } 946*80eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * pnzj)); 947*80eb40d0SJacob Faibussowitsch aa++; 948*80eb40d0SJacob Faibussowitsch } 949*80eb40d0SJacob Faibussowitsch 950*80eb40d0SJacob Faibussowitsch /* Sort the j index array for quick sparse axpy. */ 951*80eb40d0SJacob Faibussowitsch /* Note: a array does not need sorting as it is in dense storage locations. */ 952*80eb40d0SJacob Faibussowitsch PetscCall(PetscSortInt(apnzj, apj)); 953*80eb40d0SJacob Faibussowitsch 954*80eb40d0SJacob Faibussowitsch /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 955*80eb40d0SJacob Faibussowitsch prow = i / ppdof; /* integer division */ 956*80eb40d0SJacob Faibussowitsch pshift = i % ppdof; 957*80eb40d0SJacob Faibussowitsch poffset = pi[prow]; 958*80eb40d0SJacob Faibussowitsch pnzi = pi[prow + 1] - poffset; 959*80eb40d0SJacob Faibussowitsch /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 960*80eb40d0SJacob Faibussowitsch pJ = pj + poffset; 961*80eb40d0SJacob Faibussowitsch pA = pa + poffset; 962*80eb40d0SJacob Faibussowitsch for (j = 0; j < pnzi; j++) { 963*80eb40d0SJacob Faibussowitsch crow = (*pJ) * ppdof + pshift; 964*80eb40d0SJacob Faibussowitsch cjj = cj + ci[crow]; 965*80eb40d0SJacob Faibussowitsch caj = ca + ci[crow]; 966*80eb40d0SJacob Faibussowitsch pJ++; 967*80eb40d0SJacob Faibussowitsch /* Perform sparse axpy operation. Note cjj includes apj. */ 968*80eb40d0SJacob Faibussowitsch for (k = 0, nextap = 0; nextap < apnzj; k++) { 969*80eb40d0SJacob Faibussowitsch if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]]; 970*80eb40d0SJacob Faibussowitsch } 971*80eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * apnzj)); 972*80eb40d0SJacob Faibussowitsch pA++; 973*80eb40d0SJacob Faibussowitsch } 974*80eb40d0SJacob Faibussowitsch 975*80eb40d0SJacob Faibussowitsch /* Zero the current row info for A*P */ 976*80eb40d0SJacob Faibussowitsch for (j = 0; j < apnzj; j++) { 977*80eb40d0SJacob Faibussowitsch apa[apj[j]] = 0.; 978*80eb40d0SJacob Faibussowitsch apjdense[apj[j]] = 0; 979*80eb40d0SJacob Faibussowitsch } 980*80eb40d0SJacob Faibussowitsch } 981*80eb40d0SJacob Faibussowitsch 982*80eb40d0SJacob Faibussowitsch /* Assemble the final matrix and clean up */ 983*80eb40d0SJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 984*80eb40d0SJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 985*80eb40d0SJacob Faibussowitsch PetscCall(PetscFree3(apa, apj, apjdense)); 986*80eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 987*80eb40d0SJacob Faibussowitsch } 988*80eb40d0SJacob Faibussowitsch 989*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) 990d71ae5a4SJacob Faibussowitsch { 9910298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 9927ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 9937ba1a0bfSKris Buschelman Mat P = pp->AIJ; 9947ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c; 995d2840e09SBarry Smith PetscInt *pti, *ptj, *ptJ; 9967ba1a0bfSKris Buschelman PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj; 997d2840e09SBarry Smith const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof; 998d2840e09SBarry Smith PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn; 9997ba1a0bfSKris Buschelman MatScalar *ca; 1000d2840e09SBarry Smith const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj; 10017ba1a0bfSKris Buschelman 10027ba1a0bfSKris Buschelman PetscFunctionBegin; 10037ba1a0bfSKris Buschelman /* Get ij structure of P^T */ 10049566063dSJacob Faibussowitsch PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 10057ba1a0bfSKris Buschelman 10067ba1a0bfSKris Buschelman cn = pn * ppdof; 10077ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 10087ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */ 10099566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(cn + 1, &ci)); 10107ba1a0bfSKris Buschelman ci[0] = 0; 10117ba1a0bfSKris Buschelman 10127ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */ 10139566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow)); 10149566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ptadenserow, an)); 10159566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(denserow, cn)); 10167ba1a0bfSKris Buschelman 10177ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 10187ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 10197ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 10209566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space)); 10217ba1a0bfSKris Buschelman current_space = free_space; 10227ba1a0bfSKris Buschelman 10237ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */ 10247ba1a0bfSKris Buschelman for (i = 0; i < pn; i++) { 10257ba1a0bfSKris Buschelman ptnzi = pti[i + 1] - pti[i]; 10267ba1a0bfSKris Buschelman ptJ = ptj + pti[i]; 10277ba1a0bfSKris Buschelman for (dof = 0; dof < ppdof; dof++) { 10287ba1a0bfSKris Buschelman ptanzi = 0; 10297ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */ 10307ba1a0bfSKris Buschelman for (j = 0; j < ptnzi; j++) { 10317ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 10327ba1a0bfSKris Buschelman arow = ptJ[j] * ppdof + dof; 10337ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 10347ba1a0bfSKris Buschelman anzj = ai[arow + 1] - ai[arow]; 10357ba1a0bfSKris Buschelman ajj = aj + ai[arow]; 10367ba1a0bfSKris Buschelman for (k = 0; k < anzj; k++) { 10377ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) { 10387ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1; 10397ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 10407ba1a0bfSKris Buschelman } 10417ba1a0bfSKris Buschelman } 10427ba1a0bfSKris Buschelman } 10437ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 10447ba1a0bfSKris Buschelman ptaj = ptasparserow; 10457ba1a0bfSKris Buschelman cnzi = 0; 10467ba1a0bfSKris Buschelman for (j = 0; j < ptanzi; j++) { 10477ba1a0bfSKris Buschelman /* Get offset within block of P */ 10487ba1a0bfSKris Buschelman pshift = *ptaj % ppdof; 10497ba1a0bfSKris Buschelman /* Get block row of P */ 10507ba1a0bfSKris Buschelman prow = (*ptaj++) / ppdof; /* integer division */ 10517ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */ 10527ba1a0bfSKris Buschelman pnzj = pi[prow + 1] - pi[prow]; 10537ba1a0bfSKris Buschelman pjj = pj + pi[prow]; 10547ba1a0bfSKris Buschelman for (k = 0; k < pnzj; k++) { 10557ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */ 10567ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 10577ba1a0bfSKris Buschelman if (!denserow[pjj[k] * ppdof + pshift]) { 10587ba1a0bfSKris Buschelman denserow[pjj[k] * ppdof + pshift] = -1; 10597ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k] * ppdof + pshift; 10607ba1a0bfSKris Buschelman } 10617ba1a0bfSKris Buschelman } 10627ba1a0bfSKris Buschelman } 10637ba1a0bfSKris Buschelman 10647ba1a0bfSKris Buschelman /* sort sparserow */ 10659566063dSJacob Faibussowitsch PetscCall(PetscSortInt(cnzi, sparserow)); 10667ba1a0bfSKris Buschelman 10677ba1a0bfSKris Buschelman /* If free space is not available, make more free space */ 10687ba1a0bfSKris Buschelman /* Double the amount of total space in the list */ 106948a46eb9SPierre Jolivet if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); 10707ba1a0bfSKris Buschelman 10717ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */ 10729566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi)); 107326fbe8dcSKarl Rupp 10747ba1a0bfSKris Buschelman current_space->array += cnzi; 10757ba1a0bfSKris Buschelman current_space->local_used += cnzi; 10767ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi; 10777ba1a0bfSKris Buschelman 107826fbe8dcSKarl Rupp for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 107926fbe8dcSKarl Rupp for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0; 108026fbe8dcSKarl Rupp 10817ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 10827ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */ 10837ba1a0bfSKris Buschelman ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi; 10847ba1a0bfSKris Buschelman } 10857ba1a0bfSKris Buschelman } 10867ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 10877ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */ 10887ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */ 10899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[cn] + 1, &cj)); 10909566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 10919566063dSJacob Faibussowitsch PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow)); 10927ba1a0bfSKris Buschelman 10937ba1a0bfSKris Buschelman /* Allocate space for ca */ 10949566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ci[cn] + 1, &ca)); 10957ba1a0bfSKris Buschelman 10967ba1a0bfSKris Buschelman /* put together the new matrix */ 10979566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C)); 10989566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(C, pp->dof)); 10997ba1a0bfSKris Buschelman 11007ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 11017ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 11024222ddf1SHong Zhang c = (Mat_SeqAIJ *)(C->data); 1103e6b907acSBarry Smith c->free_a = PETSC_TRUE; 1104e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 11057ba1a0bfSKris Buschelman c->nonew = 0; 110626fbe8dcSKarl Rupp 11074222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 11084222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 11097ba1a0bfSKris Buschelman 11107ba1a0bfSKris Buschelman /* Clean up. */ 11119566063dSJacob Faibussowitsch PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 11123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 11137ba1a0bfSKris Buschelman } 11147ba1a0bfSKris Buschelman 1115d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) 1116d71ae5a4SJacob Faibussowitsch { 11174222ddf1SHong Zhang Mat_Product *product = C->product; 11184222ddf1SHong Zhang Mat A = product->A, P = product->B; 11192121bac1SHong Zhang 11202121bac1SHong Zhang PetscFunctionBegin; 11219566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C)); 11223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 11232121bac1SHong Zhang } 11242121bac1SHong Zhang 1125bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat); 1126bc8e477aSFande Kong 1127d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) 1128d71ae5a4SJacob Faibussowitsch { 1129bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 1130bc8e477aSFande Kong 1131bc8e477aSFande Kong PetscFunctionBegin; 113234bcad68SFande Kong 11339566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C)); 11343ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1135bc8e477aSFande Kong } 1136bc8e477aSFande Kong 11374222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat); 1138bc8e477aSFande Kong 1139d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) 1140d71ae5a4SJacob Faibussowitsch { 1141bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 1142bc8e477aSFande Kong 1143bc8e477aSFande Kong PetscFunctionBegin; 11449566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C)); 11454222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 11463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1147bc8e477aSFande Kong } 1148bc8e477aSFande Kong 1149bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat); 1150bc8e477aSFande Kong 1151d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) 1152d71ae5a4SJacob Faibussowitsch { 1153bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 1154bc8e477aSFande Kong 1155bc8e477aSFande Kong PetscFunctionBegin; 115634bcad68SFande Kong 11579566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C)); 11583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1159bc8e477aSFande Kong } 1160bc8e477aSFande Kong 11614222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat); 1162bc8e477aSFande Kong 1163d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) 1164d71ae5a4SJacob Faibussowitsch { 1165bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 1166bc8e477aSFande Kong 1167bc8e477aSFande Kong PetscFunctionBegin; 116834bcad68SFande Kong 11699566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C)); 11704222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 11713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1172bc8e477aSFande Kong } 1173bc8e477aSFande Kong 1174d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) 1175d71ae5a4SJacob Faibussowitsch { 11764222ddf1SHong Zhang Mat_Product *product = C->product; 11774222ddf1SHong Zhang Mat A = product->A, P = product->B; 11784222ddf1SHong Zhang PetscBool flg; 1179bc8e477aSFande Kong 1180bc8e477aSFande Kong PetscFunctionBegin; 11819566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce", &flg)); 11824222ddf1SHong Zhang if (flg) { 11839566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C)); 11844222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 11853ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1186bc8e477aSFande Kong } 118734bcad68SFande Kong 11889566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg)); 11894222ddf1SHong Zhang if (flg) { 11909566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C)); 11914222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 11923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 11934222ddf1SHong Zhang } 119434bcad68SFande Kong 11954222ddf1SHong Zhang SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported"); 1196bc8e477aSFande Kong } 1197bc8e477aSFande Kong 1198d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1199d71ae5a4SJacob Faibussowitsch { 12009c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1201ceb03754SKris Buschelman Mat a = b->AIJ, B; 12029c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data; 12030fd73130SBarry Smith PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof; 1204ba8c8a56SBarry Smith PetscInt *cols; 1205ba8c8a56SBarry Smith PetscScalar *vals; 12069c4fc4c7SBarry Smith 12079c4fc4c7SBarry Smith PetscFunctionBegin; 12089566063dSJacob Faibussowitsch PetscCall(MatGetSize(a, &m, &n)); 12099566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(dof * m, &ilen)); 12109c4fc4c7SBarry Smith for (i = 0; i < m; i++) { 12119c4fc4c7SBarry Smith nmax = PetscMax(nmax, aij->ilen[i]); 121226fbe8dcSKarl Rupp for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i]; 12139c4fc4c7SBarry Smith } 12149566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 12159566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n)); 12169566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 12179566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen)); 12189566063dSJacob Faibussowitsch PetscCall(PetscFree(ilen)); 12199566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nmax, &icols)); 12209c4fc4c7SBarry Smith ii = 0; 12219c4fc4c7SBarry Smith for (i = 0; i < m; i++) { 12229566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 12230fd73130SBarry Smith for (j = 0; j < dof; j++) { 122426fbe8dcSKarl Rupp for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j; 12259566063dSJacob Faibussowitsch PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 12269c4fc4c7SBarry Smith ii++; 12279c4fc4c7SBarry Smith } 12289566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 12299c4fc4c7SBarry Smith } 12309566063dSJacob Faibussowitsch PetscCall(PetscFree(icols)); 12319566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 12329566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1233ceb03754SKris Buschelman 1234511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 12359566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 1236c3d102feSKris Buschelman } else { 1237ceb03754SKris Buschelman *newmat = B; 1238c3d102feSKris Buschelman } 12393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 12409c4fc4c7SBarry Smith } 12419c4fc4c7SBarry Smith 1242c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 1243be1d678aSKris Buschelman 1244d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1245d71ae5a4SJacob Faibussowitsch { 12460fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data; 1247ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B; 12480fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ; 12490fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data; 12500fd73130SBarry Smith Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data; 12510fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data; 12520298fd71SBarry Smith PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0; 12530298fd71SBarry Smith PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL; 12540fd73130SBarry Smith PetscInt rstart, cstart, *garray, ii, k; 12550fd73130SBarry Smith PetscScalar *vals, *ovals; 12560fd73130SBarry Smith 12570fd73130SBarry Smith PetscFunctionBegin; 12589566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz)); 1259d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) { 12600fd73130SBarry Smith nmax = PetscMax(nmax, AIJ->ilen[i]); 12610fd73130SBarry Smith onmax = PetscMax(onmax, OAIJ->ilen[i]); 12620fd73130SBarry Smith for (j = 0; j < dof; j++) { 12630fd73130SBarry Smith dnz[dof * i + j] = AIJ->ilen[i]; 12640fd73130SBarry Smith onz[dof * i + j] = OAIJ->ilen[i]; 12650fd73130SBarry Smith } 12660fd73130SBarry Smith } 12679566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 12689566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 12699566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype)); 12709566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz)); 12719566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, dof)); 12729566063dSJacob Faibussowitsch PetscCall(PetscFree2(dnz, onz)); 12730fd73130SBarry Smith 12749566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols)); 1275d0f46423SBarry Smith rstart = dof * maij->A->rmap->rstart; 1276d0f46423SBarry Smith cstart = dof * maij->A->cmap->rstart; 12770fd73130SBarry Smith garray = mpiaij->garray; 12780fd73130SBarry Smith 12790fd73130SBarry Smith ii = rstart; 1280d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) { 12819566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 12829566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 12830fd73130SBarry Smith for (j = 0; j < dof; j++) { 1284ad540459SPierre Jolivet for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j; 1285ad540459SPierre Jolivet for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j; 12869566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 12879566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES)); 12880fd73130SBarry Smith ii++; 12890fd73130SBarry Smith } 12909566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 12919566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 12920fd73130SBarry Smith } 12939566063dSJacob Faibussowitsch PetscCall(PetscFree2(icols, oicols)); 12940fd73130SBarry Smith 12959566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 12969566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1297ceb03754SKris Buschelman 1298511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 12997adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 13007adad957SLisandro Dalcin ((PetscObject)A)->refct = 1; 130126fbe8dcSKarl Rupp 13029566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 130326fbe8dcSKarl Rupp 13047adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 1305c3d102feSKris Buschelman } else { 1306ceb03754SKris Buschelman *newmat = B; 1307c3d102feSKris Buschelman } 13083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13090fd73130SBarry Smith } 13100fd73130SBarry Smith 1311*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) 1312d71ae5a4SJacob Faibussowitsch { 13139e516c8fSBarry Smith Mat A; 13149e516c8fSBarry Smith 13159e516c8fSBarry Smith PetscFunctionBegin; 13169566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 13179566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 13189566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 13193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13209e516c8fSBarry Smith } 13210fd73130SBarry Smith 1322*80eb40d0SJacob Faibussowitsch static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) 1323d71ae5a4SJacob Faibussowitsch { 1324ec626240SStefano Zampini Mat A; 1325ec626240SStefano Zampini 1326ec626240SStefano Zampini PetscFunctionBegin; 13279566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 13289566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat)); 13299566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 13303ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1331ec626240SStefano Zampini } 1332ec626240SStefano Zampini 1333bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */ 1334480dffcfSJed Brown /*@ 13350bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 13360bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same 133711a5261eSBarry Smith way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices, 133811a5261eSBarry Smith and `MATMPIAIJ` for distributed matrices. 13390bad9183SKris Buschelman 1340ff585edeSJed Brown Collective 1341ff585edeSJed Brown 1342ff585edeSJed Brown Input Parameters: 134311a5261eSBarry Smith + A - the `MATAIJ` matrix describing the action on blocks 1344ff585edeSJed Brown - dof - the block size (number of components per node) 1345ff585edeSJed Brown 1346ff585edeSJed Brown Output Parameter: 134711a5261eSBarry Smith . maij - the new `MATMAIJ` matrix 1348ff585edeSJed Brown 13490bad9183SKris Buschelman Operations provided: 135067b8a455SSatish Balay .vb 135111a5261eSBarry Smith MatMult() 135211a5261eSBarry Smith MatMultTranspose() 135311a5261eSBarry Smith MatMultAdd() 135411a5261eSBarry Smith MatMultTransposeAdd() 135511a5261eSBarry Smith MatView() 135667b8a455SSatish Balay .ve 13570bad9183SKris Buschelman 13580bad9183SKris Buschelman Level: advanced 13590bad9183SKris Buschelman 136011a5261eSBarry Smith .seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ` 1361ff585edeSJed Brown @*/ 1362d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) 1363d71ae5a4SJacob Faibussowitsch { 1364b24ad042SBarry Smith PetscInt n; 136582b1193eSBarry Smith Mat B; 136690f67b22SBarry Smith PetscBool flg; 1367fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 1368fdc842d1SBarry Smith /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 1369fdc842d1SBarry Smith PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 1370fdc842d1SBarry Smith #endif 137182b1193eSBarry Smith 137282b1193eSBarry Smith PetscFunctionBegin; 1373fdc842d1SBarry Smith dof = PetscAbs(dof); 13749566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A)); 1375d72c5c08SBarry Smith 137626fbe8dcSKarl Rupp if (dof == 1) *maij = A; 137726fbe8dcSKarl Rupp else { 13789566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1379c248e2fdSStefano Zampini /* propagate vec type */ 13809566063dSJacob Faibussowitsch PetscCall(MatSetVecType(B, A->defaultvectype)); 13819566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N)); 13829566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, dof)); 13839566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, dof)); 13849566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 13859566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 138626fbe8dcSKarl Rupp 1387cd3bbe55SBarry Smith B->assembled = PETSC_TRUE; 1388d72c5c08SBarry Smith 138990f67b22SBarry Smith PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 139090f67b22SBarry Smith if (flg) { 1391feb9fe23SJed Brown Mat_SeqMAIJ *b; 1392feb9fe23SJed Brown 13939566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATSEQMAIJ)); 139426fbe8dcSKarl Rupp 13950298fd71SBarry Smith B->ops->setup = NULL; 13964cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ; 13970fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ; 13984222ddf1SHong Zhang 1399feb9fe23SJed Brown b = (Mat_SeqMAIJ *)B->data; 1400b9b97703SBarry Smith b->dof = dof; 14014cb9d9b8SBarry Smith b->AIJ = A; 140226fbe8dcSKarl Rupp 1403d72c5c08SBarry Smith if (dof == 2) { 1404cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2; 1405cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1406cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1407cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1408bcc973b7SBarry Smith } else if (dof == 3) { 1409bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3; 1410bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1411bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1412bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1413bcc973b7SBarry Smith } else if (dof == 4) { 1414bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4; 1415bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1416bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1417bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1418f9fae5adSBarry Smith } else if (dof == 5) { 1419f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5; 1420f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1421f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1422f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 14236cd98798SBarry Smith } else if (dof == 6) { 14246cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6; 14256cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6; 14266cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 14276cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1428ed8eea03SBarry Smith } else if (dof == 7) { 1429ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7; 1430ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7; 1431ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 1432ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 143366ed3db0SBarry Smith } else if (dof == 8) { 143466ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8; 143566ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8; 143666ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 143766ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 14382b692628SMatthew Knepley } else if (dof == 9) { 14392b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9; 14402b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9; 14412b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 14422b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 1443b9cda22cSBarry Smith } else if (dof == 10) { 1444b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10; 1445b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10; 1446b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 1447b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 1448dbdd7285SBarry Smith } else if (dof == 11) { 1449dbdd7285SBarry Smith B->ops->mult = MatMult_SeqMAIJ_11; 1450dbdd7285SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_11; 1451dbdd7285SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 1452dbdd7285SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 14532f7816d4SBarry Smith } else if (dof == 16) { 14542f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16; 14552f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16; 14562f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 14572f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 1458ed1c418cSBarry Smith } else if (dof == 18) { 1459ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18; 1460ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18; 1461ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 1462ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 146382b1193eSBarry Smith } else { 14646861f193SBarry Smith B->ops->mult = MatMult_SeqMAIJ_N; 14656861f193SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_N; 14666861f193SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 14676861f193SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 146882b1193eSBarry Smith } 1469fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 14709566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ)); 1471fdc842d1SBarry Smith #endif 14729566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ)); 14739566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 1474f4a53059SBarry Smith } else { 1475f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1476feb9fe23SJed Brown Mat_MPIMAIJ *b; 1477f4a53059SBarry Smith IS from, to; 1478f4a53059SBarry Smith Vec gvec; 1479f4a53059SBarry Smith 14809566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATMPIMAIJ)); 148126fbe8dcSKarl Rupp 14820298fd71SBarry Smith B->ops->setup = NULL; 1483d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ; 14840fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ; 148526fbe8dcSKarl Rupp 1486b9b97703SBarry Smith b = (Mat_MPIMAIJ *)B->data; 1487b9b97703SBarry Smith b->dof = dof; 1488b9b97703SBarry Smith b->A = A; 148926fbe8dcSKarl Rupp 14909566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ)); 14919566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ)); 14924cb9d9b8SBarry Smith 14939566063dSJacob Faibussowitsch PetscCall(VecGetSize(mpiaij->lvec, &n)); 14949566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_SELF, &b->w)); 14959566063dSJacob Faibussowitsch PetscCall(VecSetSizes(b->w, n * dof, n * dof)); 14969566063dSJacob Faibussowitsch PetscCall(VecSetBlockSize(b->w, dof)); 14979566063dSJacob Faibussowitsch PetscCall(VecSetType(b->w, VECSEQ)); 1498f4a53059SBarry Smith 1499f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */ 15009566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 15019566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to)); 1502f4a53059SBarry Smith 1503f4a53059SBarry Smith /* create temporary global vector to generate scatter context */ 15049566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec)); 1505f4a53059SBarry Smith 1506f4a53059SBarry Smith /* generate the scatter context */ 15079566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx)); 1508f4a53059SBarry Smith 15099566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 15109566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 15119566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 1512f4a53059SBarry Smith 1513f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof; 15144cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 15154cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 15164cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 151726fbe8dcSKarl Rupp 1518fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 15199566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ)); 1520fdc842d1SBarry Smith #endif 15219566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ)); 15229566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 1523f4a53059SBarry Smith } 15247dae84e0SHong Zhang B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 1525ec626240SStefano Zampini B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 15269566063dSJacob Faibussowitsch PetscCall(MatSetUp(B)); 1527fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA) 1528fdc842d1SBarry Smith /* temporary until we have CUDA implementation of MAIJ */ 1529fdc842d1SBarry Smith { 1530fdc842d1SBarry Smith PetscBool flg; 1531fdc842d1SBarry Smith if (convert) { 15329566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "")); 153348a46eb9SPierre Jolivet if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B)); 1534fdc842d1SBarry Smith } 1535fdc842d1SBarry Smith } 1536fdc842d1SBarry Smith #endif 1537cd3bbe55SBarry Smith *maij = B; 1538d72c5c08SBarry Smith } 15393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 154082b1193eSBarry Smith } 1541