156bd94f4SPierre Jolivet static char help[] = "Mini-app to benchmark matrix--matrix multiplication\n\n"; 256bd94f4SPierre Jolivet 356bd94f4SPierre Jolivet /* 456bd94f4SPierre Jolivet See the paper below for more information 556bd94f4SPierre Jolivet 656bd94f4SPierre Jolivet "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods", 756bd94f4SPierre Jolivet P. Jolivet, J. E. Roman, and S. Zampini (2020). 856bd94f4SPierre Jolivet */ 956bd94f4SPierre Jolivet 1056bd94f4SPierre Jolivet #include <petsc.h> 1156bd94f4SPierre Jolivet 12b88df2e7SBarry Smith #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1356bd94f4SPierre Jolivet #include <mkl.h> 1456bd94f4SPierre Jolivet #define PetscStackCallMKLSparse(func, args) do { \ 1556bd94f4SPierre Jolivet sparse_status_t __ierr; \ 1656bd94f4SPierre Jolivet PetscStackPush(#func); \ 1756bd94f4SPierre Jolivet __ierr = func args; \ 1856bd94f4SPierre Jolivet PetscStackPop; \ 192c71b3e2SJacob Faibussowitsch PetscCheckFalse(__ierr != SPARSE_STATUS_SUCCESS,PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)__ierr); \ 2056bd94f4SPierre Jolivet } while (0) 2156bd94f4SPierre Jolivet #else 2256bd94f4SPierre Jolivet #define PetscStackCallMKLSparse(func, args) do { \ 2356bd94f4SPierre Jolivet SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); \ 2456bd94f4SPierre Jolivet } while (0) 2556bd94f4SPierre Jolivet #endif 2656bd94f4SPierre Jolivet 27956f8c0dSBarry Smith int main(int argc, char** argv) 28956f8c0dSBarry Smith { 2956bd94f4SPierre Jolivet Mat A, C, D, E; 3056bd94f4SPierre Jolivet PetscInt nbs = 10, ntype = 10, nN = 8, m, M, trial = 5; 3156bd94f4SPierre Jolivet PetscViewer viewer; 3256bd94f4SPierre Jolivet PetscInt bs[10], N[8]; 3356bd94f4SPierre Jolivet char *type[10]; 3456bd94f4SPierre Jolivet PetscMPIInt size; 3556bd94f4SPierre Jolivet PetscBool flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, trans = PETSC_FALSE, convert = PETSC_FALSE, mkl; 3656bd94f4SPierre Jolivet char file[PETSC_MAX_PATH_LEN]; 3756bd94f4SPierre Jolivet 38*9566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc, &argv, NULL, help)); 39*9566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 402c71b3e2SJacob Faibussowitsch PetscCheckFalse(size != 1,PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only"); 41*9566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg)); 4228b400f6SJacob Faibussowitsch PetscCheck(flg,PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f option"); 43*9566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL)); 44*9566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg)); 4556bd94f4SPierre Jolivet if (!flg) { 4656bd94f4SPierre Jolivet nbs = 1; 4756bd94f4SPierre Jolivet bs[0] = 1; 4856bd94f4SPierre Jolivet } 49*9566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg)); 5056bd94f4SPierre Jolivet if (!flg) { 5156bd94f4SPierre Jolivet nN = 8; 5256bd94f4SPierre Jolivet N[0] = 1; N[1] = 2; N[2] = 4; N[3] = 8; 5356bd94f4SPierre Jolivet N[4] = 16; N[5] = 32; N[6] = 64; N[7] = 128; 5456bd94f4SPierre Jolivet } 55*9566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg)); 5656bd94f4SPierre Jolivet if (!flg) { 5756bd94f4SPierre Jolivet ntype = 1; 58*9566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSEQAIJ, &type[0])); 5956bd94f4SPierre Jolivet } 60*9566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL)); 61*9566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-trans", &trans, NULL)); 62*9566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL)); 6356bd94f4SPierre Jolivet for (PetscInt j = 0; j < nbs; ++j) { 64*9566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer)); 65*9566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 66*9566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(A)); 67*9566063dSJacob Faibussowitsch PetscCall(MatLoad(A, viewer)); 68*9566063dSJacob Faibussowitsch PetscCall(PetscViewerDestroy(&viewer)); 69*9566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "")); 7028b400f6SJacob Faibussowitsch PetscCheck(flg,PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate a MatAIJ input matrix"); 71*9566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &m, &M)); 7256bd94f4SPierre Jolivet if (m == M) { 7356bd94f4SPierre Jolivet Mat oA; 74*9566063dSJacob Faibussowitsch PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &oA)); 75*9566063dSJacob Faibussowitsch PetscCall(MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN)); 76*9566063dSJacob Faibussowitsch PetscCall(MatDestroy(&oA)); 7756bd94f4SPierre Jolivet } 78*9566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &m, NULL)); 79*9566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &M, NULL)); 8056bd94f4SPierre Jolivet if (bs[j] > 1) { 8156bd94f4SPierre Jolivet Mat T, Tt, B; 8256bd94f4SPierre Jolivet const PetscScalar *ptr; 8356bd94f4SPierre Jolivet PetscScalar *val, *Aa; 8456bd94f4SPierre Jolivet const PetscInt *Ai, *Aj; 8556bd94f4SPierre Jolivet PetscInt An, i, k; 8656bd94f4SPierre Jolivet PetscBool done; 8756bd94f4SPierre Jolivet 88*9566063dSJacob Faibussowitsch PetscCall(MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T)); 89*9566063dSJacob Faibussowitsch PetscCall(MatSetRandom(T, NULL)); 90*9566063dSJacob Faibussowitsch PetscCall(MatTranspose(T, MAT_INITIAL_MATRIX, &Tt)); 91*9566063dSJacob Faibussowitsch PetscCall(MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN)); 92*9566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Tt)); 93*9566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(T, &ptr)); 94*9566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done)); 952c71b3e2SJacob Faibussowitsch PetscCheckFalse(!done || An != m,PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes"); 96*9566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(A, &Aa)); 97*9566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD, &B)); 98*9566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATSEQBAIJ)); 99*9566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE)); 100*9566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Ai[An] * bs[j] * bs[j], &val)); 10156bd94f4SPierre Jolivet for (i = 0; i < Ai[An]; ++i) 10256bd94f4SPierre Jolivet for (k = 0; k < bs[j] * bs[j]; ++k) 10356bd94f4SPierre Jolivet val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k]; 104*9566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE)); 105*9566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val)); 106*9566063dSJacob Faibussowitsch PetscCall(PetscFree(val)); 107*9566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 108*9566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done)); 109*9566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(T, &ptr)); 110*9566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T)); 111*9566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 11256bd94f4SPierre Jolivet A = B; 11356bd94f4SPierre Jolivet } 11456bd94f4SPierre Jolivet /* reconvert back to SeqAIJ before converting to the desired type later */ 11556bd94f4SPierre Jolivet if (!convert) E = A; 116*9566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E)); 117*9566063dSJacob Faibussowitsch PetscCall(MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE)); 11856bd94f4SPierre Jolivet for (PetscInt i = 0; i < ntype; ++i) { 11956bd94f4SPierre Jolivet char *tmp; 12056bd94f4SPierre Jolivet PetscInt *ia_ptr, *ja_ptr, k; 12156bd94f4SPierre Jolivet PetscScalar *a_ptr; 122b88df2e7SBarry Smith #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 12356bd94f4SPierre Jolivet struct matrix_descr descr; 12456bd94f4SPierre Jolivet sparse_matrix_t spr; 12556bd94f4SPierre Jolivet descr.type = SPARSE_MATRIX_TYPE_GENERAL; 12656bd94f4SPierre Jolivet descr.diag = SPARSE_DIAG_NON_UNIT; 12756bd94f4SPierre Jolivet #endif 12856bd94f4SPierre Jolivet if (convert) { 129*9566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 13056bd94f4SPierre Jolivet } 131*9566063dSJacob Faibussowitsch PetscCall(PetscStrstr(type[i], "mkl", &tmp)); 13256bd94f4SPierre Jolivet if (tmp) { 13356bd94f4SPierre Jolivet size_t mlen, tlen; 13456bd94f4SPierre Jolivet char base[256]; 13556bd94f4SPierre Jolivet 13656bd94f4SPierre Jolivet mkl = PETSC_TRUE; 137*9566063dSJacob Faibussowitsch PetscCall(PetscStrlen(tmp, &mlen)); 138*9566063dSJacob Faibussowitsch PetscCall(PetscStrlen(type[i], &tlen)); 139*9566063dSJacob Faibussowitsch PetscCall(PetscStrncpy(base, type[i], tlen-mlen + 1)); 140*9566063dSJacob Faibussowitsch PetscCall(MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A)); 14156bd94f4SPierre Jolivet } else { 14256bd94f4SPierre Jolivet mkl = PETSC_FALSE; 143*9566063dSJacob Faibussowitsch PetscCall(PetscStrstr(type[i], "maij", &tmp)); 14456bd94f4SPierre Jolivet if (!tmp) { 145*9566063dSJacob Faibussowitsch PetscCall(MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A)); 14656bd94f4SPierre Jolivet } else { 147*9566063dSJacob Faibussowitsch PetscCall(MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A)); 14856bd94f4SPierre Jolivet maij = PETSC_TRUE; 14956bd94f4SPierre Jolivet } 15056bd94f4SPierre Jolivet } 151*9566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 15256bd94f4SPierre Jolivet if (mkl) { 15356bd94f4SPierre Jolivet const PetscInt *Ai, *Aj; 15456bd94f4SPierre Jolivet PetscInt An; 15556bd94f4SPierre Jolivet PetscBool done; 15656bd94f4SPierre Jolivet 157*9566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, "")); 15828b400f6SJacob Faibussowitsch PetscCheck(flg,PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Not implemented"); 159*9566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 160*9566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done)); 16128b400f6SJacob Faibussowitsch PetscCheck(done,PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes"); 162*9566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(An + 1, &ia_ptr)); 163*9566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Ai[An], &ja_ptr)); 16456bd94f4SPierre Jolivet if (flg) { /* SeqAIJ */ 16556bd94f4SPierre Jolivet for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k]; 16656bd94f4SPierre Jolivet for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k]; 167*9566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(A, &a_ptr)); 16856bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 16956bd94f4SPierre Jolivet } else { 170*9566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg)); 17156bd94f4SPierre Jolivet if (flg) { 17256bd94f4SPierre Jolivet for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 17356bd94f4SPierre Jolivet for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 174*9566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJGetArray(A, &a_ptr)); 17556bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 17656bd94f4SPierre Jolivet } else { 177*9566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg)); 17856bd94f4SPierre Jolivet if (flg) { 17956bd94f4SPierre Jolivet for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 18056bd94f4SPierre Jolivet for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 181*9566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJGetArray(A, &a_ptr)); 18256bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 183b88df2e7SBarry Smith #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 18456bd94f4SPierre Jolivet descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 18556bd94f4SPierre Jolivet descr.mode = SPARSE_FILL_MODE_UPPER; 18656bd94f4SPierre Jolivet descr.diag = SPARSE_DIAG_NON_UNIT; 18756bd94f4SPierre Jolivet #endif 18856bd94f4SPierre Jolivet } 18956bd94f4SPierre Jolivet } 19056bd94f4SPierre Jolivet } 191*9566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 192*9566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done)); 19356bd94f4SPierre Jolivet } 19456bd94f4SPierre Jolivet 195*9566063dSJacob Faibussowitsch PetscCall(MatViewFromOptions(A, NULL, "-A_view")); 19656bd94f4SPierre Jolivet 19756bd94f4SPierre Jolivet for (k = 0; k < nN; ++k) { 19856bd94f4SPierre Jolivet MatType Atype, Ctype; 19956bd94f4SPierre Jolivet PetscInt AM, AN, CM, CN, t; 200956f8c0dSBarry Smith #if defined(PETSC_USE_LOG) 20156bd94f4SPierre Jolivet PetscLogStage stage, tstage; 20256bd94f4SPierre Jolivet char stage_s[256]; 203956f8c0dSBarry Smith #endif 20456bd94f4SPierre Jolivet 205*9566063dSJacob Faibussowitsch PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C)); 206*9566063dSJacob Faibussowitsch PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D)); 207*9566063dSJacob Faibussowitsch PetscCall(MatSetRandom(C, NULL)); 20856bd94f4SPierre Jolivet if (cuda) { /* convert to GPU if needed */ 209*9566063dSJacob Faibussowitsch PetscCall(MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C)); 210*9566063dSJacob Faibussowitsch PetscCall(MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D)); 21156bd94f4SPierre Jolivet } 21256bd94f4SPierre Jolivet if (mkl) { 21356bd94f4SPierre Jolivet if (N[k] > 1) PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial)); 21456bd94f4SPierre Jolivet else PetscStackCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial)); 21556bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE)); 21656bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_optimize, (spr)); 21756bd94f4SPierre Jolivet } 218*9566063dSJacob Faibussowitsch PetscCall(MatGetType(A, &Atype)); 219*9566063dSJacob Faibussowitsch PetscCall(MatGetType(C, &Ctype)); 220*9566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &AM, &AN)); 221*9566063dSJacob Faibussowitsch PetscCall(MatGetSize(C, &CM, &CN)); 22256bd94f4SPierre Jolivet 223956f8c0dSBarry Smith #if defined(PETSC_USE_LOG) 22456bd94f4SPierre Jolivet if (!maij || N[k] > 1) { 225*9566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k])); 226*9566063dSJacob Faibussowitsch PetscCall(PetscLogStageRegister(stage_s, &stage)); 22756bd94f4SPierre Jolivet } 22856bd94f4SPierre Jolivet if (trans && N[k] > 1) { 229*9566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "trans_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k])); 230*9566063dSJacob Faibussowitsch PetscCall(PetscLogStageRegister(stage_s, &tstage)); 23156bd94f4SPierre Jolivet } 232956f8c0dSBarry Smith #endif 23356bd94f4SPierre Jolivet /* A*B */ 23456bd94f4SPierre Jolivet if (N[k] > 1) { 23556bd94f4SPierre Jolivet if (!maij) { 236*9566063dSJacob Faibussowitsch PetscCall(MatProductCreateWithMat(A, C, NULL, D)); 237*9566063dSJacob Faibussowitsch PetscCall(MatProductSetType(D, MATPRODUCT_AB)); 238*9566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(D)); 239*9566063dSJacob Faibussowitsch PetscCall(MatProductSymbolic(D)); 24056bd94f4SPierre Jolivet } 24156bd94f4SPierre Jolivet 24256bd94f4SPierre Jolivet if (!mkl) { 24356bd94f4SPierre Jolivet if (!maij) { 244*9566063dSJacob Faibussowitsch PetscCall(MatProductNumeric(D)); 245*9566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN)); 246*9566063dSJacob Faibussowitsch PetscCall(PetscLogStagePush(stage)); 24756bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 248*9566063dSJacob Faibussowitsch PetscCall(MatProductNumeric(D)); 24956bd94f4SPierre Jolivet } 250*9566063dSJacob Faibussowitsch PetscCall(PetscLogStagePop()); 25156bd94f4SPierre Jolivet } else { 25256bd94f4SPierre Jolivet Mat E, Ct, Dt; 25356bd94f4SPierre Jolivet Vec cC, cD; 25456bd94f4SPierre Jolivet const PetscScalar *c_ptr; 25556bd94f4SPierre Jolivet PetscScalar *d_ptr; 256*9566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(A, N[k], &E)); 257*9566063dSJacob Faibussowitsch PetscCall(MatDenseGetLocalMatrix(C, &Ct)); 258*9566063dSJacob Faibussowitsch PetscCall(MatDenseGetLocalMatrix(D, &Dt)); 259*9566063dSJacob Faibussowitsch PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct)); 260*9566063dSJacob Faibussowitsch PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt)); 261*9566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(Ct, &c_ptr)); 262*9566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(Dt, &d_ptr)); 263*9566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC)); 264*9566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD)); 265*9566063dSJacob Faibussowitsch PetscCall(MatMult(E, cC, cD)); 266*9566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1)); 267*9566063dSJacob Faibussowitsch PetscCall(PetscLogStagePush(stage)); 26856bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 269*9566063dSJacob Faibussowitsch PetscCall(MatMult(E, cC, cD)); 27056bd94f4SPierre Jolivet } 271*9566063dSJacob Faibussowitsch PetscCall(PetscLogStagePop()); 272*9566063dSJacob Faibussowitsch PetscCall(VecDestroy(&cD)); 273*9566063dSJacob Faibussowitsch PetscCall(VecDestroy(&cC)); 274*9566063dSJacob Faibussowitsch PetscCall(MatDestroy(&E)); 275*9566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(Dt, &d_ptr)); 276*9566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(Ct, &c_ptr)); 277*9566063dSJacob Faibussowitsch PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct)); 278*9566063dSJacob Faibussowitsch PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt)); 27956bd94f4SPierre Jolivet } 28056bd94f4SPierre Jolivet } else { 28156bd94f4SPierre Jolivet const PetscScalar *c_ptr; 28256bd94f4SPierre Jolivet PetscScalar *d_ptr; 28356bd94f4SPierre Jolivet 284*9566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(C, &c_ptr)); 285*9566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(D, &d_ptr)); 28656bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_mm,(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 287*9566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN)); 288*9566063dSJacob Faibussowitsch PetscCall(PetscLogStagePush(stage)); 28956bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 29056bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 29156bd94f4SPierre Jolivet } 292*9566063dSJacob Faibussowitsch PetscCall(PetscLogStagePop()); 293*9566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr)); 294*9566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(C, &c_ptr)); 29556bd94f4SPierre Jolivet } 29656bd94f4SPierre Jolivet } else if (maij) { 297*9566063dSJacob Faibussowitsch PetscCall(MatDestroy(&C)); 298*9566063dSJacob Faibussowitsch PetscCall(MatDestroy(&D)); 29956bd94f4SPierre Jolivet continue; 30056bd94f4SPierre Jolivet } else if (!mkl) { 30156bd94f4SPierre Jolivet Vec cC, cD; 30256bd94f4SPierre Jolivet 303*9566063dSJacob Faibussowitsch PetscCall(MatDenseGetColumnVecRead(C, 0, &cC)); 304*9566063dSJacob Faibussowitsch PetscCall(MatDenseGetColumnVecWrite(D, 0, &cD)); 305*9566063dSJacob Faibussowitsch PetscCall(MatMult(A, cC, cD)); 306*9566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN)); 307*9566063dSJacob Faibussowitsch PetscCall(PetscLogStagePush(stage)); 30856bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 309*9566063dSJacob Faibussowitsch PetscCall(MatMult(A, cC, cD)); 31056bd94f4SPierre Jolivet } 311*9566063dSJacob Faibussowitsch PetscCall(PetscLogStagePop()); 312*9566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreColumnVecRead(C, 0, &cC)); 313*9566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreColumnVecWrite(D, 0, &cD)); 31456bd94f4SPierre Jolivet } else { 31556bd94f4SPierre Jolivet const PetscScalar *c_ptr; 31656bd94f4SPierre Jolivet PetscScalar *d_ptr; 31756bd94f4SPierre Jolivet 318*9566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(C, &c_ptr)); 319*9566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(D, &d_ptr)); 320*9566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN)); 32156bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr)); 322*9566063dSJacob Faibussowitsch PetscCall(PetscLogStagePush(stage)); 32356bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 32456bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr)); 32556bd94f4SPierre Jolivet } 326*9566063dSJacob Faibussowitsch PetscCall(PetscLogStagePop()); 327*9566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr)); 328*9566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(C, &c_ptr)); 32956bd94f4SPierre Jolivet } 33056bd94f4SPierre Jolivet 33156bd94f4SPierre Jolivet if (check) { 332*9566063dSJacob Faibussowitsch PetscCall(MatMatMultEqual(A, C, D, 10, &flg)); 33356bd94f4SPierre Jolivet if (!flg) { 33456bd94f4SPierre Jolivet MatType Dtype; 33556bd94f4SPierre Jolivet 336*9566063dSJacob Faibussowitsch PetscCall(MatGetType(D, &Dtype)); 337*9566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k])); 33856bd94f4SPierre Jolivet } 33956bd94f4SPierre Jolivet } 34056bd94f4SPierre Jolivet 34156bd94f4SPierre Jolivet /* MKL implementation seems buggy for ABt */ 34256bd94f4SPierre Jolivet /* A*Bt */ 34356bd94f4SPierre Jolivet if (!mkl && trans && N[k] > 1) { 344*9566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "")); 34556bd94f4SPierre Jolivet if (flg) { 346*9566063dSJacob Faibussowitsch PetscCall(MatTranspose(C, MAT_INPLACE_MATRIX, &C)); 347*9566063dSJacob Faibussowitsch PetscCall(MatGetType(C, &Ctype)); 34856bd94f4SPierre Jolivet if (!mkl) { 349*9566063dSJacob Faibussowitsch PetscCall(MatProductCreateWithMat(A, C, NULL, D)); 350*9566063dSJacob Faibussowitsch PetscCall(MatProductSetType(D, MATPRODUCT_ABt)); 351*9566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(D)); 352*9566063dSJacob Faibussowitsch PetscCall(MatProductSymbolic(D)); 353*9566063dSJacob Faibussowitsch PetscCall(MatProductNumeric(D)); 354*9566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and Bt %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN)); 355*9566063dSJacob Faibussowitsch PetscCall(PetscLogStagePush(tstage)); 35656bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 357*9566063dSJacob Faibussowitsch PetscCall(MatProductNumeric(D)); 35856bd94f4SPierre Jolivet } 359*9566063dSJacob Faibussowitsch PetscCall(PetscLogStagePop()); 36056bd94f4SPierre Jolivet } else { 36156bd94f4SPierre Jolivet const PetscScalar *c_ptr; 36256bd94f4SPierre Jolivet PetscScalar *d_ptr; 36356bd94f4SPierre Jolivet 36456bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial)); 36556bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_optimize, (spr)); 366*9566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(C, &c_ptr)); 367*9566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(D, &d_ptr)); 368*9566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN)); 36956bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 370*9566063dSJacob Faibussowitsch PetscCall(PetscLogStagePush(stage)); 37156bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 37256bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 37356bd94f4SPierre Jolivet } 374*9566063dSJacob Faibussowitsch PetscCall(PetscLogStagePop()); 375*9566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr)); 376*9566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(C, &c_ptr)); 37756bd94f4SPierre Jolivet } 37856bd94f4SPierre Jolivet } 37956bd94f4SPierre Jolivet } 38056bd94f4SPierre Jolivet 38156bd94f4SPierre Jolivet if (!mkl && trans && N[k] > 1 && flg && check) { 382*9566063dSJacob Faibussowitsch PetscCall(MatMatTransposeMultEqual(A, C, D, 10, &flg)); 38356bd94f4SPierre Jolivet if (!flg) { 38456bd94f4SPierre Jolivet MatType Dtype; 385*9566063dSJacob Faibussowitsch PetscCall(MatGetType(D, &Dtype)); 386*9566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k])); 38756bd94f4SPierre Jolivet } 38856bd94f4SPierre Jolivet } 389*9566063dSJacob Faibussowitsch PetscCall(MatDestroy(&C)); 390*9566063dSJacob Faibussowitsch PetscCall(MatDestroy(&D)); 39156bd94f4SPierre Jolivet } 39256bd94f4SPierre Jolivet if (mkl) { 39356bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_destroy, (spr)); 394*9566063dSJacob Faibussowitsch PetscCall(PetscFree(ia_ptr)); 395*9566063dSJacob Faibussowitsch PetscCall(PetscFree(ja_ptr)); 39656bd94f4SPierre Jolivet } 39756bd94f4SPierre Jolivet if (cuda && i != ntype - 1) { 398*9566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n")); 39956bd94f4SPierre Jolivet break; 40056bd94f4SPierre Jolivet } 40156bd94f4SPierre Jolivet } 402*9566063dSJacob Faibussowitsch if (E != A) PetscCall(MatDestroy(&E)); 403*9566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 40456bd94f4SPierre Jolivet } 405*9566063dSJacob Faibussowitsch for (m = 0; m < ntype; ++m) PetscCall(PetscFree(type[m])); 406*9566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 40756bd94f4SPierre Jolivet return 0; 40856bd94f4SPierre Jolivet } 40956bd94f4SPierre Jolivet 41056bd94f4SPierre Jolivet /*TEST 411b8582c7cSSatish Balay build: 412dfd57a17SPierre Jolivet requires: double !complex !defined(PETSC_USE_64BIT_INDICES) 41356bd94f4SPierre Jolivet 41456bd94f4SPierre Jolivet testset: 41556bd94f4SPierre Jolivet nsize: 1 41656bd94f4SPierre Jolivet filter: sed "/Benchmarking/d" 41756bd94f4SPierre Jolivet args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -trans -convert_aij {{false true}shared output} 41856bd94f4SPierre Jolivet test: 41956bd94f4SPierre Jolivet suffix: basic 42056bd94f4SPierre Jolivet args: -type aij,sbaij,baij 42156bd94f4SPierre Jolivet output_file: output/ex237.out 42256bd94f4SPierre Jolivet test: 42356bd94f4SPierre Jolivet suffix: maij 42456bd94f4SPierre Jolivet args: -type aij,maij 42556bd94f4SPierre Jolivet output_file: output/ex237.out 42656bd94f4SPierre Jolivet test: 42756bd94f4SPierre Jolivet suffix: cuda 42856bd94f4SPierre Jolivet requires: cuda 42956bd94f4SPierre Jolivet args: -type aij,aijcusparse 43056bd94f4SPierre Jolivet output_file: output/ex237.out 43156bd94f4SPierre Jolivet test: 43256bd94f4SPierre Jolivet suffix: mkl 433b88df2e7SBarry Smith requires: mkl_sparse_optimize 43456bd94f4SPierre Jolivet args: -type aij,aijmkl,baijmkl,sbaijmkl 43556bd94f4SPierre Jolivet output_file: output/ex237.out 43656bd94f4SPierre Jolivet 43756bd94f4SPierre Jolivet TEST*/ 438