156bd94f4SPierre Jolivet static char help[] = "Mini-app to benchmark matrix--matrix multiplication\n\n"; 256bd94f4SPierre Jolivet 356bd94f4SPierre Jolivet /* 456bd94f4SPierre Jolivet See the paper below for more information 556bd94f4SPierre Jolivet 656bd94f4SPierre Jolivet "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods", 756bd94f4SPierre Jolivet P. Jolivet, J. E. Roman, and S. Zampini (2020). 856bd94f4SPierre Jolivet */ 956bd94f4SPierre Jolivet 1056bd94f4SPierre Jolivet #include <petsc.h> 1156bd94f4SPierre Jolivet 12b88df2e7SBarry Smith #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1356bd94f4SPierre Jolivet #include <mkl.h> 149371c9d4SSatish Balay #define PetscCallMKLSparse(func, args) \ 159371c9d4SSatish Balay do { \ 1656bd94f4SPierre Jolivet sparse_status_t __ierr; \ 17792fecdfSBarry Smith PetscStackPushExternal(#func); \ 1856bd94f4SPierre Jolivet __ierr = func args; \ 1956bd94f4SPierre Jolivet PetscStackPop; \ 2008401ef6SPierre Jolivet PetscCheck(__ierr == SPARSE_STATUS_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)__ierr); \ 2156bd94f4SPierre Jolivet } while (0) 2256bd94f4SPierre Jolivet #else 239371c9d4SSatish Balay #define PetscCallMKLSparse(func, args) \ 24d71ae5a4SJacob Faibussowitsch do { \ 25d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); \ 26d71ae5a4SJacob Faibussowitsch } while (0) 2756bd94f4SPierre Jolivet #endif 2856bd94f4SPierre Jolivet 29d71ae5a4SJacob Faibussowitsch int main(int argc, char **argv) 30d71ae5a4SJacob Faibussowitsch { 3156bd94f4SPierre Jolivet Mat A, C, D, E; 3256bd94f4SPierre Jolivet PetscInt nbs = 10, ntype = 10, nN = 8, m, M, trial = 5; 3356bd94f4SPierre Jolivet PetscViewer viewer; 3456bd94f4SPierre Jolivet PetscInt bs[10], N[8]; 3556bd94f4SPierre Jolivet char *type[10]; 3656bd94f4SPierre Jolivet PetscMPIInt size; 37*1766d9c3SPierre Jolivet PetscBool flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, abt = PETSC_FALSE, atb = PETSC_FALSE, convert = PETSC_FALSE, mkl; 3856bd94f4SPierre Jolivet char file[PETSC_MAX_PATH_LEN]; 3956bd94f4SPierre Jolivet 40327415f7SBarry Smith PetscFunctionBeginUser; 419566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc, &argv, NULL, help)); 429566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 43be096a46SBarry Smith PetscCheck(size == 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only"); 449566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg)); 4528b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f option"); 469566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL)); 479566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg)); 4856bd94f4SPierre Jolivet if (!flg) { 4956bd94f4SPierre Jolivet nbs = 1; 5056bd94f4SPierre Jolivet bs[0] = 1; 5156bd94f4SPierre Jolivet } 529566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg)); 5356bd94f4SPierre Jolivet if (!flg) { 5456bd94f4SPierre Jolivet nN = 8; 559371c9d4SSatish Balay N[0] = 1; 569371c9d4SSatish Balay N[1] = 2; 579371c9d4SSatish Balay N[2] = 4; 589371c9d4SSatish Balay N[3] = 8; 599371c9d4SSatish Balay N[4] = 16; 609371c9d4SSatish Balay N[5] = 32; 619371c9d4SSatish Balay N[6] = 64; 629371c9d4SSatish Balay N[7] = 128; 6356bd94f4SPierre Jolivet } 649566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg)); 6556bd94f4SPierre Jolivet if (!flg) { 6656bd94f4SPierre Jolivet ntype = 1; 679566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSEQAIJ, &type[0])); 6856bd94f4SPierre Jolivet } 699566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL)); 70*1766d9c3SPierre Jolivet PetscCall(PetscOptionsGetBool(NULL, NULL, "-ABt", &abt, NULL)); 71*1766d9c3SPierre Jolivet PetscCall(PetscOptionsGetBool(NULL, NULL, "-AtB", &atb, NULL)); 729566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL)); 7356bd94f4SPierre Jolivet for (PetscInt j = 0; j < nbs; ++j) { 749566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer)); 759566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 769566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(A)); 779566063dSJacob Faibussowitsch PetscCall(MatLoad(A, viewer)); 789566063dSJacob Faibussowitsch PetscCall(PetscViewerDestroy(&viewer)); 799566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "")); 8028b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate a MatAIJ input matrix"); 819566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &m, &M)); 8256bd94f4SPierre Jolivet if (m == M) { 8356bd94f4SPierre Jolivet Mat oA; 849566063dSJacob Faibussowitsch PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &oA)); 859566063dSJacob Faibussowitsch PetscCall(MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN)); 869566063dSJacob Faibussowitsch PetscCall(MatDestroy(&oA)); 8756bd94f4SPierre Jolivet } 889566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &m, NULL)); 899566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &M, NULL)); 9056bd94f4SPierre Jolivet if (bs[j] > 1) { 9156bd94f4SPierre Jolivet Mat T, Tt, B; 9256bd94f4SPierre Jolivet const PetscScalar *ptr; 9356bd94f4SPierre Jolivet PetscScalar *val, *Aa; 9456bd94f4SPierre Jolivet const PetscInt *Ai, *Aj; 9556bd94f4SPierre Jolivet PetscInt An, i, k; 9656bd94f4SPierre Jolivet PetscBool done; 9756bd94f4SPierre Jolivet 989566063dSJacob Faibussowitsch PetscCall(MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T)); 999566063dSJacob Faibussowitsch PetscCall(MatSetRandom(T, NULL)); 1009566063dSJacob Faibussowitsch PetscCall(MatTranspose(T, MAT_INITIAL_MATRIX, &Tt)); 1019566063dSJacob Faibussowitsch PetscCall(MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN)); 1029566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Tt)); 1039566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(T, &ptr)); 1049566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done)); 105e00437b9SBarry Smith PetscCheck(done && An == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes"); 1069566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(A, &Aa)); 1079566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD, &B)); 1089566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATSEQBAIJ)); 1099566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE)); 1109566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Ai[An] * bs[j] * bs[j], &val)); 11156bd94f4SPierre Jolivet for (i = 0; i < Ai[An]; ++i) 1129371c9d4SSatish Balay for (k = 0; k < bs[j] * bs[j]; ++k) val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k]; 1139566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE)); 1149566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val)); 1159566063dSJacob Faibussowitsch PetscCall(PetscFree(val)); 1169566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 1179566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done)); 1189566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(T, &ptr)); 1199566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T)); 1209566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 12156bd94f4SPierre Jolivet A = B; 12256bd94f4SPierre Jolivet } 12356bd94f4SPierre Jolivet /* reconvert back to SeqAIJ before converting to the desired type later */ 12456bd94f4SPierre Jolivet if (!convert) E = A; 1259566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E)); 1269566063dSJacob Faibussowitsch PetscCall(MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE)); 12756bd94f4SPierre Jolivet for (PetscInt i = 0; i < ntype; ++i) { 1287864358aSSatish Balay char *tmp = NULL; 12956bd94f4SPierre Jolivet PetscInt *ia_ptr, *ja_ptr, k; 13056bd94f4SPierre Jolivet PetscScalar *a_ptr; 131b88df2e7SBarry Smith #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 13256bd94f4SPierre Jolivet struct matrix_descr descr; 13356bd94f4SPierre Jolivet sparse_matrix_t spr; 13456bd94f4SPierre Jolivet descr.type = SPARSE_MATRIX_TYPE_GENERAL; 13556bd94f4SPierre Jolivet descr.diag = SPARSE_DIAG_NON_UNIT; 13656bd94f4SPierre Jolivet #endif 13748a46eb9SPierre Jolivet if (convert) PetscCall(MatDestroy(&A)); 1389566063dSJacob Faibussowitsch PetscCall(PetscStrstr(type[i], "mkl", &tmp)); 13956bd94f4SPierre Jolivet if (tmp) { 14056bd94f4SPierre Jolivet size_t mlen, tlen; 14156bd94f4SPierre Jolivet char base[256]; 14256bd94f4SPierre Jolivet 14356bd94f4SPierre Jolivet mkl = PETSC_TRUE; 1449566063dSJacob Faibussowitsch PetscCall(PetscStrlen(tmp, &mlen)); 1459566063dSJacob Faibussowitsch PetscCall(PetscStrlen(type[i], &tlen)); 1469566063dSJacob Faibussowitsch PetscCall(PetscStrncpy(base, type[i], tlen - mlen + 1)); 1479566063dSJacob Faibussowitsch PetscCall(MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A)); 14856bd94f4SPierre Jolivet } else { 14956bd94f4SPierre Jolivet mkl = PETSC_FALSE; 1509566063dSJacob Faibussowitsch PetscCall(PetscStrstr(type[i], "maij", &tmp)); 15156bd94f4SPierre Jolivet if (!tmp) { 1529566063dSJacob Faibussowitsch PetscCall(MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A)); 15356bd94f4SPierre Jolivet } else { 1549566063dSJacob Faibussowitsch PetscCall(MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A)); 15556bd94f4SPierre Jolivet maij = PETSC_TRUE; 15656bd94f4SPierre Jolivet } 15756bd94f4SPierre Jolivet } 1589566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 15956bd94f4SPierre Jolivet if (mkl) { 16056bd94f4SPierre Jolivet const PetscInt *Ai, *Aj; 16156bd94f4SPierre Jolivet PetscInt An; 16256bd94f4SPierre Jolivet PetscBool done; 16356bd94f4SPierre Jolivet 1649566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, "")); 16528b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Not implemented"); 1669566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 1679566063dSJacob Faibussowitsch PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done)); 16828b400f6SJacob Faibussowitsch PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes"); 1699566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(An + 1, &ia_ptr)); 1709566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Ai[An], &ja_ptr)); 17156bd94f4SPierre Jolivet if (flg) { /* SeqAIJ */ 17256bd94f4SPierre Jolivet for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k]; 17356bd94f4SPierre Jolivet for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k]; 1749566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(A, &a_ptr)); 175792fecdfSBarry Smith PetscCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 17656bd94f4SPierre Jolivet } else { 1779566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg)); 17856bd94f4SPierre Jolivet if (flg) { 17956bd94f4SPierre Jolivet for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 18056bd94f4SPierre Jolivet for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 1819566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJGetArray(A, &a_ptr)); 182792fecdfSBarry Smith PetscCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 18356bd94f4SPierre Jolivet } else { 1849566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg)); 18556bd94f4SPierre Jolivet if (flg) { 18656bd94f4SPierre Jolivet for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 18756bd94f4SPierre Jolivet for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 1889566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJGetArray(A, &a_ptr)); 189792fecdfSBarry Smith PetscCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 190b88df2e7SBarry Smith #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 19156bd94f4SPierre Jolivet descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 19256bd94f4SPierre Jolivet descr.mode = SPARSE_FILL_MODE_UPPER; 19356bd94f4SPierre Jolivet descr.diag = SPARSE_DIAG_NON_UNIT; 19456bd94f4SPierre Jolivet #endif 19556bd94f4SPierre Jolivet } 19656bd94f4SPierre Jolivet } 19756bd94f4SPierre Jolivet } 1989566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 1999566063dSJacob Faibussowitsch PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done)); 20056bd94f4SPierre Jolivet } 20156bd94f4SPierre Jolivet 2029566063dSJacob Faibussowitsch PetscCall(MatViewFromOptions(A, NULL, "-A_view")); 20356bd94f4SPierre Jolivet 20456bd94f4SPierre Jolivet for (k = 0; k < nN; ++k) { 20556bd94f4SPierre Jolivet MatType Atype, Ctype; 20656bd94f4SPierre Jolivet PetscInt AM, AN, CM, CN, t; 207*1766d9c3SPierre Jolivet PetscBool set = PETSC_FALSE; 208*1766d9c3SPierre Jolivet PetscLogStage ab_stage, abt_stage, atb_stage; 20956bd94f4SPierre Jolivet char stage_s[256]; 21056bd94f4SPierre Jolivet 2119566063dSJacob Faibussowitsch PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C)); 2129566063dSJacob Faibussowitsch PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D)); 2139566063dSJacob Faibussowitsch PetscCall(MatSetRandom(C, NULL)); 21456bd94f4SPierre Jolivet if (cuda) { /* convert to GPU if needed */ 2159566063dSJacob Faibussowitsch PetscCall(MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C)); 2169566063dSJacob Faibussowitsch PetscCall(MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D)); 21756bd94f4SPierre Jolivet } 21856bd94f4SPierre Jolivet if (mkl) { 219792fecdfSBarry Smith if (N[k] > 1) PetscCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial)); 220792fecdfSBarry Smith else PetscCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial)); 221792fecdfSBarry Smith PetscCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE)); 222792fecdfSBarry Smith PetscCallMKLSparse(mkl_sparse_optimize, (spr)); 22356bd94f4SPierre Jolivet } 2249566063dSJacob Faibussowitsch PetscCall(MatGetType(A, &Atype)); 2259566063dSJacob Faibussowitsch PetscCall(MatGetType(C, &Ctype)); 2269566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &AM, &AN)); 2279566063dSJacob Faibussowitsch PetscCall(MatGetSize(C, &CM, &CN)); 22856bd94f4SPierre Jolivet 22956bd94f4SPierre Jolivet if (!maij || N[k] > 1) { 230*1766d9c3SPierre Jolivet PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "ab_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k])); 231*1766d9c3SPierre Jolivet PetscCall(PetscLogStageRegister(stage_s, &ab_stage)); 23256bd94f4SPierre Jolivet } 233*1766d9c3SPierre Jolivet if (abt && N[k] > 1) { 234*1766d9c3SPierre Jolivet PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "abt_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k])); 235*1766d9c3SPierre Jolivet PetscCall(PetscLogStageRegister(stage_s, &abt_stage)); 236*1766d9c3SPierre Jolivet } 237*1766d9c3SPierre Jolivet if (atb) { 238*1766d9c3SPierre Jolivet PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "atb_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k])); 239*1766d9c3SPierre Jolivet PetscCall(PetscLogStageRegister(stage_s, &atb_stage)); 24056bd94f4SPierre Jolivet } 24156bd94f4SPierre Jolivet /* A*B */ 24256bd94f4SPierre Jolivet if (N[k] > 1) { 24356bd94f4SPierre Jolivet if (!maij) { 2449566063dSJacob Faibussowitsch PetscCall(MatProductCreateWithMat(A, C, NULL, D)); 2459566063dSJacob Faibussowitsch PetscCall(MatProductSetType(D, MATPRODUCT_AB)); 2469566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(D)); 2479566063dSJacob Faibussowitsch PetscCall(MatProductSymbolic(D)); 24856bd94f4SPierre Jolivet } 24956bd94f4SPierre Jolivet 25056bd94f4SPierre Jolivet if (!mkl) { 25156bd94f4SPierre Jolivet if (!maij) { 2529566063dSJacob Faibussowitsch PetscCall(MatProductNumeric(D)); 2539566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN)); 254*1766d9c3SPierre Jolivet PetscCall(PetscLogStagePush(ab_stage)); 25548a46eb9SPierre Jolivet for (t = 0; t < trial; ++t) PetscCall(MatProductNumeric(D)); 2569566063dSJacob Faibussowitsch PetscCall(PetscLogStagePop()); 25756bd94f4SPierre Jolivet } else { 25856bd94f4SPierre Jolivet Mat E, Ct, Dt; 25956bd94f4SPierre Jolivet Vec cC, cD; 26056bd94f4SPierre Jolivet const PetscScalar *c_ptr; 26156bd94f4SPierre Jolivet PetscScalar *d_ptr; 2629566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(A, N[k], &E)); 2639566063dSJacob Faibussowitsch PetscCall(MatDenseGetLocalMatrix(C, &Ct)); 2649566063dSJacob Faibussowitsch PetscCall(MatDenseGetLocalMatrix(D, &Dt)); 2659566063dSJacob Faibussowitsch PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct)); 2669566063dSJacob Faibussowitsch PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt)); 2679566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(Ct, &c_ptr)); 2689566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(Dt, &d_ptr)); 2699566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC)); 2709566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD)); 2719566063dSJacob Faibussowitsch PetscCall(MatMult(E, cC, cD)); 2729566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1)); 273*1766d9c3SPierre Jolivet PetscCall(PetscLogStagePush(ab_stage)); 27448a46eb9SPierre Jolivet for (t = 0; t < trial; ++t) PetscCall(MatMult(E, cC, cD)); 2759566063dSJacob Faibussowitsch PetscCall(PetscLogStagePop()); 2769566063dSJacob Faibussowitsch PetscCall(VecDestroy(&cD)); 2779566063dSJacob Faibussowitsch PetscCall(VecDestroy(&cC)); 2789566063dSJacob Faibussowitsch PetscCall(MatDestroy(&E)); 2799566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(Dt, &d_ptr)); 2809566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(Ct, &c_ptr)); 2819566063dSJacob Faibussowitsch PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct)); 2829566063dSJacob Faibussowitsch PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt)); 28356bd94f4SPierre Jolivet } 28456bd94f4SPierre Jolivet } else { 28556bd94f4SPierre Jolivet const PetscScalar *c_ptr; 28656bd94f4SPierre Jolivet PetscScalar *d_ptr; 28756bd94f4SPierre Jolivet 2889566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(C, &c_ptr)); 2899566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(D, &d_ptr)); 290792fecdfSBarry Smith PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 2919566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN)); 292*1766d9c3SPierre Jolivet PetscCall(PetscLogStagePush(ab_stage)); 29348a46eb9SPierre Jolivet for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 2949566063dSJacob Faibussowitsch PetscCall(PetscLogStagePop()); 2959566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr)); 2969566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(C, &c_ptr)); 29756bd94f4SPierre Jolivet } 29856bd94f4SPierre Jolivet } else if (maij) { 2999566063dSJacob Faibussowitsch PetscCall(MatDestroy(&C)); 3009566063dSJacob Faibussowitsch PetscCall(MatDestroy(&D)); 30156bd94f4SPierre Jolivet continue; 30256bd94f4SPierre Jolivet } else if (!mkl) { 30356bd94f4SPierre Jolivet Vec cC, cD; 30456bd94f4SPierre Jolivet 3059566063dSJacob Faibussowitsch PetscCall(MatDenseGetColumnVecRead(C, 0, &cC)); 3069566063dSJacob Faibussowitsch PetscCall(MatDenseGetColumnVecWrite(D, 0, &cD)); 3079566063dSJacob Faibussowitsch PetscCall(MatMult(A, cC, cD)); 3089566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN)); 309*1766d9c3SPierre Jolivet PetscCall(PetscLogStagePush(ab_stage)); 31048a46eb9SPierre Jolivet for (t = 0; t < trial; ++t) PetscCall(MatMult(A, cC, cD)); 3119566063dSJacob Faibussowitsch PetscCall(PetscLogStagePop()); 3129566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreColumnVecRead(C, 0, &cC)); 3139566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreColumnVecWrite(D, 0, &cD)); 31456bd94f4SPierre Jolivet } else { 31556bd94f4SPierre Jolivet const PetscScalar *c_ptr; 31656bd94f4SPierre Jolivet PetscScalar *d_ptr; 31756bd94f4SPierre Jolivet 3189566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(C, &c_ptr)); 3199566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(D, &d_ptr)); 3209566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN)); 321792fecdfSBarry Smith PetscCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr)); 322*1766d9c3SPierre Jolivet PetscCall(PetscLogStagePush(ab_stage)); 32348a46eb9SPierre Jolivet for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr)); 3249566063dSJacob Faibussowitsch PetscCall(PetscLogStagePop()); 3259566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr)); 3269566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(C, &c_ptr)); 32756bd94f4SPierre Jolivet } 32856bd94f4SPierre Jolivet 32956bd94f4SPierre Jolivet if (check) { 3309566063dSJacob Faibussowitsch PetscCall(MatMatMultEqual(A, C, D, 10, &flg)); 33156bd94f4SPierre Jolivet if (!flg) { 33256bd94f4SPierre Jolivet MatType Dtype; 33356bd94f4SPierre Jolivet 3349566063dSJacob Faibussowitsch PetscCall(MatGetType(D, &Dtype)); 3359566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k])); 33656bd94f4SPierre Jolivet } 33756bd94f4SPierre Jolivet } 33856bd94f4SPierre Jolivet 33956bd94f4SPierre Jolivet /* MKL implementation seems buggy for ABt */ 34056bd94f4SPierre Jolivet /* A*Bt */ 341*1766d9c3SPierre Jolivet if (!mkl && abt && N[k] > 1) { 3429566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "")); 34356bd94f4SPierre Jolivet if (flg) { 3449566063dSJacob Faibussowitsch PetscCall(MatTranspose(C, MAT_INPLACE_MATRIX, &C)); 3459566063dSJacob Faibussowitsch PetscCall(MatGetType(C, &Ctype)); 34656bd94f4SPierre Jolivet if (!mkl) { 3479566063dSJacob Faibussowitsch PetscCall(MatProductCreateWithMat(A, C, NULL, D)); 3489566063dSJacob Faibussowitsch PetscCall(MatProductSetType(D, MATPRODUCT_ABt)); 3499566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(D)); 3509566063dSJacob Faibussowitsch PetscCall(MatProductSymbolic(D)); 3519566063dSJacob Faibussowitsch PetscCall(MatProductNumeric(D)); 3529566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and Bt %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN)); 353*1766d9c3SPierre Jolivet PetscCall(PetscLogStagePush(abt_stage)); 35448a46eb9SPierre Jolivet for (t = 0; t < trial; ++t) PetscCall(MatProductNumeric(D)); 3559566063dSJacob Faibussowitsch PetscCall(PetscLogStagePop()); 35656bd94f4SPierre Jolivet } else { 35756bd94f4SPierre Jolivet const PetscScalar *c_ptr; 35856bd94f4SPierre Jolivet PetscScalar *d_ptr; 35956bd94f4SPierre Jolivet 360792fecdfSBarry Smith PetscCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial)); 361792fecdfSBarry Smith PetscCallMKLSparse(mkl_sparse_optimize, (spr)); 3629566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(C, &c_ptr)); 3639566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(D, &d_ptr)); 3649566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN)); 365792fecdfSBarry Smith PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 366*1766d9c3SPierre Jolivet PetscCall(PetscLogStagePush(ab_stage)); 36748a46eb9SPierre Jolivet for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 3689566063dSJacob Faibussowitsch PetscCall(PetscLogStagePop()); 3699566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr)); 3709566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(C, &c_ptr)); 37156bd94f4SPierre Jolivet } 37256bd94f4SPierre Jolivet } 37356bd94f4SPierre Jolivet } 37456bd94f4SPierre Jolivet 375*1766d9c3SPierre Jolivet if (!mkl && abt && N[k] > 1 && flg && check) { 3769566063dSJacob Faibussowitsch PetscCall(MatMatTransposeMultEqual(A, C, D, 10, &flg)); 37756bd94f4SPierre Jolivet if (!flg) { 37856bd94f4SPierre Jolivet MatType Dtype; 3799566063dSJacob Faibussowitsch PetscCall(MatGetType(D, &Dtype)); 3809566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k])); 38156bd94f4SPierre Jolivet } 38256bd94f4SPierre Jolivet } 383*1766d9c3SPierre Jolivet 384*1766d9c3SPierre Jolivet /* At*B */ 385*1766d9c3SPierre Jolivet if (!mkl && atb) { 386*1766d9c3SPierre Jolivet PetscCall(MatIsSymmetricKnown(A, &set, &flg)); 387*1766d9c3SPierre Jolivet set = (PetscBool)(set && flg); 388*1766d9c3SPierre Jolivet PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_FALSE)); 389*1766d9c3SPierre Jolivet if (N[k] > 1) { 390*1766d9c3SPierre Jolivet PetscCall(MatProductCreateWithMat(A, C, NULL, D)); 391*1766d9c3SPierre Jolivet PetscCall(MatProductSetType(D, MATPRODUCT_AtB)); 392*1766d9c3SPierre Jolivet PetscCall(MatProductSetFromOptions(D)); 393*1766d9c3SPierre Jolivet PetscCall(MatProductSymbolic(D)); 394*1766d9c3SPierre Jolivet PetscCall(MatProductNumeric(D)); 395*1766d9c3SPierre Jolivet PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with At %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_AtB], Atype, AM, AN, Ctype, CM, CN)); 396*1766d9c3SPierre Jolivet PetscCall(PetscLogStagePush(atb_stage)); 397*1766d9c3SPierre Jolivet for (t = 0; t < trial; ++t) PetscCall(MatProductNumeric(D)); 398*1766d9c3SPierre Jolivet PetscCall(PetscLogStagePop()); 399*1766d9c3SPierre Jolivet } else { 400*1766d9c3SPierre Jolivet Vec cC, cD; 401*1766d9c3SPierre Jolivet 402*1766d9c3SPierre Jolivet PetscCall(MatDenseGetColumnVecRead(C, 0, &cC)); 403*1766d9c3SPierre Jolivet PetscCall(MatDenseGetColumnVecWrite(D, 0, &cD)); 404*1766d9c3SPierre Jolivet PetscCall(MatMultTranspose(A, cC, cD)); 405*1766d9c3SPierre Jolivet PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMultTranspose: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN)); 406*1766d9c3SPierre Jolivet PetscCall(PetscLogStagePush(atb_stage)); 407*1766d9c3SPierre Jolivet for (t = 0; t < trial; ++t) PetscCall(MatMultTranspose(A, cC, cD)); 408*1766d9c3SPierre Jolivet PetscCall(PetscLogStagePop()); 409*1766d9c3SPierre Jolivet PetscCall(MatDenseRestoreColumnVecRead(C, 0, &cC)); 410*1766d9c3SPierre Jolivet PetscCall(MatDenseRestoreColumnVecWrite(D, 0, &cD)); 411*1766d9c3SPierre Jolivet } 412*1766d9c3SPierre Jolivet } 413*1766d9c3SPierre Jolivet 414*1766d9c3SPierre Jolivet if (!mkl && atb && N[k] > 1 && check) { 415*1766d9c3SPierre Jolivet PetscCall(MatTransposeMatMultEqual(A, C, D, 10, &flg)); 416*1766d9c3SPierre Jolivet if (!flg) { 417*1766d9c3SPierre Jolivet MatType Dtype; 418*1766d9c3SPierre Jolivet PetscCall(MatGetType(D, &Dtype)); 419*1766d9c3SPierre Jolivet PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k])); 420*1766d9c3SPierre Jolivet } 421*1766d9c3SPierre Jolivet } 422*1766d9c3SPierre Jolivet if (set) PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE)); 4239566063dSJacob Faibussowitsch PetscCall(MatDestroy(&C)); 4249566063dSJacob Faibussowitsch PetscCall(MatDestroy(&D)); 42556bd94f4SPierre Jolivet } 42656bd94f4SPierre Jolivet if (mkl) { 427792fecdfSBarry Smith PetscCallMKLSparse(mkl_sparse_destroy, (spr)); 4289566063dSJacob Faibussowitsch PetscCall(PetscFree(ia_ptr)); 4299566063dSJacob Faibussowitsch PetscCall(PetscFree(ja_ptr)); 43056bd94f4SPierre Jolivet } 43156bd94f4SPierre Jolivet if (cuda && i != ntype - 1) { 4329566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n")); 43356bd94f4SPierre Jolivet break; 43456bd94f4SPierre Jolivet } 43556bd94f4SPierre Jolivet } 4369566063dSJacob Faibussowitsch if (E != A) PetscCall(MatDestroy(&E)); 4379566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 43856bd94f4SPierre Jolivet } 4399566063dSJacob Faibussowitsch for (m = 0; m < ntype; ++m) PetscCall(PetscFree(type[m])); 4409566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 44156bd94f4SPierre Jolivet return 0; 44256bd94f4SPierre Jolivet } 44356bd94f4SPierre Jolivet 44456bd94f4SPierre Jolivet /*TEST 445b8582c7cSSatish Balay build: 446dfd57a17SPierre Jolivet requires: double !complex !defined(PETSC_USE_64BIT_INDICES) 44756bd94f4SPierre Jolivet 44856bd94f4SPierre Jolivet testset: 44956bd94f4SPierre Jolivet nsize: 1 45056bd94f4SPierre Jolivet filter: sed "/Benchmarking/d" 451*1766d9c3SPierre Jolivet args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -ABt -convert_aij {{false true}shared output} 452*1766d9c3SPierre Jolivet output_file: output/empty.out 45356bd94f4SPierre Jolivet test: 45456bd94f4SPierre Jolivet suffix: basic 45556bd94f4SPierre Jolivet args: -type aij,sbaij,baij 45656bd94f4SPierre Jolivet test: 45756bd94f4SPierre Jolivet suffix: maij 45856bd94f4SPierre Jolivet args: -type aij,maij 45956bd94f4SPierre Jolivet test: 46056bd94f4SPierre Jolivet suffix: cuda 46156bd94f4SPierre Jolivet requires: cuda 46256bd94f4SPierre Jolivet args: -type aij,aijcusparse 46356bd94f4SPierre Jolivet test: 46456bd94f4SPierre Jolivet suffix: mkl 465b88df2e7SBarry Smith requires: mkl_sparse_optimize 46656bd94f4SPierre Jolivet args: -type aij,aijmkl,baijmkl,sbaijmkl 467*1766d9c3SPierre Jolivet 468*1766d9c3SPierre Jolivet test: 469*1766d9c3SPierre Jolivet nsize: 1 470*1766d9c3SPierre Jolivet filter: sed "/Benchmarking/d" 471*1766d9c3SPierre Jolivet args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3,4,5,6 -N 1,8 -check -AtB -type baij 4723886731fSPierre Jolivet output_file: output/empty.out 47356bd94f4SPierre Jolivet 47456bd94f4SPierre Jolivet TEST*/ 475