156bd94f4SPierre Jolivet static char help[] = "Mini-app to benchmark matrix--matrix multiplication\n\n"; 256bd94f4SPierre Jolivet 356bd94f4SPierre Jolivet /* 456bd94f4SPierre Jolivet See the paper below for more information 556bd94f4SPierre Jolivet 656bd94f4SPierre Jolivet "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods", 756bd94f4SPierre Jolivet P. Jolivet, J. E. Roman, and S. Zampini (2020). 856bd94f4SPierre Jolivet */ 956bd94f4SPierre Jolivet 1056bd94f4SPierre Jolivet #include <petsc.h> 1156bd94f4SPierre Jolivet 128c0a8bd1SPierre Jolivet #if defined(PETSC_HAVE_MKL) && defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1356bd94f4SPierre Jolivet #include <mkl.h> 1456bd94f4SPierre Jolivet #define PetscStackCallMKLSparse(func, args) do { \ 1556bd94f4SPierre Jolivet sparse_status_t __ierr; \ 1656bd94f4SPierre Jolivet PetscStackPush(#func); \ 1756bd94f4SPierre Jolivet __ierr = func args; \ 1856bd94f4SPierre Jolivet PetscStackPop; \ 1956bd94f4SPierre Jolivet if (__ierr != SPARSE_STATUS_SUCCESS) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)__ierr); \ 2056bd94f4SPierre Jolivet } while (0) 2156bd94f4SPierre Jolivet #else 2256bd94f4SPierre Jolivet #define PetscStackCallMKLSparse(func, args) do { \ 2356bd94f4SPierre Jolivet SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); \ 2456bd94f4SPierre Jolivet } while (0) 2556bd94f4SPierre Jolivet #endif 2656bd94f4SPierre Jolivet 27956f8c0dSBarry Smith int main(int argc, char** argv) 28956f8c0dSBarry Smith { 2956bd94f4SPierre Jolivet Mat A, C, D, E; 3056bd94f4SPierre Jolivet PetscInt nbs = 10, ntype = 10, nN = 8, m, M, trial = 5; 3156bd94f4SPierre Jolivet PetscViewer viewer; 3256bd94f4SPierre Jolivet PetscInt bs[10], N[8]; 3356bd94f4SPierre Jolivet char *type[10]; 3456bd94f4SPierre Jolivet PetscMPIInt size; 3556bd94f4SPierre Jolivet PetscBool flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, trans = PETSC_FALSE, convert = PETSC_FALSE, mkl; 3656bd94f4SPierre Jolivet char file[PETSC_MAX_PATH_LEN]; 3756bd94f4SPierre Jolivet PetscErrorCode ierr; 3856bd94f4SPierre Jolivet 392da392ccSBarry Smith ierr = PetscInitialize(&argc, &argv, NULL, help);if (ierr) return ierr; 4056bd94f4SPierre Jolivet if (ierr) return ierr; 41ffc4695bSBarry Smith ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRMPI(ierr); 4256bd94f4SPierre Jolivet if (size != 1) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only"); 4356bd94f4SPierre Jolivet ierr = PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg);CHKERRQ(ierr); 4456bd94f4SPierre Jolivet if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f option"); 4556bd94f4SPierre Jolivet ierr = PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL);CHKERRQ(ierr); 4656bd94f4SPierre Jolivet ierr = PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg);CHKERRQ(ierr); 4756bd94f4SPierre Jolivet if (!flg) { 4856bd94f4SPierre Jolivet nbs = 1; 4956bd94f4SPierre Jolivet bs[0] = 1; 5056bd94f4SPierre Jolivet } 5156bd94f4SPierre Jolivet ierr = PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg);CHKERRQ(ierr); 5256bd94f4SPierre Jolivet if (!flg) { 5356bd94f4SPierre Jolivet nN = 8; 5456bd94f4SPierre Jolivet N[0] = 1; N[1] = 2; N[2] = 4; N[3] = 8; 5556bd94f4SPierre Jolivet N[4] = 16; N[5] = 32; N[6] = 64; N[7] = 128; 5656bd94f4SPierre Jolivet } 5756bd94f4SPierre Jolivet ierr = PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg);CHKERRQ(ierr); 5856bd94f4SPierre Jolivet if (!flg) { 5956bd94f4SPierre Jolivet ntype = 1; 6056bd94f4SPierre Jolivet ierr = PetscStrallocpy(MATSEQAIJ, &type[0]);CHKERRQ(ierr); 6156bd94f4SPierre Jolivet } 6256bd94f4SPierre Jolivet ierr = PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL);CHKERRQ(ierr); 6356bd94f4SPierre Jolivet ierr = PetscOptionsGetBool(NULL, NULL, "-trans", &trans, NULL);CHKERRQ(ierr); 6456bd94f4SPierre Jolivet ierr = PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL);CHKERRQ(ierr); 6556bd94f4SPierre Jolivet for (PetscInt j = 0; j < nbs; ++j) { 6656bd94f4SPierre Jolivet ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer);CHKERRQ(ierr); 6756bd94f4SPierre Jolivet ierr = MatCreate(PETSC_COMM_WORLD, &A);CHKERRQ(ierr); 6856bd94f4SPierre Jolivet ierr = MatSetFromOptions(A);CHKERRQ(ierr); 6956bd94f4SPierre Jolivet ierr = MatLoad(A, viewer);CHKERRQ(ierr); 7056bd94f4SPierre Jolivet ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7156bd94f4SPierre Jolivet ierr = PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "");CHKERRQ(ierr); 7256bd94f4SPierre Jolivet if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate a MatAIJ input matrix"); 7356bd94f4SPierre Jolivet ierr = MatGetSize(A, &m, &M);CHKERRQ(ierr); 7456bd94f4SPierre Jolivet if (m == M) { 7556bd94f4SPierre Jolivet Mat oA; 7656bd94f4SPierre Jolivet ierr = MatTranspose(A, MAT_INITIAL_MATRIX, &oA);CHKERRQ(ierr); 7756bd94f4SPierre Jolivet ierr = MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 7856bd94f4SPierre Jolivet ierr = MatDestroy(&oA);CHKERRQ(ierr); 7956bd94f4SPierre Jolivet } 8056bd94f4SPierre Jolivet ierr = MatGetLocalSize(A, &m, NULL);CHKERRQ(ierr); 8156bd94f4SPierre Jolivet ierr = MatGetSize(A, &M, NULL);CHKERRQ(ierr); 8256bd94f4SPierre Jolivet if (bs[j] > 1) { 8356bd94f4SPierre Jolivet Mat T, Tt, B; 8456bd94f4SPierre Jolivet const PetscScalar *ptr; 8556bd94f4SPierre Jolivet PetscScalar *val, *Aa; 8656bd94f4SPierre Jolivet const PetscInt *Ai, *Aj; 8756bd94f4SPierre Jolivet PetscInt An, i, k; 8856bd94f4SPierre Jolivet PetscBool done; 8956bd94f4SPierre Jolivet 9056bd94f4SPierre Jolivet ierr = MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T);CHKERRQ(ierr); 9156bd94f4SPierre Jolivet ierr = MatSetRandom(T, NULL);CHKERRQ(ierr); 9256bd94f4SPierre Jolivet ierr = MatTranspose(T, MAT_INITIAL_MATRIX, &Tt);CHKERRQ(ierr); 9356bd94f4SPierre Jolivet ierr = MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN);CHKERRQ(ierr); 9456bd94f4SPierre Jolivet ierr = MatDestroy(&Tt);CHKERRQ(ierr); 9556bd94f4SPierre Jolivet ierr = MatDenseGetArrayRead(T, &ptr);CHKERRQ(ierr); 9656bd94f4SPierre Jolivet ierr = MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done);CHKERRQ(ierr); 9756bd94f4SPierre Jolivet if (!done || An != m) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes"); 9856bd94f4SPierre Jolivet ierr = MatSeqAIJGetArray(A, &Aa);CHKERRQ(ierr); 9956bd94f4SPierre Jolivet ierr = MatCreate(PETSC_COMM_WORLD, &B);CHKERRQ(ierr); 10056bd94f4SPierre Jolivet ierr = MatSetType(B, MATSEQBAIJ);CHKERRQ(ierr); 10156bd94f4SPierre Jolivet ierr = MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE);CHKERRQ(ierr); 10256bd94f4SPierre Jolivet ierr = PetscMalloc1(Ai[An] * bs[j] * bs[j], &val);CHKERRQ(ierr); 10356bd94f4SPierre Jolivet for (i = 0; i < Ai[An]; ++i) 10456bd94f4SPierre Jolivet for (k = 0; k < bs[j] * bs[j]; ++k) 10556bd94f4SPierre Jolivet val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k]; 10656bd94f4SPierre Jolivet ierr = MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE);CHKERRQ(ierr); 10756bd94f4SPierre Jolivet ierr = MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val);CHKERRQ(ierr); 10856bd94f4SPierre Jolivet ierr = PetscFree(val);CHKERRQ(ierr); 10956bd94f4SPierre Jolivet ierr = MatSeqAIJRestoreArray(A, &Aa);CHKERRQ(ierr); 11056bd94f4SPierre Jolivet ierr = MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done);CHKERRQ(ierr); 11156bd94f4SPierre Jolivet ierr = MatDenseRestoreArrayRead(T, &ptr);CHKERRQ(ierr); 11256bd94f4SPierre Jolivet ierr = MatDestroy(&T);CHKERRQ(ierr); 11356bd94f4SPierre Jolivet ierr = MatDestroy(&A);CHKERRQ(ierr); 11456bd94f4SPierre Jolivet A = B; 11556bd94f4SPierre Jolivet } 11656bd94f4SPierre Jolivet /* reconvert back to SeqAIJ before converting to the desired type later */ 11756bd94f4SPierre Jolivet if (!convert) E = A; 11856bd94f4SPierre Jolivet ierr = MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E);CHKERRQ(ierr); 11956bd94f4SPierre Jolivet ierr = MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE);CHKERRQ(ierr); 12056bd94f4SPierre Jolivet for (PetscInt i = 0; i < ntype; ++i) { 12156bd94f4SPierre Jolivet char *tmp; 12256bd94f4SPierre Jolivet PetscInt *ia_ptr, *ja_ptr, k; 12356bd94f4SPierre Jolivet PetscScalar *a_ptr; 1248c0a8bd1SPierre Jolivet #if defined(PETSC_HAVE_MKL) && defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 12556bd94f4SPierre Jolivet struct matrix_descr descr; 12656bd94f4SPierre Jolivet sparse_matrix_t spr; 12756bd94f4SPierre Jolivet descr.type = SPARSE_MATRIX_TYPE_GENERAL; 12856bd94f4SPierre Jolivet descr.diag = SPARSE_DIAG_NON_UNIT; 12956bd94f4SPierre Jolivet #endif 13056bd94f4SPierre Jolivet if (convert) { 13156bd94f4SPierre Jolivet ierr = MatDestroy(&A);CHKERRQ(ierr); 13256bd94f4SPierre Jolivet } 13356bd94f4SPierre Jolivet ierr = PetscStrstr(type[i], "mkl", &tmp);CHKERRQ(ierr); 13456bd94f4SPierre Jolivet if (tmp) { 13556bd94f4SPierre Jolivet size_t mlen, tlen; 13656bd94f4SPierre Jolivet char base[256]; 13756bd94f4SPierre Jolivet 13856bd94f4SPierre Jolivet mkl = PETSC_TRUE; 13956bd94f4SPierre Jolivet ierr = PetscStrlen(tmp, &mlen);CHKERRQ(ierr); 14056bd94f4SPierre Jolivet ierr = PetscStrlen(type[i], &tlen);CHKERRQ(ierr); 14156bd94f4SPierre Jolivet ierr = PetscStrncpy(base, type[i], tlen-mlen + 1);CHKERRQ(ierr); 14256bd94f4SPierre Jolivet ierr = MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);CHKERRQ(ierr); 14356bd94f4SPierre Jolivet } else { 14456bd94f4SPierre Jolivet mkl = PETSC_FALSE; 14556bd94f4SPierre Jolivet ierr = PetscStrstr(type[i], "maij", &tmp);CHKERRQ(ierr); 14656bd94f4SPierre Jolivet if (!tmp) { 14756bd94f4SPierre Jolivet ierr = MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);CHKERRQ(ierr); 14856bd94f4SPierre Jolivet } else { 14956bd94f4SPierre Jolivet ierr = MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);CHKERRQ(ierr); 15056bd94f4SPierre Jolivet maij = PETSC_TRUE; 15156bd94f4SPierre Jolivet } 15256bd94f4SPierre Jolivet } 15356bd94f4SPierre Jolivet ierr = PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "");CHKERRQ(ierr); 15456bd94f4SPierre Jolivet if (mkl) { 15556bd94f4SPierre Jolivet const PetscInt *Ai, *Aj; 15656bd94f4SPierre Jolivet PetscInt An; 15756bd94f4SPierre Jolivet PetscBool done; 15856bd94f4SPierre Jolivet 15956bd94f4SPierre Jolivet ierr = PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, "");CHKERRQ(ierr); 16056bd94f4SPierre Jolivet if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Not implemented"); 16156bd94f4SPierre Jolivet ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg);CHKERRQ(ierr); 16256bd94f4SPierre Jolivet ierr = MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done);CHKERRQ(ierr); 16356bd94f4SPierre Jolivet if (!done) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes"); 16456bd94f4SPierre Jolivet ierr = PetscMalloc1(An + 1, &ia_ptr);CHKERRQ(ierr); 16556bd94f4SPierre Jolivet ierr = PetscMalloc1(Ai[An], &ja_ptr);CHKERRQ(ierr); 16656bd94f4SPierre Jolivet if (flg) { /* SeqAIJ */ 16756bd94f4SPierre Jolivet for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k]; 16856bd94f4SPierre Jolivet for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k]; 16956bd94f4SPierre Jolivet ierr = MatSeqAIJGetArray(A, &a_ptr);CHKERRQ(ierr); 17056bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 17156bd94f4SPierre Jolivet } else { 17256bd94f4SPierre Jolivet ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg);CHKERRQ(ierr); 17356bd94f4SPierre Jolivet if (flg) { 17456bd94f4SPierre Jolivet for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 17556bd94f4SPierre Jolivet for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 17656bd94f4SPierre Jolivet ierr = MatSeqBAIJGetArray(A, &a_ptr);CHKERRQ(ierr); 17756bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 17856bd94f4SPierre Jolivet } else { 17956bd94f4SPierre Jolivet ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg);CHKERRQ(ierr); 18056bd94f4SPierre Jolivet if (flg) { 18156bd94f4SPierre Jolivet for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 18256bd94f4SPierre Jolivet for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 18356bd94f4SPierre Jolivet ierr = MatSeqSBAIJGetArray(A, &a_ptr);CHKERRQ(ierr); 18456bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 1858c0a8bd1SPierre Jolivet #if defined(PETSC_HAVE_MKL) && defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 18656bd94f4SPierre Jolivet descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 18756bd94f4SPierre Jolivet descr.mode = SPARSE_FILL_MODE_UPPER; 18856bd94f4SPierre Jolivet descr.diag = SPARSE_DIAG_NON_UNIT; 18956bd94f4SPierre Jolivet #endif 19056bd94f4SPierre Jolivet } 19156bd94f4SPierre Jolivet } 19256bd94f4SPierre Jolivet } 19356bd94f4SPierre Jolivet ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg);CHKERRQ(ierr); 19456bd94f4SPierre Jolivet ierr = MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done);CHKERRQ(ierr); 19556bd94f4SPierre Jolivet } 19656bd94f4SPierre Jolivet 19756bd94f4SPierre Jolivet ierr = MatViewFromOptions(A, NULL, "-A_view");CHKERRQ(ierr); 19856bd94f4SPierre Jolivet 19956bd94f4SPierre Jolivet for (k = 0; k < nN; ++k) { 20056bd94f4SPierre Jolivet MatType Atype, Ctype; 20156bd94f4SPierre Jolivet PetscInt AM, AN, CM, CN, t; 202956f8c0dSBarry Smith #if defined(PETSC_USE_LOG) 20356bd94f4SPierre Jolivet PetscLogStage stage, tstage; 20456bd94f4SPierre Jolivet char stage_s[256]; 205956f8c0dSBarry Smith #endif 20656bd94f4SPierre Jolivet 20756bd94f4SPierre Jolivet ierr = MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C);CHKERRQ(ierr); 20856bd94f4SPierre Jolivet ierr = MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D);CHKERRQ(ierr); 20956bd94f4SPierre Jolivet ierr = MatSetRandom(C, NULL);CHKERRQ(ierr); 21056bd94f4SPierre Jolivet if (cuda) { /* convert to GPU if needed */ 21156bd94f4SPierre Jolivet ierr = MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C);CHKERRQ(ierr); 21256bd94f4SPierre Jolivet ierr = MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D);CHKERRQ(ierr); 21356bd94f4SPierre Jolivet } 21456bd94f4SPierre Jolivet if (mkl) { 21556bd94f4SPierre Jolivet if (N[k] > 1) PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial)); 21656bd94f4SPierre Jolivet else PetscStackCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial)); 21756bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE)); 21856bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_optimize, (spr)); 21956bd94f4SPierre Jolivet } 22056bd94f4SPierre Jolivet ierr = MatGetType(A, &Atype);CHKERRQ(ierr); 22156bd94f4SPierre Jolivet ierr = MatGetType(C, &Ctype);CHKERRQ(ierr); 22256bd94f4SPierre Jolivet ierr = MatGetSize(A, &AM, &AN);CHKERRQ(ierr); 22356bd94f4SPierre Jolivet ierr = MatGetSize(C, &CM, &CN);CHKERRQ(ierr); 22456bd94f4SPierre Jolivet 225956f8c0dSBarry Smith #if defined(PETSC_USE_LOG) 22656bd94f4SPierre Jolivet if (!maij || N[k] > 1) { 22756bd94f4SPierre Jolivet ierr = PetscSNPrintf(stage_s, sizeof(stage_s), "type_%s-bs_%D-N_%02d", type[i], bs[j], (int)N[k]);CHKERRQ(ierr); 22856bd94f4SPierre Jolivet ierr = PetscLogStageRegister(stage_s, &stage);CHKERRQ(ierr); 22956bd94f4SPierre Jolivet } 23056bd94f4SPierre Jolivet if (trans && N[k] > 1) { 23156bd94f4SPierre Jolivet ierr = PetscSNPrintf(stage_s, sizeof(stage_s), "trans_type_%s-bs_%D-N_%02d", type[i], bs[j], (int)N[k]);CHKERRQ(ierr); 23256bd94f4SPierre Jolivet ierr = PetscLogStageRegister(stage_s, &tstage);CHKERRQ(ierr); 23356bd94f4SPierre Jolivet } 234956f8c0dSBarry Smith #endif 23556bd94f4SPierre Jolivet /* A*B */ 23656bd94f4SPierre Jolivet if (N[k] > 1) { 23756bd94f4SPierre Jolivet if (!maij) { 23856bd94f4SPierre Jolivet ierr = MatProductCreateWithMat(A, C, NULL, D);CHKERRQ(ierr); 23956bd94f4SPierre Jolivet ierr = MatProductSetType(D, MATPRODUCT_AB);CHKERRQ(ierr); 24056bd94f4SPierre Jolivet ierr = MatProductSetFromOptions(D);CHKERRQ(ierr); 24156bd94f4SPierre Jolivet ierr = MatProductSymbolic(D);CHKERRQ(ierr); 24256bd94f4SPierre Jolivet } 24356bd94f4SPierre Jolivet 24456bd94f4SPierre Jolivet if (!mkl) { 24556bd94f4SPierre Jolivet if (!maij) { 24656bd94f4SPierre Jolivet ierr = MatProductNumeric(D);CHKERRQ(ierr); 2471e1ea65dSPierre Jolivet ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %Dx%D and B %s %Dx%D\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN);CHKERRQ(ierr); 24856bd94f4SPierre Jolivet ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 24956bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 25056bd94f4SPierre Jolivet ierr = MatProductNumeric(D);CHKERRQ(ierr); 25156bd94f4SPierre Jolivet } 25256bd94f4SPierre Jolivet ierr = PetscLogStagePop();CHKERRQ(ierr); 25356bd94f4SPierre Jolivet } else { 25456bd94f4SPierre Jolivet Mat E, Ct, Dt; 25556bd94f4SPierre Jolivet Vec cC, cD; 25656bd94f4SPierre Jolivet const PetscScalar *c_ptr; 25756bd94f4SPierre Jolivet PetscScalar *d_ptr; 25856bd94f4SPierre Jolivet ierr = MatCreateMAIJ(A, N[k], &E);CHKERRQ(ierr); 25956bd94f4SPierre Jolivet ierr = MatDenseGetLocalMatrix(C, &Ct);CHKERRQ(ierr); 26056bd94f4SPierre Jolivet ierr = MatDenseGetLocalMatrix(D, &Dt);CHKERRQ(ierr); 26156bd94f4SPierre Jolivet ierr = MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct);CHKERRQ(ierr); 26256bd94f4SPierre Jolivet ierr = MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt);CHKERRQ(ierr); 26356bd94f4SPierre Jolivet ierr = MatDenseGetArrayRead(Ct, &c_ptr);CHKERRQ(ierr); 26456bd94f4SPierre Jolivet ierr = MatDenseGetArrayWrite(Dt, &d_ptr);CHKERRQ(ierr); 26556bd94f4SPierre Jolivet ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC);CHKERRQ(ierr); 26656bd94f4SPierre Jolivet ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD);CHKERRQ(ierr); 26756bd94f4SPierre Jolivet ierr = MatMult(E, cC, cD);CHKERRQ(ierr); 2681e1ea65dSPierre Jolivet ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %Dx%D and B %s %Dx%D\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1);CHKERRQ(ierr); 26956bd94f4SPierre Jolivet ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 27056bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 27156bd94f4SPierre Jolivet ierr = MatMult(E, cC, cD);CHKERRQ(ierr); 27256bd94f4SPierre Jolivet } 27356bd94f4SPierre Jolivet ierr = PetscLogStagePop();CHKERRQ(ierr); 27456bd94f4SPierre Jolivet ierr = VecDestroy(&cD);CHKERRQ(ierr); 27556bd94f4SPierre Jolivet ierr = VecDestroy(&cC);CHKERRQ(ierr); 27656bd94f4SPierre Jolivet ierr = MatDestroy(&E);CHKERRQ(ierr); 27756bd94f4SPierre Jolivet ierr = MatDenseRestoreArrayWrite(Dt, &d_ptr);CHKERRQ(ierr); 27856bd94f4SPierre Jolivet ierr = MatDenseRestoreArrayRead(Ct, &c_ptr);CHKERRQ(ierr); 27956bd94f4SPierre Jolivet ierr = MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct);CHKERRQ(ierr); 28056bd94f4SPierre Jolivet ierr = MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt);CHKERRQ(ierr); 28156bd94f4SPierre Jolivet } 28256bd94f4SPierre Jolivet } else { 28356bd94f4SPierre Jolivet const PetscScalar *c_ptr; 28456bd94f4SPierre Jolivet PetscScalar *d_ptr; 28556bd94f4SPierre Jolivet 28656bd94f4SPierre Jolivet ierr = MatDenseGetArrayRead(C, &c_ptr);CHKERRQ(ierr); 28756bd94f4SPierre Jolivet ierr = MatDenseGetArrayWrite(D, &d_ptr);CHKERRQ(ierr); 28856bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_mm,(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 2891e1ea65dSPierre Jolivet ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %Dx%D and B %s %Dx%D\n", Atype, AM, AN, Ctype, CM, CN);CHKERRQ(ierr); 29056bd94f4SPierre Jolivet ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 29156bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 29256bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 29356bd94f4SPierre Jolivet } 29456bd94f4SPierre Jolivet ierr = PetscLogStagePop();CHKERRQ(ierr); 29556bd94f4SPierre Jolivet ierr = MatDenseRestoreArrayWrite(D, &d_ptr);CHKERRQ(ierr); 29656bd94f4SPierre Jolivet ierr = MatDenseRestoreArrayRead(C, &c_ptr);CHKERRQ(ierr); 29756bd94f4SPierre Jolivet } 29856bd94f4SPierre Jolivet } else if (maij) { 29956bd94f4SPierre Jolivet ierr = MatDestroy(&C);CHKERRQ(ierr); 30056bd94f4SPierre Jolivet ierr = MatDestroy(&D);CHKERRQ(ierr); 30156bd94f4SPierre Jolivet continue; 30256bd94f4SPierre Jolivet } else if (!mkl) { 30356bd94f4SPierre Jolivet Vec cC, cD; 30456bd94f4SPierre Jolivet 30556bd94f4SPierre Jolivet ierr = MatDenseGetColumnVecRead(C, 0, &cC);CHKERRQ(ierr); 30656bd94f4SPierre Jolivet ierr = MatDenseGetColumnVecWrite(D, 0, &cD);CHKERRQ(ierr); 30756bd94f4SPierre Jolivet ierr = MatMult(A, cC, cD);CHKERRQ(ierr); 3081e1ea65dSPierre Jolivet ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %Dx%D\n", Atype, AM, AN);CHKERRQ(ierr); 30956bd94f4SPierre Jolivet ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 31056bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 31156bd94f4SPierre Jolivet ierr = MatMult(A, cC, cD);CHKERRQ(ierr); 31256bd94f4SPierre Jolivet } 31356bd94f4SPierre Jolivet ierr = PetscLogStagePop();CHKERRQ(ierr); 31456bd94f4SPierre Jolivet ierr = MatDenseRestoreColumnVecRead(C, 0, &cC);CHKERRQ(ierr); 31556bd94f4SPierre Jolivet ierr = MatDenseRestoreColumnVecWrite(D, 0, &cD);CHKERRQ(ierr); 31656bd94f4SPierre Jolivet } else { 31756bd94f4SPierre Jolivet const PetscScalar *c_ptr; 31856bd94f4SPierre Jolivet PetscScalar *d_ptr; 31956bd94f4SPierre Jolivet 32056bd94f4SPierre Jolivet ierr = MatDenseGetArrayRead(C, &c_ptr);CHKERRQ(ierr); 32156bd94f4SPierre Jolivet ierr = MatDenseGetArrayWrite(D, &d_ptr);CHKERRQ(ierr); 3221e1ea65dSPierre Jolivet ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %Dx%D\n", Atype, AM, AN);CHKERRQ(ierr); 32356bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr)); 32456bd94f4SPierre Jolivet ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 32556bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 32656bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr)); 32756bd94f4SPierre Jolivet } 32856bd94f4SPierre Jolivet ierr = PetscLogStagePop();CHKERRQ(ierr); 32956bd94f4SPierre Jolivet ierr = MatDenseRestoreArrayWrite(D, &d_ptr);CHKERRQ(ierr); 33056bd94f4SPierre Jolivet ierr = MatDenseRestoreArrayRead(C, &c_ptr);CHKERRQ(ierr); 33156bd94f4SPierre Jolivet } 33256bd94f4SPierre Jolivet 33356bd94f4SPierre Jolivet if (check) { 33456bd94f4SPierre Jolivet ierr = MatMatMultEqual(A, C, D, 10, &flg);CHKERRQ(ierr); 33556bd94f4SPierre Jolivet if (!flg) { 33656bd94f4SPierre Jolivet MatType Dtype; 33756bd94f4SPierre Jolivet 33856bd94f4SPierre Jolivet ierr = MatGetType(D, &Dtype);CHKERRQ(ierr); 33956bd94f4SPierre Jolivet ierr = PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %D\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]);CHKERRQ(ierr); 34056bd94f4SPierre Jolivet } 34156bd94f4SPierre Jolivet } 34256bd94f4SPierre Jolivet 34356bd94f4SPierre Jolivet /* MKL implementation seems buggy for ABt */ 34456bd94f4SPierre Jolivet /* A*Bt */ 34556bd94f4SPierre Jolivet if (!mkl && trans && N[k] > 1) { 34656bd94f4SPierre Jolivet ierr = PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "");CHKERRQ(ierr); 34756bd94f4SPierre Jolivet if (flg) { 34856bd94f4SPierre Jolivet ierr = MatTranspose(C, MAT_INPLACE_MATRIX, &C);CHKERRQ(ierr); 34956bd94f4SPierre Jolivet ierr = MatGetType(C, &Ctype);CHKERRQ(ierr); 35056bd94f4SPierre Jolivet if (!mkl) { 35156bd94f4SPierre Jolivet ierr = MatProductCreateWithMat(A, C, NULL, D);CHKERRQ(ierr); 35256bd94f4SPierre Jolivet ierr = MatProductSetType(D, MATPRODUCT_ABt);CHKERRQ(ierr); 35356bd94f4SPierre Jolivet ierr = MatProductSetFromOptions(D);CHKERRQ(ierr); 35456bd94f4SPierre Jolivet ierr = MatProductSymbolic(D);CHKERRQ(ierr); 35556bd94f4SPierre Jolivet ierr = MatProductNumeric(D);CHKERRQ(ierr); 3561e1ea65dSPierre Jolivet ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %Dx%D and Bt %s %Dx%D\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN);CHKERRQ(ierr); 35756bd94f4SPierre Jolivet ierr = PetscLogStagePush(tstage);CHKERRQ(ierr); 35856bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 35956bd94f4SPierre Jolivet ierr = MatProductNumeric(D);CHKERRQ(ierr); 36056bd94f4SPierre Jolivet } 36156bd94f4SPierre Jolivet ierr = PetscLogStagePop();CHKERRQ(ierr); 36256bd94f4SPierre Jolivet } else { 36356bd94f4SPierre Jolivet const PetscScalar *c_ptr; 36456bd94f4SPierre Jolivet PetscScalar *d_ptr; 36556bd94f4SPierre Jolivet 36656bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial)); 36756bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_optimize, (spr)); 36856bd94f4SPierre Jolivet ierr = MatDenseGetArrayRead(C, &c_ptr);CHKERRQ(ierr); 36956bd94f4SPierre Jolivet ierr = MatDenseGetArrayWrite(D, &d_ptr);CHKERRQ(ierr); 3701e1ea65dSPierre Jolivet ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %Dx%D and B %s %Dx%D\n", Atype, AM, AN, Ctype, CM, CN);CHKERRQ(ierr); 37156bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 37256bd94f4SPierre Jolivet ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 37356bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 37456bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 37556bd94f4SPierre Jolivet } 37656bd94f4SPierre Jolivet ierr = PetscLogStagePop();CHKERRQ(ierr); 37756bd94f4SPierre Jolivet ierr = MatDenseRestoreArrayWrite(D, &d_ptr);CHKERRQ(ierr); 37856bd94f4SPierre Jolivet ierr = MatDenseRestoreArrayRead(C, &c_ptr);CHKERRQ(ierr); 37956bd94f4SPierre Jolivet } 38056bd94f4SPierre Jolivet } 38156bd94f4SPierre Jolivet } 38256bd94f4SPierre Jolivet 38356bd94f4SPierre Jolivet if (!mkl && trans && N[k] > 1 && flg && check) { 38456bd94f4SPierre Jolivet ierr = MatMatTransposeMultEqual(A, C, D, 10, &flg);CHKERRQ(ierr); 38556bd94f4SPierre Jolivet if (!flg) { 38656bd94f4SPierre Jolivet MatType Dtype; 38756bd94f4SPierre Jolivet ierr = MatGetType(D, &Dtype);CHKERRQ(ierr); 38856bd94f4SPierre Jolivet ierr = PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %D\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]);CHKERRQ(ierr); 38956bd94f4SPierre Jolivet } 39056bd94f4SPierre Jolivet } 39156bd94f4SPierre Jolivet ierr = MatDestroy(&C);CHKERRQ(ierr); 39256bd94f4SPierre Jolivet ierr = MatDestroy(&D);CHKERRQ(ierr); 39356bd94f4SPierre Jolivet } 39456bd94f4SPierre Jolivet if (mkl) { 39556bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_destroy, (spr)); 39656bd94f4SPierre Jolivet ierr = PetscFree(ia_ptr);CHKERRQ(ierr); 39756bd94f4SPierre Jolivet ierr = PetscFree(ja_ptr);CHKERRQ(ierr); 39856bd94f4SPierre Jolivet } 39956bd94f4SPierre Jolivet if (cuda && i != ntype - 1) { 40056bd94f4SPierre Jolivet ierr = PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n");CHKERRQ(ierr); 40156bd94f4SPierre Jolivet break; 40256bd94f4SPierre Jolivet } 40356bd94f4SPierre Jolivet } 40456bd94f4SPierre Jolivet if (E != A) { 40556bd94f4SPierre Jolivet ierr = MatDestroy(&E);CHKERRQ(ierr); 40656bd94f4SPierre Jolivet } 40756bd94f4SPierre Jolivet ierr = MatDestroy(&A);CHKERRQ(ierr); 40856bd94f4SPierre Jolivet } 40956bd94f4SPierre Jolivet for (m = 0; m < ntype; ++m) { 41056bd94f4SPierre Jolivet ierr = PetscFree(type[m]);CHKERRQ(ierr); 41156bd94f4SPierre Jolivet } 41256bd94f4SPierre Jolivet ierr = PetscFinalize(); 41356bd94f4SPierre Jolivet return 0; 41456bd94f4SPierre Jolivet } 41556bd94f4SPierre Jolivet 41656bd94f4SPierre Jolivet /*TEST 417b8582c7cSSatish Balay build: 418*dfd57a17SPierre Jolivet requires: double !complex !defined(PETSC_USE_64BIT_INDICES) 41956bd94f4SPierre Jolivet 42056bd94f4SPierre Jolivet testset: 42156bd94f4SPierre Jolivet nsize: 1 42256bd94f4SPierre Jolivet filter: sed "/Benchmarking/d" 42356bd94f4SPierre Jolivet args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -trans -convert_aij {{false true}shared output} 42456bd94f4SPierre Jolivet test: 42556bd94f4SPierre Jolivet suffix: basic 42656bd94f4SPierre Jolivet args: -type aij,sbaij,baij 42756bd94f4SPierre Jolivet output_file: output/ex237.out 42856bd94f4SPierre Jolivet test: 42956bd94f4SPierre Jolivet suffix: maij 43056bd94f4SPierre Jolivet args: -type aij,maij 43156bd94f4SPierre Jolivet output_file: output/ex237.out 43256bd94f4SPierre Jolivet test: 43356bd94f4SPierre Jolivet suffix: cuda 43456bd94f4SPierre Jolivet requires: cuda 43556bd94f4SPierre Jolivet args: -type aij,aijcusparse 43656bd94f4SPierre Jolivet output_file: output/ex237.out 43756bd94f4SPierre Jolivet test: 43856bd94f4SPierre Jolivet suffix: mkl 43956bd94f4SPierre Jolivet requires: mkl 44056bd94f4SPierre Jolivet args: -type aij,aijmkl,baijmkl,sbaijmkl 44156bd94f4SPierre Jolivet output_file: output/ex237.out 44256bd94f4SPierre Jolivet 44356bd94f4SPierre Jolivet TEST*/ 444