1*56bd94f4SPierre Jolivet static char help[] = "Mini-app to benchmark matrix--matrix multiplication\n\n"; 2*56bd94f4SPierre Jolivet 3*56bd94f4SPierre Jolivet /* 4*56bd94f4SPierre Jolivet See the paper below for more information 5*56bd94f4SPierre Jolivet 6*56bd94f4SPierre Jolivet "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods", 7*56bd94f4SPierre Jolivet P. Jolivet, J. E. Roman, and S. Zampini (2020). 8*56bd94f4SPierre Jolivet */ 9*56bd94f4SPierre Jolivet 10*56bd94f4SPierre Jolivet #include <petsc.h> 11*56bd94f4SPierre Jolivet 12*56bd94f4SPierre Jolivet #if defined(PETSC_HAVE_MKL) 13*56bd94f4SPierre Jolivet #include <mkl.h> 14*56bd94f4SPierre Jolivet #define PetscStackCallMKLSparse(func, args) do { \ 15*56bd94f4SPierre Jolivet sparse_status_t __ierr; \ 16*56bd94f4SPierre Jolivet PetscStackPush(#func); \ 17*56bd94f4SPierre Jolivet __ierr = func args; \ 18*56bd94f4SPierre Jolivet PetscStackPop; \ 19*56bd94f4SPierre Jolivet if (__ierr != SPARSE_STATUS_SUCCESS) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)__ierr); \ 20*56bd94f4SPierre Jolivet } while (0) 21*56bd94f4SPierre Jolivet #else 22*56bd94f4SPierre Jolivet #define PetscStackCallMKLSparse(func, args) do { \ 23*56bd94f4SPierre Jolivet SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); \ 24*56bd94f4SPierre Jolivet } while (0) 25*56bd94f4SPierre Jolivet #endif 26*56bd94f4SPierre Jolivet 27*56bd94f4SPierre Jolivet int main(int argc, char** argv) { 28*56bd94f4SPierre Jolivet Mat A, C, D, E; 29*56bd94f4SPierre Jolivet PetscInt nbs = 10, ntype = 10, nN = 8, m, M, trial = 5; 30*56bd94f4SPierre Jolivet PetscViewer viewer; 31*56bd94f4SPierre Jolivet PetscInt bs[10], N[8]; 32*56bd94f4SPierre Jolivet char *type[10]; 33*56bd94f4SPierre Jolivet PetscMPIInt size; 34*56bd94f4SPierre Jolivet PetscBool flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, trans = PETSC_FALSE, convert = PETSC_FALSE, mkl; 35*56bd94f4SPierre Jolivet char file[PETSC_MAX_PATH_LEN]; 36*56bd94f4SPierre Jolivet PetscErrorCode ierr; 37*56bd94f4SPierre Jolivet 38*56bd94f4SPierre Jolivet ierr = PetscInitialize(&argc, &argv, NULL, help);CHKERRQ(ierr); 39*56bd94f4SPierre Jolivet if (ierr) return ierr; 40*56bd94f4SPierre Jolivet ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 41*56bd94f4SPierre Jolivet if (size != 1) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only"); 42*56bd94f4SPierre Jolivet ierr = PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg);CHKERRQ(ierr); 43*56bd94f4SPierre Jolivet if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f option"); 44*56bd94f4SPierre Jolivet ierr = PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL);CHKERRQ(ierr); 45*56bd94f4SPierre Jolivet ierr = PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg);CHKERRQ(ierr); 46*56bd94f4SPierre Jolivet if (!flg) { 47*56bd94f4SPierre Jolivet nbs = 1; 48*56bd94f4SPierre Jolivet bs[0] = 1; 49*56bd94f4SPierre Jolivet } 50*56bd94f4SPierre Jolivet ierr = PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg);CHKERRQ(ierr); 51*56bd94f4SPierre Jolivet if (!flg) { 52*56bd94f4SPierre Jolivet nN = 8; 53*56bd94f4SPierre Jolivet N[0] = 1; N[1] = 2; N[2] = 4; N[3] = 8; 54*56bd94f4SPierre Jolivet N[4] = 16; N[5] = 32; N[6] = 64; N[7] = 128; 55*56bd94f4SPierre Jolivet } 56*56bd94f4SPierre Jolivet ierr = PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg);CHKERRQ(ierr); 57*56bd94f4SPierre Jolivet if (!flg) { 58*56bd94f4SPierre Jolivet ntype = 1; 59*56bd94f4SPierre Jolivet ierr = PetscStrallocpy(MATSEQAIJ, &type[0]);CHKERRQ(ierr); 60*56bd94f4SPierre Jolivet } 61*56bd94f4SPierre Jolivet ierr = PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL);CHKERRQ(ierr); 62*56bd94f4SPierre Jolivet ierr = PetscOptionsGetBool(NULL, NULL, "-trans", &trans, NULL);CHKERRQ(ierr); 63*56bd94f4SPierre Jolivet ierr = PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL);CHKERRQ(ierr); 64*56bd94f4SPierre Jolivet for (PetscInt j = 0; j < nbs; ++j) { 65*56bd94f4SPierre Jolivet ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer);CHKERRQ(ierr); 66*56bd94f4SPierre Jolivet ierr = MatCreate(PETSC_COMM_WORLD, &A);CHKERRQ(ierr); 67*56bd94f4SPierre Jolivet ierr = MatSetFromOptions(A);CHKERRQ(ierr); 68*56bd94f4SPierre Jolivet ierr = MatLoad(A, viewer);CHKERRQ(ierr); 69*56bd94f4SPierre Jolivet ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 70*56bd94f4SPierre Jolivet ierr = PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "");CHKERRQ(ierr); 71*56bd94f4SPierre Jolivet if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate a MatAIJ input matrix"); 72*56bd94f4SPierre Jolivet ierr = MatGetSize(A, &m, &M);CHKERRQ(ierr); 73*56bd94f4SPierre Jolivet if (m == M) { 74*56bd94f4SPierre Jolivet Mat oA; 75*56bd94f4SPierre Jolivet ierr = MatTranspose(A, MAT_INITIAL_MATRIX, &oA);CHKERRQ(ierr); 76*56bd94f4SPierre Jolivet ierr = MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 77*56bd94f4SPierre Jolivet ierr = MatDestroy(&oA);CHKERRQ(ierr); 78*56bd94f4SPierre Jolivet } 79*56bd94f4SPierre Jolivet ierr = MatGetLocalSize(A, &m, NULL);CHKERRQ(ierr); 80*56bd94f4SPierre Jolivet ierr = MatGetSize(A, &M, NULL);CHKERRQ(ierr); 81*56bd94f4SPierre Jolivet if (bs[j] > 1) { 82*56bd94f4SPierre Jolivet Mat T, Tt, B; 83*56bd94f4SPierre Jolivet const PetscScalar *ptr; 84*56bd94f4SPierre Jolivet PetscScalar *val, *Aa; 85*56bd94f4SPierre Jolivet const PetscInt *Ai, *Aj; 86*56bd94f4SPierre Jolivet PetscInt An, i, k; 87*56bd94f4SPierre Jolivet PetscBool done; 88*56bd94f4SPierre Jolivet 89*56bd94f4SPierre Jolivet ierr = MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T);CHKERRQ(ierr); 90*56bd94f4SPierre Jolivet ierr = MatSetRandom(T, NULL);CHKERRQ(ierr); 91*56bd94f4SPierre Jolivet ierr = MatTranspose(T, MAT_INITIAL_MATRIX, &Tt);CHKERRQ(ierr); 92*56bd94f4SPierre Jolivet ierr = MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN);CHKERRQ(ierr); 93*56bd94f4SPierre Jolivet ierr = MatDestroy(&Tt);CHKERRQ(ierr); 94*56bd94f4SPierre Jolivet ierr = MatDenseGetArrayRead(T, &ptr);CHKERRQ(ierr); 95*56bd94f4SPierre Jolivet ierr = MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done);CHKERRQ(ierr); 96*56bd94f4SPierre Jolivet if (!done || An != m) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes"); 97*56bd94f4SPierre Jolivet ierr = MatSeqAIJGetArray(A, &Aa);CHKERRQ(ierr); 98*56bd94f4SPierre Jolivet ierr = MatCreate(PETSC_COMM_WORLD, &B);CHKERRQ(ierr); 99*56bd94f4SPierre Jolivet ierr = MatSetType(B, MATSEQBAIJ);CHKERRQ(ierr); 100*56bd94f4SPierre Jolivet ierr = MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE);CHKERRQ(ierr); 101*56bd94f4SPierre Jolivet ierr = PetscMalloc1(Ai[An] * bs[j] * bs[j],&val);CHKERRQ(ierr); 102*56bd94f4SPierre Jolivet for(i = 0; i < Ai[An]; ++i) 103*56bd94f4SPierre Jolivet for(k = 0; k < bs[j] * bs[j]; ++k) 104*56bd94f4SPierre Jolivet val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k]; 105*56bd94f4SPierre Jolivet ierr = MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE);CHKERRQ(ierr); 106*56bd94f4SPierre Jolivet ierr = MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val);CHKERRQ(ierr); 107*56bd94f4SPierre Jolivet ierr = PetscFree(val);CHKERRQ(ierr); 108*56bd94f4SPierre Jolivet ierr = MatSeqAIJRestoreArray(A, &Aa);CHKERRQ(ierr); 109*56bd94f4SPierre Jolivet ierr = MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done);CHKERRQ(ierr); 110*56bd94f4SPierre Jolivet ierr = MatDenseRestoreArrayRead(T, &ptr);CHKERRQ(ierr); 111*56bd94f4SPierre Jolivet ierr = MatDestroy(&T);CHKERRQ(ierr); 112*56bd94f4SPierre Jolivet ierr = MatDestroy(&A);CHKERRQ(ierr); 113*56bd94f4SPierre Jolivet A = B; 114*56bd94f4SPierre Jolivet } 115*56bd94f4SPierre Jolivet /* reconvert back to SeqAIJ before converting to the desired type later */ 116*56bd94f4SPierre Jolivet if (!convert) E = A; 117*56bd94f4SPierre Jolivet ierr = MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E);CHKERRQ(ierr); 118*56bd94f4SPierre Jolivet ierr = MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE);CHKERRQ(ierr); 119*56bd94f4SPierre Jolivet for (PetscInt i = 0; i < ntype; ++i) { 120*56bd94f4SPierre Jolivet char *tmp; 121*56bd94f4SPierre Jolivet PetscInt *ia_ptr, *ja_ptr, k; 122*56bd94f4SPierre Jolivet PetscScalar *a_ptr; 123*56bd94f4SPierre Jolivet #if defined(PETSC_HAVE_MKL) 124*56bd94f4SPierre Jolivet struct matrix_descr descr; 125*56bd94f4SPierre Jolivet sparse_matrix_t spr; 126*56bd94f4SPierre Jolivet descr.type = SPARSE_MATRIX_TYPE_GENERAL; 127*56bd94f4SPierre Jolivet descr.diag = SPARSE_DIAG_NON_UNIT; 128*56bd94f4SPierre Jolivet #endif 129*56bd94f4SPierre Jolivet if (convert) { 130*56bd94f4SPierre Jolivet ierr = MatDestroy(&A);CHKERRQ(ierr); 131*56bd94f4SPierre Jolivet } 132*56bd94f4SPierre Jolivet ierr = PetscStrstr(type[i], "mkl", &tmp);CHKERRQ(ierr); 133*56bd94f4SPierre Jolivet if (tmp) { 134*56bd94f4SPierre Jolivet size_t mlen, tlen; 135*56bd94f4SPierre Jolivet char base[256]; 136*56bd94f4SPierre Jolivet 137*56bd94f4SPierre Jolivet mkl = PETSC_TRUE; 138*56bd94f4SPierre Jolivet ierr = PetscStrlen(tmp, &mlen);CHKERRQ(ierr); 139*56bd94f4SPierre Jolivet ierr = PetscStrlen(type[i], &tlen);CHKERRQ(ierr); 140*56bd94f4SPierre Jolivet ierr = PetscStrncpy(base, type[i], tlen-mlen + 1);CHKERRQ(ierr); 141*56bd94f4SPierre Jolivet ierr = MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);CHKERRQ(ierr); 142*56bd94f4SPierre Jolivet } else { 143*56bd94f4SPierre Jolivet mkl = PETSC_FALSE; 144*56bd94f4SPierre Jolivet ierr = PetscStrstr(type[i], "maij", &tmp);CHKERRQ(ierr); 145*56bd94f4SPierre Jolivet if (!tmp) { 146*56bd94f4SPierre Jolivet ierr = MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);CHKERRQ(ierr); 147*56bd94f4SPierre Jolivet } else { 148*56bd94f4SPierre Jolivet ierr = MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);CHKERRQ(ierr); 149*56bd94f4SPierre Jolivet maij = PETSC_TRUE; 150*56bd94f4SPierre Jolivet } 151*56bd94f4SPierre Jolivet } 152*56bd94f4SPierre Jolivet ierr = PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "");CHKERRQ(ierr); 153*56bd94f4SPierre Jolivet if (mkl) { 154*56bd94f4SPierre Jolivet const PetscInt *Ai, *Aj; 155*56bd94f4SPierre Jolivet PetscInt An; 156*56bd94f4SPierre Jolivet PetscBool done; 157*56bd94f4SPierre Jolivet 158*56bd94f4SPierre Jolivet ierr = PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, "");CHKERRQ(ierr); 159*56bd94f4SPierre Jolivet if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Not implemented"); 160*56bd94f4SPierre Jolivet ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg);CHKERRQ(ierr); 161*56bd94f4SPierre Jolivet ierr = MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done);CHKERRQ(ierr); 162*56bd94f4SPierre Jolivet if (!done) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes"); 163*56bd94f4SPierre Jolivet ierr = PetscMalloc1(An + 1,&ia_ptr);CHKERRQ(ierr); 164*56bd94f4SPierre Jolivet ierr = PetscMalloc1(Ai[An],&ja_ptr);CHKERRQ(ierr); 165*56bd94f4SPierre Jolivet if (flg) { /* SeqAIJ */ 166*56bd94f4SPierre Jolivet for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k]; 167*56bd94f4SPierre Jolivet for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k]; 168*56bd94f4SPierre Jolivet ierr = MatSeqAIJGetArray(A, &a_ptr);CHKERRQ(ierr); 169*56bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 170*56bd94f4SPierre Jolivet } else { 171*56bd94f4SPierre Jolivet ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg);CHKERRQ(ierr); 172*56bd94f4SPierre Jolivet if (flg) { 173*56bd94f4SPierre Jolivet for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 174*56bd94f4SPierre Jolivet for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 175*56bd94f4SPierre Jolivet ierr = MatSeqBAIJGetArray(A, &a_ptr);CHKERRQ(ierr); 176*56bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 177*56bd94f4SPierre Jolivet } else { 178*56bd94f4SPierre Jolivet ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg);CHKERRQ(ierr); 179*56bd94f4SPierre Jolivet if (flg) { 180*56bd94f4SPierre Jolivet for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 181*56bd94f4SPierre Jolivet for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 182*56bd94f4SPierre Jolivet ierr = MatSeqSBAIJGetArray(A, &a_ptr);CHKERRQ(ierr); 183*56bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 184*56bd94f4SPierre Jolivet #if defined(PETSC_HAVE_MKL) 185*56bd94f4SPierre Jolivet descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 186*56bd94f4SPierre Jolivet descr.mode = SPARSE_FILL_MODE_UPPER; 187*56bd94f4SPierre Jolivet descr.diag = SPARSE_DIAG_NON_UNIT; 188*56bd94f4SPierre Jolivet #endif 189*56bd94f4SPierre Jolivet } 190*56bd94f4SPierre Jolivet } 191*56bd94f4SPierre Jolivet } 192*56bd94f4SPierre Jolivet ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg);CHKERRQ(ierr); 193*56bd94f4SPierre Jolivet ierr = MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done);CHKERRQ(ierr); 194*56bd94f4SPierre Jolivet } 195*56bd94f4SPierre Jolivet 196*56bd94f4SPierre Jolivet ierr = MatViewFromOptions(A, NULL, "-A_view");CHKERRQ(ierr); 197*56bd94f4SPierre Jolivet 198*56bd94f4SPierre Jolivet for(k = 0; k < nN; ++k) { 199*56bd94f4SPierre Jolivet MatType Atype, Ctype; 200*56bd94f4SPierre Jolivet PetscInt AM, AN, CM, CN, t; 201*56bd94f4SPierre Jolivet PetscLogStage stage, tstage; 202*56bd94f4SPierre Jolivet char stage_s[256]; 203*56bd94f4SPierre Jolivet 204*56bd94f4SPierre Jolivet ierr = MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C);CHKERRQ(ierr); 205*56bd94f4SPierre Jolivet ierr = MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D);CHKERRQ(ierr); 206*56bd94f4SPierre Jolivet ierr = MatSetRandom(C, NULL);CHKERRQ(ierr); 207*56bd94f4SPierre Jolivet if (cuda) { /* convert to GPU if needed */ 208*56bd94f4SPierre Jolivet ierr = MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C);CHKERRQ(ierr); 209*56bd94f4SPierre Jolivet ierr = MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D);CHKERRQ(ierr); 210*56bd94f4SPierre Jolivet } 211*56bd94f4SPierre Jolivet if (mkl) { 212*56bd94f4SPierre Jolivet if (N[k] > 1) PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial)); 213*56bd94f4SPierre Jolivet else PetscStackCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial)); 214*56bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE)); 215*56bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_optimize, (spr)); 216*56bd94f4SPierre Jolivet } 217*56bd94f4SPierre Jolivet ierr = MatGetType(A, &Atype);CHKERRQ(ierr); 218*56bd94f4SPierre Jolivet ierr = MatGetType(C, &Ctype);CHKERRQ(ierr); 219*56bd94f4SPierre Jolivet ierr = MatGetSize(A, &AM, &AN);CHKERRQ(ierr); 220*56bd94f4SPierre Jolivet ierr = MatGetSize(C, &CM, &CN);CHKERRQ(ierr); 221*56bd94f4SPierre Jolivet 222*56bd94f4SPierre Jolivet if (!maij || N[k] > 1) { 223*56bd94f4SPierre Jolivet ierr = PetscSNPrintf(stage_s, sizeof(stage_s), "type_%s-bs_%D-N_%02d", type[i], bs[j], (int)N[k]);CHKERRQ(ierr); 224*56bd94f4SPierre Jolivet ierr = PetscLogStageRegister(stage_s, &stage);CHKERRQ(ierr); 225*56bd94f4SPierre Jolivet } 226*56bd94f4SPierre Jolivet if (trans && N[k] > 1) { 227*56bd94f4SPierre Jolivet ierr = PetscSNPrintf(stage_s, sizeof(stage_s), "trans_type_%s-bs_%D-N_%02d", type[i], bs[j], (int)N[k]);CHKERRQ(ierr); 228*56bd94f4SPierre Jolivet ierr = PetscLogStageRegister(stage_s, &tstage);CHKERRQ(ierr); 229*56bd94f4SPierre Jolivet } 230*56bd94f4SPierre Jolivet 231*56bd94f4SPierre Jolivet /* A*B */ 232*56bd94f4SPierre Jolivet if (N[k] > 1) { 233*56bd94f4SPierre Jolivet if (!maij) { 234*56bd94f4SPierre Jolivet ierr = MatProductCreateWithMat(A, C, NULL, D);CHKERRQ(ierr); 235*56bd94f4SPierre Jolivet ierr = MatProductSetType(D, MATPRODUCT_AB);CHKERRQ(ierr); 236*56bd94f4SPierre Jolivet ierr = MatProductSetFromOptions(D);CHKERRQ(ierr); 237*56bd94f4SPierre Jolivet ierr = MatProductSymbolic(D);CHKERRQ(ierr); 238*56bd94f4SPierre Jolivet } 239*56bd94f4SPierre Jolivet 240*56bd94f4SPierre Jolivet if (!mkl) { 241*56bd94f4SPierre Jolivet if (!maij) { 242*56bd94f4SPierre Jolivet ierr = MatProductNumeric(D);CHKERRQ(ierr); 243*56bd94f4SPierre Jolivet ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %Dx%D and B %s %Dx%D\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN); 244*56bd94f4SPierre Jolivet ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 245*56bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 246*56bd94f4SPierre Jolivet ierr = MatProductNumeric(D);CHKERRQ(ierr); 247*56bd94f4SPierre Jolivet } 248*56bd94f4SPierre Jolivet ierr = PetscLogStagePop();CHKERRQ(ierr); 249*56bd94f4SPierre Jolivet } else { 250*56bd94f4SPierre Jolivet Mat E, Ct, Dt; 251*56bd94f4SPierre Jolivet Vec cC, cD; 252*56bd94f4SPierre Jolivet const PetscScalar *c_ptr; 253*56bd94f4SPierre Jolivet PetscScalar *d_ptr; 254*56bd94f4SPierre Jolivet ierr = MatCreateMAIJ(A, N[k], &E);CHKERRQ(ierr); 255*56bd94f4SPierre Jolivet ierr = MatDenseGetLocalMatrix(C, &Ct);CHKERRQ(ierr); 256*56bd94f4SPierre Jolivet ierr = MatDenseGetLocalMatrix(D, &Dt);CHKERRQ(ierr); 257*56bd94f4SPierre Jolivet ierr = MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct);CHKERRQ(ierr); 258*56bd94f4SPierre Jolivet ierr = MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt);CHKERRQ(ierr); 259*56bd94f4SPierre Jolivet ierr = MatDenseGetArrayRead(Ct, &c_ptr);CHKERRQ(ierr); 260*56bd94f4SPierre Jolivet ierr = MatDenseGetArrayWrite(Dt, &d_ptr);CHKERRQ(ierr); 261*56bd94f4SPierre Jolivet ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC);CHKERRQ(ierr); 262*56bd94f4SPierre Jolivet ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD);CHKERRQ(ierr); 263*56bd94f4SPierre Jolivet ierr = MatMult(E, cC, cD);CHKERRQ(ierr); 264*56bd94f4SPierre Jolivet ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %Dx%D and B %s %Dx%D\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1); 265*56bd94f4SPierre Jolivet ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 266*56bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 267*56bd94f4SPierre Jolivet ierr = MatMult(E, cC, cD);CHKERRQ(ierr); 268*56bd94f4SPierre Jolivet } 269*56bd94f4SPierre Jolivet ierr = PetscLogStagePop();CHKERRQ(ierr); 270*56bd94f4SPierre Jolivet ierr = VecDestroy(&cD);CHKERRQ(ierr); 271*56bd94f4SPierre Jolivet ierr = VecDestroy(&cC);CHKERRQ(ierr); 272*56bd94f4SPierre Jolivet ierr = MatDestroy(&E);CHKERRQ(ierr); 273*56bd94f4SPierre Jolivet ierr = MatDenseRestoreArrayWrite(Dt, &d_ptr);CHKERRQ(ierr); 274*56bd94f4SPierre Jolivet ierr = MatDenseRestoreArrayRead(Ct, &c_ptr);CHKERRQ(ierr); 275*56bd94f4SPierre Jolivet ierr = MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct);CHKERRQ(ierr); 276*56bd94f4SPierre Jolivet ierr = MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt);CHKERRQ(ierr); 277*56bd94f4SPierre Jolivet } 278*56bd94f4SPierre Jolivet } else { 279*56bd94f4SPierre Jolivet const PetscScalar *c_ptr; 280*56bd94f4SPierre Jolivet PetscScalar *d_ptr; 281*56bd94f4SPierre Jolivet 282*56bd94f4SPierre Jolivet ierr = MatDenseGetArrayRead(C, &c_ptr);CHKERRQ(ierr); 283*56bd94f4SPierre Jolivet ierr = MatDenseGetArrayWrite(D, &d_ptr);CHKERRQ(ierr); 284*56bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_mm,(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 285*56bd94f4SPierre Jolivet ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %Dx%D and B %s %Dx%D\n", Atype, AM, AN, Ctype, CM, CN); 286*56bd94f4SPierre Jolivet ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 287*56bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 288*56bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 289*56bd94f4SPierre Jolivet } 290*56bd94f4SPierre Jolivet ierr = PetscLogStagePop();CHKERRQ(ierr); 291*56bd94f4SPierre Jolivet ierr = MatDenseRestoreArrayWrite(D, &d_ptr);CHKERRQ(ierr); 292*56bd94f4SPierre Jolivet ierr = MatDenseRestoreArrayRead(C, &c_ptr);CHKERRQ(ierr); 293*56bd94f4SPierre Jolivet } 294*56bd94f4SPierre Jolivet } else if (maij) { 295*56bd94f4SPierre Jolivet ierr = MatDestroy(&C);CHKERRQ(ierr); 296*56bd94f4SPierre Jolivet ierr = MatDestroy(&D);CHKERRQ(ierr); 297*56bd94f4SPierre Jolivet continue; 298*56bd94f4SPierre Jolivet } else if (!mkl) { 299*56bd94f4SPierre Jolivet Vec cC, cD; 300*56bd94f4SPierre Jolivet 301*56bd94f4SPierre Jolivet ierr = MatDenseGetColumnVecRead(C, 0, &cC);CHKERRQ(ierr); 302*56bd94f4SPierre Jolivet ierr = MatDenseGetColumnVecWrite(D, 0, &cD);CHKERRQ(ierr); 303*56bd94f4SPierre Jolivet ierr = MatMult(A, cC, cD);CHKERRQ(ierr); 304*56bd94f4SPierre Jolivet ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %Dx%D\n", Atype, AM, AN); 305*56bd94f4SPierre Jolivet ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 306*56bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 307*56bd94f4SPierre Jolivet ierr = MatMult(A, cC, cD);CHKERRQ(ierr); 308*56bd94f4SPierre Jolivet } 309*56bd94f4SPierre Jolivet ierr = PetscLogStagePop();CHKERRQ(ierr); 310*56bd94f4SPierre Jolivet ierr = MatDenseRestoreColumnVecRead(C, 0, &cC);CHKERRQ(ierr); 311*56bd94f4SPierre Jolivet ierr = MatDenseRestoreColumnVecWrite(D, 0, &cD);CHKERRQ(ierr); 312*56bd94f4SPierre Jolivet } else { 313*56bd94f4SPierre Jolivet const PetscScalar *c_ptr; 314*56bd94f4SPierre Jolivet PetscScalar *d_ptr; 315*56bd94f4SPierre Jolivet 316*56bd94f4SPierre Jolivet ierr = MatDenseGetArrayRead(C, &c_ptr);CHKERRQ(ierr); 317*56bd94f4SPierre Jolivet ierr = MatDenseGetArrayWrite(D, &d_ptr);CHKERRQ(ierr); 318*56bd94f4SPierre Jolivet ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %Dx%D\n", Atype, AM, AN); 319*56bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr)); 320*56bd94f4SPierre Jolivet ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 321*56bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 322*56bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr)); 323*56bd94f4SPierre Jolivet } 324*56bd94f4SPierre Jolivet ierr = PetscLogStagePop();CHKERRQ(ierr); 325*56bd94f4SPierre Jolivet ierr = MatDenseRestoreArrayWrite(D, &d_ptr);CHKERRQ(ierr); 326*56bd94f4SPierre Jolivet ierr = MatDenseRestoreArrayRead(C, &c_ptr);CHKERRQ(ierr); 327*56bd94f4SPierre Jolivet } 328*56bd94f4SPierre Jolivet 329*56bd94f4SPierre Jolivet if (check) { 330*56bd94f4SPierre Jolivet ierr = MatMatMultEqual(A, C, D, 10, &flg);CHKERRQ(ierr); 331*56bd94f4SPierre Jolivet if (!flg) { 332*56bd94f4SPierre Jolivet MatType Dtype; 333*56bd94f4SPierre Jolivet 334*56bd94f4SPierre Jolivet ierr = MatGetType(D, &Dtype);CHKERRQ(ierr); 335*56bd94f4SPierre Jolivet ierr = PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %D\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]);CHKERRQ(ierr); 336*56bd94f4SPierre Jolivet } 337*56bd94f4SPierre Jolivet } 338*56bd94f4SPierre Jolivet 339*56bd94f4SPierre Jolivet /* MKL implementation seems buggy for ABt */ 340*56bd94f4SPierre Jolivet /* A*Bt */ 341*56bd94f4SPierre Jolivet if (!mkl && trans && N[k] > 1) { 342*56bd94f4SPierre Jolivet ierr = PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "");CHKERRQ(ierr); 343*56bd94f4SPierre Jolivet if (flg) { 344*56bd94f4SPierre Jolivet ierr = MatTranspose(C, MAT_INPLACE_MATRIX, &C);CHKERRQ(ierr); 345*56bd94f4SPierre Jolivet ierr = MatGetType(C, &Ctype);CHKERRQ(ierr); 346*56bd94f4SPierre Jolivet if (!mkl) { 347*56bd94f4SPierre Jolivet ierr = MatProductCreateWithMat(A, C, NULL, D);CHKERRQ(ierr); 348*56bd94f4SPierre Jolivet ierr = MatProductSetType(D, MATPRODUCT_ABt);CHKERRQ(ierr); 349*56bd94f4SPierre Jolivet ierr = MatProductSetFromOptions(D);CHKERRQ(ierr); 350*56bd94f4SPierre Jolivet ierr = MatProductSymbolic(D);CHKERRQ(ierr); 351*56bd94f4SPierre Jolivet ierr = MatProductNumeric(D);CHKERRQ(ierr); 352*56bd94f4SPierre Jolivet ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %Dx%D and Bt %s %Dx%D\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN); 353*56bd94f4SPierre Jolivet ierr = PetscLogStagePush(tstage);CHKERRQ(ierr); 354*56bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 355*56bd94f4SPierre Jolivet ierr = MatProductNumeric(D);CHKERRQ(ierr); 356*56bd94f4SPierre Jolivet } 357*56bd94f4SPierre Jolivet ierr = PetscLogStagePop();CHKERRQ(ierr); 358*56bd94f4SPierre Jolivet } else { 359*56bd94f4SPierre Jolivet const PetscScalar *c_ptr; 360*56bd94f4SPierre Jolivet PetscScalar *d_ptr; 361*56bd94f4SPierre Jolivet 362*56bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial)); 363*56bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_optimize, (spr)); 364*56bd94f4SPierre Jolivet ierr = MatDenseGetArrayRead(C, &c_ptr);CHKERRQ(ierr); 365*56bd94f4SPierre Jolivet ierr = MatDenseGetArrayWrite(D, &d_ptr);CHKERRQ(ierr); 366*56bd94f4SPierre Jolivet ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %Dx%D and B %s %Dx%D\n", Atype, AM, AN, Ctype, CM, CN); 367*56bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 368*56bd94f4SPierre Jolivet ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 369*56bd94f4SPierre Jolivet for (t = 0; t < trial; ++t) { 370*56bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 371*56bd94f4SPierre Jolivet } 372*56bd94f4SPierre Jolivet ierr = PetscLogStagePop();CHKERRQ(ierr); 373*56bd94f4SPierre Jolivet ierr = MatDenseRestoreArrayWrite(D, &d_ptr);CHKERRQ(ierr); 374*56bd94f4SPierre Jolivet ierr = MatDenseRestoreArrayRead(C, &c_ptr);CHKERRQ(ierr); 375*56bd94f4SPierre Jolivet } 376*56bd94f4SPierre Jolivet } 377*56bd94f4SPierre Jolivet } 378*56bd94f4SPierre Jolivet 379*56bd94f4SPierre Jolivet if (!mkl && trans && N[k] > 1 && flg && check) { 380*56bd94f4SPierre Jolivet ierr = MatMatTransposeMultEqual(A, C, D, 10, &flg);CHKERRQ(ierr); 381*56bd94f4SPierre Jolivet if (!flg) { 382*56bd94f4SPierre Jolivet MatType Dtype; 383*56bd94f4SPierre Jolivet ierr = MatGetType(D, &Dtype);CHKERRQ(ierr); 384*56bd94f4SPierre Jolivet ierr = PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %D\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]);CHKERRQ(ierr); 385*56bd94f4SPierre Jolivet } 386*56bd94f4SPierre Jolivet } 387*56bd94f4SPierre Jolivet ierr = MatDestroy(&C);CHKERRQ(ierr); 388*56bd94f4SPierre Jolivet ierr = MatDestroy(&D);CHKERRQ(ierr); 389*56bd94f4SPierre Jolivet } 390*56bd94f4SPierre Jolivet if (mkl) { 391*56bd94f4SPierre Jolivet PetscStackCallMKLSparse(mkl_sparse_destroy, (spr)); 392*56bd94f4SPierre Jolivet ierr = PetscFree(ia_ptr);CHKERRQ(ierr); 393*56bd94f4SPierre Jolivet ierr = PetscFree(ja_ptr);CHKERRQ(ierr); 394*56bd94f4SPierre Jolivet } 395*56bd94f4SPierre Jolivet if (cuda && i != ntype - 1) { 396*56bd94f4SPierre Jolivet ierr = PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n");CHKERRQ(ierr); 397*56bd94f4SPierre Jolivet break; 398*56bd94f4SPierre Jolivet } 399*56bd94f4SPierre Jolivet } 400*56bd94f4SPierre Jolivet if(E != A) { 401*56bd94f4SPierre Jolivet ierr = MatDestroy(&E);CHKERRQ(ierr); 402*56bd94f4SPierre Jolivet } 403*56bd94f4SPierre Jolivet ierr = MatDestroy(&A);CHKERRQ(ierr); 404*56bd94f4SPierre Jolivet } 405*56bd94f4SPierre Jolivet for (m = 0; m < ntype; ++m) { 406*56bd94f4SPierre Jolivet ierr = PetscFree(type[m]);CHKERRQ(ierr); 407*56bd94f4SPierre Jolivet } 408*56bd94f4SPierre Jolivet ierr = PetscFinalize(); 409*56bd94f4SPierre Jolivet return 0; 410*56bd94f4SPierre Jolivet } 411*56bd94f4SPierre Jolivet 412*56bd94f4SPierre Jolivet /*TEST 413*56bd94f4SPierre Jolivet 414*56bd94f4SPierre Jolivet testset: 415*56bd94f4SPierre Jolivet nsize: 1 416*56bd94f4SPierre Jolivet requires: double !complex !define(PETSC_USE_64BIT_INDICES) 417*56bd94f4SPierre Jolivet filter: sed "/Benchmarking/d" 418*56bd94f4SPierre Jolivet args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -trans -convert_aij {{false true}shared output} 419*56bd94f4SPierre Jolivet test: 420*56bd94f4SPierre Jolivet suffix: basic 421*56bd94f4SPierre Jolivet args: -type aij,sbaij,baij 422*56bd94f4SPierre Jolivet output_file: output/ex237.out 423*56bd94f4SPierre Jolivet test: 424*56bd94f4SPierre Jolivet suffix: maij 425*56bd94f4SPierre Jolivet args: -type aij,maij 426*56bd94f4SPierre Jolivet output_file: output/ex237.out 427*56bd94f4SPierre Jolivet test: 428*56bd94f4SPierre Jolivet suffix: cuda 429*56bd94f4SPierre Jolivet requires: cuda 430*56bd94f4SPierre Jolivet args: -type aij,aijcusparse 431*56bd94f4SPierre Jolivet output_file: output/ex237.out 432*56bd94f4SPierre Jolivet test: 433*56bd94f4SPierre Jolivet suffix: mkl 434*56bd94f4SPierre Jolivet requires: mkl 435*56bd94f4SPierre Jolivet args: -type aij,aijmkl,baijmkl,sbaijmkl 436*56bd94f4SPierre Jolivet output_file: output/ex237.out 437*56bd94f4SPierre Jolivet 438*56bd94f4SPierre Jolivet TEST*/ 439