17072be85SIrina Sokolova /* 27072be85SIrina Sokolova Defines basic operations for the MATSEQBAIJMKL matrix class. 37072be85SIrina Sokolova Uses sparse BLAS operations from the Intel Math Kernel Library (MKL) 4da81f932SPierre Jolivet wherever possible. If used MKL version is older than 11.3 PETSc default 57072be85SIrina Sokolova code for sparse matrix operations is used. 67072be85SIrina Sokolova */ 77072be85SIrina Sokolova 87072be85SIrina Sokolova #include <../src/mat/impls/baij/seq/baij.h> 97072be85SIrina Sokolova #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h> 10bdfea6dbSBarry Smith #if defined(PETSC_HAVE_MKL_INTEL_ILP64) 11bdfea6dbSBarry Smith #define MKL_ILP64 12bdfea6dbSBarry Smith #endif 13b9e7e5c1SBarry Smith #include <mkl_spblas.h> 147072be85SIrina Sokolova 15d71ae5a4SJacob Faibussowitsch static PetscBool PetscSeqBAIJSupportsZeroBased(void) 16d71ae5a4SJacob Faibussowitsch { 17b9e7e5c1SBarry Smith static PetscBool set = PETSC_FALSE, value; 18b9e7e5c1SBarry Smith int n = 1, ia[1], ja[1]; 19b9e7e5c1SBarry Smith float a[1]; 20b9e7e5c1SBarry Smith sparse_status_t status; 21b9e7e5c1SBarry Smith sparse_matrix_t A; 227072be85SIrina Sokolova 23b9e7e5c1SBarry Smith if (!set) { 24bdfea6dbSBarry Smith status = mkl_sparse_s_create_bsr(&A, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)n, (MKL_INT)n, (MKL_INT)n, (MKL_INT *)ia, (MKL_INT *)ia, (MKL_INT *)ja, a); 25b9e7e5c1SBarry Smith value = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE; 26b9e7e5c1SBarry Smith (void)mkl_sparse_destroy(A); 27b9e7e5c1SBarry Smith set = PETSC_TRUE; 28b9e7e5c1SBarry Smith } 29b9e7e5c1SBarry Smith return value; 30b9e7e5c1SBarry Smith } 317072be85SIrina Sokolova 327072be85SIrina Sokolova typedef struct { 337072be85SIrina Sokolova PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 347072be85SIrina Sokolova sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 357072be85SIrina Sokolova struct matrix_descr descr; 367072be85SIrina Sokolova PetscInt *ai1; 377072be85SIrina Sokolova PetscInt *aj1; 387072be85SIrina Sokolova } Mat_SeqBAIJMKL; 397072be85SIrina Sokolova 40b9e7e5c1SBarry Smith static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode); 417072be85SIrina Sokolova extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat, MatAssemblyType); 427072be85SIrina Sokolova 43d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat) 44d71ae5a4SJacob Faibussowitsch { 457072be85SIrina Sokolova /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */ 467072be85SIrina Sokolova /* so we will ignore 'MatType type'. */ 477072be85SIrina Sokolova Mat B = *newmat; 487072be85SIrina Sokolova Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 497072be85SIrina Sokolova 507072be85SIrina Sokolova PetscFunctionBegin; 519566063dSJacob Faibussowitsch if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 527072be85SIrina Sokolova 537072be85SIrina Sokolova /* Reset the original function pointers. */ 547072be85SIrina Sokolova B->ops->duplicate = MatDuplicate_SeqBAIJ; 557072be85SIrina Sokolova B->ops->assemblyend = MatAssemblyEnd_SeqBAIJ; 567072be85SIrina Sokolova B->ops->destroy = MatDestroy_SeqBAIJ; 577072be85SIrina Sokolova B->ops->multtranspose = MatMultTranspose_SeqBAIJ; 587072be85SIrina Sokolova B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ; 597072be85SIrina Sokolova B->ops->scale = MatScale_SeqBAIJ; 607072be85SIrina Sokolova B->ops->diagonalscale = MatDiagonalScale_SeqBAIJ; 617072be85SIrina Sokolova B->ops->axpy = MatAXPY_SeqBAIJ; 627072be85SIrina Sokolova 637072be85SIrina Sokolova switch (A->rmap->bs) { 647072be85SIrina Sokolova case 1: 657072be85SIrina Sokolova B->ops->mult = MatMult_SeqBAIJ_1; 667072be85SIrina Sokolova B->ops->multadd = MatMultAdd_SeqBAIJ_1; 677072be85SIrina Sokolova break; 687072be85SIrina Sokolova case 2: 697072be85SIrina Sokolova B->ops->mult = MatMult_SeqBAIJ_2; 707072be85SIrina Sokolova B->ops->multadd = MatMultAdd_SeqBAIJ_2; 717072be85SIrina Sokolova break; 727072be85SIrina Sokolova case 3: 737072be85SIrina Sokolova B->ops->mult = MatMult_SeqBAIJ_3; 747072be85SIrina Sokolova B->ops->multadd = MatMultAdd_SeqBAIJ_3; 757072be85SIrina Sokolova break; 767072be85SIrina Sokolova case 4: 777072be85SIrina Sokolova B->ops->mult = MatMult_SeqBAIJ_4; 787072be85SIrina Sokolova B->ops->multadd = MatMultAdd_SeqBAIJ_4; 797072be85SIrina Sokolova break; 807072be85SIrina Sokolova case 5: 817072be85SIrina Sokolova B->ops->mult = MatMult_SeqBAIJ_5; 827072be85SIrina Sokolova B->ops->multadd = MatMultAdd_SeqBAIJ_5; 837072be85SIrina Sokolova break; 847072be85SIrina Sokolova case 6: 857072be85SIrina Sokolova B->ops->mult = MatMult_SeqBAIJ_6; 867072be85SIrina Sokolova B->ops->multadd = MatMultAdd_SeqBAIJ_6; 877072be85SIrina Sokolova break; 887072be85SIrina Sokolova case 7: 897072be85SIrina Sokolova B->ops->mult = MatMult_SeqBAIJ_7; 907072be85SIrina Sokolova B->ops->multadd = MatMultAdd_SeqBAIJ_7; 917072be85SIrina Sokolova break; 927072be85SIrina Sokolova case 11: 937072be85SIrina Sokolova B->ops->mult = MatMult_SeqBAIJ_11; 947072be85SIrina Sokolova B->ops->multadd = MatMultAdd_SeqBAIJ_11; 957072be85SIrina Sokolova break; 967072be85SIrina Sokolova case 15: 977072be85SIrina Sokolova B->ops->mult = MatMult_SeqBAIJ_15_ver1; 987072be85SIrina Sokolova B->ops->multadd = MatMultAdd_SeqBAIJ_N; 997072be85SIrina Sokolova break; 1007072be85SIrina Sokolova default: 1017072be85SIrina Sokolova B->ops->mult = MatMult_SeqBAIJ_N; 1027072be85SIrina Sokolova B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1037072be85SIrina Sokolova break; 1047072be85SIrina Sokolova } 1059566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", NULL)); 1067072be85SIrina Sokolova 1077072be85SIrina Sokolova /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this 1087072be85SIrina Sokolova * simply involves destroying the MKL sparse matrix handle and then freeing 1097072be85SIrina Sokolova * the spptr pointer. */ 1107072be85SIrina Sokolova if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL *)B->spptr; 1117072be85SIrina Sokolova 112792fecdfSBarry Smith if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA); 1139566063dSJacob Faibussowitsch PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1)); 1149566063dSJacob Faibussowitsch PetscCall(PetscFree(B->spptr)); 1157072be85SIrina Sokolova 1167072be85SIrina Sokolova /* Change the type of B to MATSEQBAIJ. */ 1179566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 1187072be85SIrina Sokolova 1197072be85SIrina Sokolova *newmat = B; 1203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1217072be85SIrina Sokolova } 1227072be85SIrina Sokolova 123d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A) 124d71ae5a4SJacob Faibussowitsch { 1257072be85SIrina Sokolova Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 1267072be85SIrina Sokolova 1277072be85SIrina Sokolova PetscFunctionBegin; 1287072be85SIrina Sokolova if (baijmkl) { 1297072be85SIrina Sokolova /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */ 130792fecdfSBarry Smith if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA); 1319566063dSJacob Faibussowitsch PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1)); 1329566063dSJacob Faibussowitsch PetscCall(PetscFree(A->spptr)); 1337072be85SIrina Sokolova } 1347072be85SIrina Sokolova 1357072be85SIrina Sokolova /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ() 1367072be85SIrina Sokolova * to destroy everything that remains. */ 1379566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ)); 1389566063dSJacob Faibussowitsch PetscCall(MatDestroy_SeqBAIJ(A)); 1393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1407072be85SIrina Sokolova } 1417072be85SIrina Sokolova 142d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A) 143d71ae5a4SJacob Faibussowitsch { 1447072be85SIrina Sokolova Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1457072be85SIrina Sokolova Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 1460835cbf9SRichard Tran Mills PetscInt mbs, nbs, nz, bs; 1477072be85SIrina Sokolova MatScalar *aa; 1487072be85SIrina Sokolova PetscInt *aj, *ai; 14980278ffbSSatish Balay PetscInt i; 1507072be85SIrina Sokolova 1517072be85SIrina Sokolova PetscFunctionBegin; 1527072be85SIrina Sokolova if (baijmkl->sparse_optimized) { 1537072be85SIrina Sokolova /* Matrix has been previously assembled and optimized. Must destroy old 1547072be85SIrina Sokolova * matrix handle before running the optimization step again. */ 1559566063dSJacob Faibussowitsch PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1)); 1569566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_destroy(baijmkl->bsrA)); 1577072be85SIrina Sokolova } 1587072be85SIrina Sokolova baijmkl->sparse_optimized = PETSC_FALSE; 1597072be85SIrina Sokolova 1607072be85SIrina Sokolova /* Now perform the SpMV2 setup and matrix optimization. */ 1617072be85SIrina Sokolova baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 1627072be85SIrina Sokolova baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 1637072be85SIrina Sokolova baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 1647072be85SIrina Sokolova mbs = a->mbs; 1657072be85SIrina Sokolova nbs = a->nbs; 1667072be85SIrina Sokolova nz = a->nz; 1677072be85SIrina Sokolova bs = A->rmap->bs; 1687072be85SIrina Sokolova aa = a->a; 1697072be85SIrina Sokolova 170*f4f49eeaSPierre Jolivet if ((nz != 0) & !A->structure_only) { 1717072be85SIrina Sokolova /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 1727072be85SIrina Sokolova * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 173b9e7e5c1SBarry Smith if (PetscSeqBAIJSupportsZeroBased()) { 1747072be85SIrina Sokolova aj = a->j; 1757072be85SIrina Sokolova ai = a->i; 176*f4f49eeaSPierre Jolivet PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)mbs, (MKL_INT)nbs, (MKL_INT)bs, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa)); 177b9e7e5c1SBarry Smith } else { 1789566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mbs + 1, &ai, nz, &aj)); 179b9e7e5c1SBarry Smith for (i = 0; i < mbs + 1; i++) ai[i] = a->i[i] + 1; 180b9e7e5c1SBarry Smith for (i = 0; i < nz; i++) aj[i] = a->j[i] + 1; 1817072be85SIrina Sokolova aa = a->a; 18245d28df7SStefano Zampini PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)mbs, (MKL_INT)nbs, (MKL_INT)bs, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa)); 1837072be85SIrina Sokolova baijmkl->ai1 = ai; 1847072be85SIrina Sokolova baijmkl->aj1 = aj; 185b9e7e5c1SBarry Smith } 1869566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_set_mv_hint(baijmkl->bsrA, SPARSE_OPERATION_NON_TRANSPOSE, baijmkl->descr, 1000)); 1879566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_set_memory_hint(baijmkl->bsrA, SPARSE_MEMORY_AGGRESSIVE)); 1889566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_optimize(baijmkl->bsrA)); 1897072be85SIrina Sokolova baijmkl->sparse_optimized = PETSC_TRUE; 1907072be85SIrina Sokolova } 1913ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1927072be85SIrina Sokolova } 1937072be85SIrina Sokolova 194d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 195d71ae5a4SJacob Faibussowitsch { 1967072be85SIrina Sokolova Mat_SeqBAIJMKL *baijmkl; 1977072be85SIrina Sokolova Mat_SeqBAIJMKL *baijmkl_dest; 1987072be85SIrina Sokolova 1997072be85SIrina Sokolova PetscFunctionBegin; 2009566063dSJacob Faibussowitsch PetscCall(MatDuplicate_SeqBAIJ(A, op, M)); 2017072be85SIrina Sokolova baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 2024dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&baijmkl_dest)); 20371bc03e0SIrina Sokolova (*M)->spptr = (void *)baijmkl_dest; 2049566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(baijmkl_dest, baijmkl, sizeof(Mat_SeqBAIJMKL))); 2057072be85SIrina Sokolova baijmkl_dest->sparse_optimized = PETSC_FALSE; 2069566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 2073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2087072be85SIrina Sokolova } 2097072be85SIrina Sokolova 210d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) 211d71ae5a4SJacob Faibussowitsch { 2127072be85SIrina Sokolova Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2137072be85SIrina Sokolova Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 2147072be85SIrina Sokolova const PetscScalar *x; 2157072be85SIrina Sokolova PetscScalar *y; 2167072be85SIrina Sokolova 2177072be85SIrina Sokolova PetscFunctionBegin; 2187072be85SIrina Sokolova /* If there are no nonzero entries, zero yy and return immediately. */ 2197072be85SIrina Sokolova if (!a->nz) { 2209566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 2213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2227072be85SIrina Sokolova } 2237072be85SIrina Sokolova 2249566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 2259566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 2267072be85SIrina Sokolova 2277072be85SIrina Sokolova /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 2287072be85SIrina Sokolova * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 2297072be85SIrina Sokolova * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 23048a46eb9SPierre Jolivet if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 2317072be85SIrina Sokolova 2327072be85SIrina Sokolova /* Call MKL SpMV2 executor routine to do the MatMult. */ 2339566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y)); 2347072be85SIrina Sokolova 2359566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs)); 2369566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 2379566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 2383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2397072be85SIrina Sokolova } 2407072be85SIrina Sokolova 241d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) 242d71ae5a4SJacob Faibussowitsch { 2437072be85SIrina Sokolova Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2447072be85SIrina Sokolova Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 2457072be85SIrina Sokolova const PetscScalar *x; 2467072be85SIrina Sokolova PetscScalar *y; 2477072be85SIrina Sokolova 2487072be85SIrina Sokolova PetscFunctionBegin; 2497072be85SIrina Sokolova /* If there are no nonzero entries, zero yy and return immediately. */ 2507072be85SIrina Sokolova if (!a->nz) { 2519566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 2523ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2537072be85SIrina Sokolova } 2547072be85SIrina Sokolova 2559566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 2569566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 2577072be85SIrina Sokolova 2587072be85SIrina Sokolova /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 2597072be85SIrina Sokolova * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 2607072be85SIrina Sokolova * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 26148a46eb9SPierre Jolivet if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 2627072be85SIrina Sokolova 2637072be85SIrina Sokolova /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 2649566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y)); 2657072be85SIrina Sokolova 2669566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs)); 2679566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 2689566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 2693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2707072be85SIrina Sokolova } 2717072be85SIrina Sokolova 272d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) 273d71ae5a4SJacob Faibussowitsch { 2747072be85SIrina Sokolova Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2757072be85SIrina Sokolova Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 2767072be85SIrina Sokolova const PetscScalar *x; 2777072be85SIrina Sokolova PetscScalar *y, *z; 2787072be85SIrina Sokolova PetscInt m = a->mbs * A->rmap->bs; 2797072be85SIrina Sokolova PetscInt i; 2807072be85SIrina Sokolova 2817072be85SIrina Sokolova PetscFunctionBegin; 2827072be85SIrina Sokolova /* If there are no nonzero entries, set zz = yy and return immediately. */ 2837072be85SIrina Sokolova if (!a->nz) { 2849566063dSJacob Faibussowitsch PetscCall(VecCopy(yy, zz)); 2853ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2867072be85SIrina Sokolova } 2877072be85SIrina Sokolova 2889566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 2899566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 2907072be85SIrina Sokolova 2917072be85SIrina Sokolova /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 2927072be85SIrina Sokolova * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 2937072be85SIrina Sokolova * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 29448a46eb9SPierre Jolivet if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 2957072be85SIrina Sokolova 2967072be85SIrina Sokolova /* Call MKL sparse BLAS routine to do the MatMult. */ 2977072be85SIrina Sokolova if (zz == yy) { 2987072be85SIrina Sokolova /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 2997072be85SIrina Sokolova * with alpha and beta both set to 1.0. */ 3009566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z)); 3017072be85SIrina Sokolova } else { 3027072be85SIrina Sokolova /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 3037072be85SIrina Sokolova * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 3049566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z)); 305ad540459SPierre Jolivet for (i = 0; i < m; i++) z[i] += y[i]; 3067072be85SIrina Sokolova } 3077072be85SIrina Sokolova 3089566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz)); 3099566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3109566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 3113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3127072be85SIrina Sokolova } 3137072be85SIrina Sokolova 314d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) 315d71ae5a4SJacob Faibussowitsch { 3167072be85SIrina Sokolova Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 3177072be85SIrina Sokolova Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 3187072be85SIrina Sokolova const PetscScalar *x; 3197072be85SIrina Sokolova PetscScalar *y, *z; 3207072be85SIrina Sokolova PetscInt n = a->nbs * A->rmap->bs; 3217072be85SIrina Sokolova PetscInt i; 3227072be85SIrina Sokolova /* Variables not in MatMultTransposeAdd_SeqBAIJ. */ 3237072be85SIrina Sokolova 3247072be85SIrina Sokolova PetscFunctionBegin; 3257072be85SIrina Sokolova /* If there are no nonzero entries, set zz = yy and return immediately. */ 3267072be85SIrina Sokolova if (!a->nz) { 3279566063dSJacob Faibussowitsch PetscCall(VecCopy(yy, zz)); 3283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3297072be85SIrina Sokolova } 3307072be85SIrina Sokolova 3319566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3329566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 3337072be85SIrina Sokolova 3347072be85SIrina Sokolova /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 3357072be85SIrina Sokolova * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 3367072be85SIrina Sokolova * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 33748a46eb9SPierre Jolivet if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 3387072be85SIrina Sokolova 3397072be85SIrina Sokolova /* Call MKL sparse BLAS routine to do the MatMult. */ 3407072be85SIrina Sokolova if (zz == yy) { 3417072be85SIrina Sokolova /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 3427072be85SIrina Sokolova * with alpha and beta both set to 1.0. */ 3439566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z)); 3447072be85SIrina Sokolova } else { 3457072be85SIrina Sokolova /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 3467072be85SIrina Sokolova * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 3479566063dSJacob Faibussowitsch PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z)); 348ad540459SPierre Jolivet for (i = 0; i < n; i++) z[i] += y[i]; 3497072be85SIrina Sokolova } 3507072be85SIrina Sokolova 3519566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz)); 3529566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3539566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 3543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3557072be85SIrina Sokolova } 3567072be85SIrina Sokolova 357d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA, PetscScalar alpha) 358d71ae5a4SJacob Faibussowitsch { 3597072be85SIrina Sokolova PetscFunctionBegin; 3609566063dSJacob Faibussowitsch PetscCall(MatScale_SeqBAIJ(inA, alpha)); 3619566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJMKL_create_mkl_handle(inA)); 3623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3637072be85SIrina Sokolova } 3647072be85SIrina Sokolova 365d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A, Vec ll, Vec rr) 366d71ae5a4SJacob Faibussowitsch { 3677072be85SIrina Sokolova PetscFunctionBegin; 3689566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale_SeqBAIJ(A, ll, rr)); 3699566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 3703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3717072be85SIrina Sokolova } 3727072be85SIrina Sokolova 373d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y, PetscScalar a, Mat X, MatStructure str) 374d71ae5a4SJacob Faibussowitsch { 3757072be85SIrina Sokolova PetscFunctionBegin; 3769566063dSJacob Faibussowitsch PetscCall(MatAXPY_SeqBAIJ(Y, a, X, str)); 3777072be85SIrina Sokolova if (str == SAME_NONZERO_PATTERN) { 3787072be85SIrina Sokolova /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 3799566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJMKL_create_mkl_handle(Y)); 3807072be85SIrina Sokolova } 3813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3827072be85SIrina Sokolova } 3837072be85SIrina Sokolova /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a 3847072be85SIrina Sokolova * SeqBAIJMKL matrix. This routine is called by the MatCreate_SeqMKLBAIJ() 3857072be85SIrina Sokolova * routine, but can also be used to convert an assembled SeqBAIJ matrix 3867072be85SIrina Sokolova * into a SeqBAIJMKL one. */ 387d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) 388d71ae5a4SJacob Faibussowitsch { 3897072be85SIrina Sokolova Mat B = *newmat; 3907072be85SIrina Sokolova Mat_SeqBAIJMKL *baijmkl; 3917072be85SIrina Sokolova PetscBool sametype; 3927072be85SIrina Sokolova 3937072be85SIrina Sokolova PetscFunctionBegin; 3949566063dSJacob Faibussowitsch if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 3957072be85SIrina Sokolova 3969566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype)); 3973ba16761SJacob Faibussowitsch if (sametype) PetscFunctionReturn(PETSC_SUCCESS); 3987072be85SIrina Sokolova 3994dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&baijmkl)); 4007072be85SIrina Sokolova B->spptr = (void *)baijmkl; 4017072be85SIrina Sokolova 4027072be85SIrina Sokolova /* Set function pointers for methods that we inherit from BAIJ but override. 4037072be85SIrina Sokolova * We also parse some command line options below, since those determine some of the methods we point to. */ 4047072be85SIrina Sokolova B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL; 4057072be85SIrina Sokolova 4067072be85SIrina Sokolova baijmkl->sparse_optimized = PETSC_FALSE; 4077072be85SIrina Sokolova 40814e4dea2SJose E. Roman PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatScale_C", MatScale_SeqBAIJMKL)); 4099566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", MatConvert_SeqBAIJMKL_SeqBAIJ)); 4107072be85SIrina Sokolova 4119566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJMKL)); 4127072be85SIrina Sokolova *newmat = B; 4133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4147072be85SIrina Sokolova } 4159c46acdfSRichard Tran Mills 416d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode) 417d71ae5a4SJacob Faibussowitsch { 4184d6dccb5SIrina Sokolova PetscFunctionBegin; 4193ba16761SJacob Faibussowitsch if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 4209566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqBAIJ(A, mode)); 4219566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 4224d6dccb5SIrina Sokolova A->ops->destroy = MatDestroy_SeqBAIJMKL; 4234d6dccb5SIrina Sokolova A->ops->mult = MatMult_SeqBAIJMKL_SpMV2; 4244d6dccb5SIrina Sokolova A->ops->multtranspose = MatMultTranspose_SeqBAIJMKL_SpMV2; 4254d6dccb5SIrina Sokolova A->ops->multadd = MatMultAdd_SeqBAIJMKL_SpMV2; 4264d6dccb5SIrina Sokolova A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2; 4274d6dccb5SIrina Sokolova A->ops->scale = MatScale_SeqBAIJMKL; 4284d6dccb5SIrina Sokolova A->ops->diagonalscale = MatDiagonalScale_SeqBAIJMKL; 4294d6dccb5SIrina Sokolova A->ops->axpy = MatAXPY_SeqBAIJMKL; 4304d6dccb5SIrina Sokolova A->ops->duplicate = MatDuplicate_SeqBAIJMKL; 4313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4324d6dccb5SIrina Sokolova } 4339c46acdfSRichard Tran Mills 4347072be85SIrina Sokolova /*@C 43511a5261eSBarry Smith MatCreateSeqBAIJMKL - Creates a sparse matrix of type `MATSEQBAIJMKL`. 43611a5261eSBarry Smith This type inherits from `MATSEQBAIJ` and is largely identical, but uses sparse BLAS 4377072be85SIrina Sokolova routines from Intel MKL whenever possible. 4387072be85SIrina Sokolova 4397072be85SIrina Sokolova Input Parameters: 44011a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF` 44120f4b53cSBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 44220f4b53cSBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 4437072be85SIrina Sokolova . m - number of rows 4447072be85SIrina Sokolova . n - number of columns 4457072be85SIrina Sokolova . nz - number of nonzero blocks per block row (same for all rows) 4467072be85SIrina Sokolova - nnz - array containing the number of nonzero blocks in the various block rows 44720f4b53cSBarry Smith (possibly different for each block row) or `NULL` 4487072be85SIrina Sokolova 4497072be85SIrina Sokolova Output Parameter: 4507072be85SIrina Sokolova . A - the matrix 4517072be85SIrina Sokolova 45211a5261eSBarry Smith It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 453f6f02116SRichard Tran Mills MatXXXXSetPreallocation() paradigm instead of this routine directly. 45411a5261eSBarry Smith [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 4557072be85SIrina Sokolova 4567072be85SIrina Sokolova Options Database Keys: 45711a5261eSBarry Smith + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 458a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 4597072be85SIrina Sokolova 4607072be85SIrina Sokolova Level: intermediate 4617072be85SIrina Sokolova 4627072be85SIrina Sokolova Notes: 4637072be85SIrina Sokolova The number of rows and columns must be divisible by blocksize. 4647072be85SIrina Sokolova 46520f4b53cSBarry Smith If the `nnz` parameter is given then the `nz` parameter is ignored 4667072be85SIrina Sokolova 4677072be85SIrina Sokolova A nonzero block is any block that as 1 or more nonzeros in it 4687072be85SIrina Sokolova 46920f4b53cSBarry Smith `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()` 47020f4b53cSBarry Smith operations are currently supported. 47120f4b53cSBarry Smith If the installed version of MKL supports the "SpMV2" sparse 47220f4b53cSBarry Smith inspector-executor routines, then those are used by default. 47320f4b53cSBarry Smith Default PETSc kernels are used otherwise. 47420f4b53cSBarry Smith 4752ef1f0ffSBarry Smith The `MATSEQBAIJ` format is fully compatible with standard Fortran 4767072be85SIrina Sokolova storage. That is, the stored row and column indices can begin at 4777072be85SIrina Sokolova either one (as in Fortran) or zero. See the users' manual for details. 4787072be85SIrina Sokolova 4797072be85SIrina Sokolova Specify the preallocated storage with either nz or nnz (not both). 48011a5261eSBarry Smith Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory 481651615e1SBarry Smith allocation. See [Sparse Matrices](sec_matsparse) for details. 4827072be85SIrina Sokolova matrices. 4837072be85SIrina Sokolova 484651615e1SBarry Smith .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 4857072be85SIrina Sokolova @*/ 486d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 487d71ae5a4SJacob Faibussowitsch { 4887072be85SIrina Sokolova PetscFunctionBegin; 4899566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 4909566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, m, n)); 4919566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQBAIJMKL)); 4929566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation_SeqBAIJ(*A, bs, nz, (PetscInt *)nnz)); 4933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4947072be85SIrina Sokolova } 4959c46acdfSRichard Tran Mills 496d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A) 497d71ae5a4SJacob Faibussowitsch { 4987072be85SIrina Sokolova PetscFunctionBegin; 4999566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATSEQBAIJ)); 5009566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &A)); 5013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5027072be85SIrina Sokolova } 503