14a2a386eSRichard Tran Mills /* 24a2a386eSRichard Tran Mills Defines basic operations for the MATSEQAIJMKL matrix class. 34a2a386eSRichard Tran Mills This class is derived from the MATSEQAIJ class and retains the 44a2a386eSRichard Tran Mills compressed row storage (aka Yale sparse matrix format) but uses 54a2a386eSRichard Tran Mills sparse BLAS operations from the Intel Math Kernel Library (MKL) 64a2a386eSRichard Tran Mills wherever possible. 74a2a386eSRichard Tran Mills */ 84a2a386eSRichard Tran Mills 94a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h> 104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11bdfea6dbSBarry Smith #if defined(PETSC_HAVE_MKL_INTEL_ILP64) 12bdfea6dbSBarry Smith #define MKL_ILP64 13bdfea6dbSBarry Smith #endif 14b9e7e5c1SBarry Smith #include <mkl_spblas.h> 154a2a386eSRichard Tran Mills 164a2a386eSRichard Tran Mills typedef struct { 17c9d46305SRichard Tran Mills PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 185b49642aSRichard Tran Mills PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 194abfa3b3SRichard Tran Mills PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 20551aa5c8SRichard Tran Mills PetscObjectState state; 21ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 22df555b71SRichard Tran Mills sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 23df555b71SRichard Tran Mills struct matrix_descr descr; 24b8cbc1fbSRichard Tran Mills #endif 254a2a386eSRichard Tran Mills } Mat_SeqAIJMKL; 264a2a386eSRichard Tran Mills 274a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType); 284a2a386eSRichard Tran Mills 29d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat) 30d71ae5a4SJacob Faibussowitsch { 314a2a386eSRichard Tran Mills /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 324a2a386eSRichard Tran Mills /* so we will ignore 'MatType type'. */ 334a2a386eSRichard Tran Mills Mat B = *newmat; 34ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 354a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 36c1d5218aSRichard Tran Mills #endif 374a2a386eSRichard Tran Mills 384a2a386eSRichard Tran Mills PetscFunctionBegin; 399566063dSJacob Faibussowitsch if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 404a2a386eSRichard Tran Mills 414a2a386eSRichard Tran Mills /* Reset the original function pointers. */ 4254871a98SRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJ; 434a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 444a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJ; 4554871a98SRichard Tran Mills B->ops->mult = MatMult_SeqAIJ; 46ff03dc53SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJ; 4754871a98SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJ; 48ff03dc53SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 49190ae7a4SRichard Tran Mills B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 50431879ecSRichard Tran Mills B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 51e8be1fc7SRichard Tran Mills B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 52190ae7a4SRichard Tran Mills B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 53190ae7a4SRichard Tran Mills B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 544f53af40SRichard Tran Mills B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 554a2a386eSRichard Tran Mills 569566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL)); 574222ddf1SHong Zhang 58ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 594abfa3b3SRichard Tran Mills /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 60e9c94282SRichard Tran Mills * simply involves destroying the MKL sparse matrix handle and then freeing 61e9c94282SRichard Tran Mills * the spptr pointer. */ 62a8327b06SKarl Rupp if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr; 63a8327b06SKarl Rupp 64792fecdfSBarry Smith if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 65ddf6f99aSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 669566063dSJacob Faibussowitsch PetscCall(PetscFree(B->spptr)); 674a2a386eSRichard Tran Mills 684a2a386eSRichard Tran Mills /* Change the type of B to MATSEQAIJ. */ 699566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ)); 704a2a386eSRichard Tran Mills 714a2a386eSRichard Tran Mills *newmat = B; 723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 734a2a386eSRichard Tran Mills } 744a2a386eSRichard Tran Mills 7566976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 76d71ae5a4SJacob Faibussowitsch { 774a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 784a2a386eSRichard Tran Mills 794a2a386eSRichard Tran Mills PetscFunctionBegin; 80edc89de7SRichard Tran Mills /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */ 81e9c94282SRichard Tran Mills if (aijmkl) { 824a2a386eSRichard Tran Mills /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 83ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 84792fecdfSBarry Smith if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 854abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 869566063dSJacob Faibussowitsch PetscCall(PetscFree(A->spptr)); 87e9c94282SRichard Tran Mills } 884a2a386eSRichard Tran Mills 894a2a386eSRichard Tran Mills /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 904a2a386eSRichard Tran Mills * to destroy everything that remains. */ 919566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ)); 922fe279fdSBarry Smith /* I don't call MatSetType(). I believe this is because that 934a2a386eSRichard Tran Mills * is only to be called when *building* a matrix. I could be wrong, but 944a2a386eSRichard Tran Mills * that is how things work for the SuperLU matrix class. */ 952e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL)); 969566063dSJacob Faibussowitsch PetscCall(MatDestroy_SeqAIJ(A)); 973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 984a2a386eSRichard Tran Mills } 994a2a386eSRichard Tran Mills 100190ae7a4SRichard Tran Mills /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 1015b49642aSRichard Tran Mills * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 1025b49642aSRichard Tran Mills * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 1035b49642aSRichard Tran Mills * handle, creates a new one, and then calls mkl_sparse_optimize(). 1045b49642aSRichard Tran Mills * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 1055b49642aSRichard Tran Mills * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 1065b49642aSRichard Tran Mills * an unoptimized matrix handle here. */ 107d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 108d71ae5a4SJacob Faibussowitsch { 109ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1106e369cd5SRichard Tran Mills /* If the MKL library does not have mkl_sparse_optimize(), then this routine 1116e369cd5SRichard Tran Mills * does nothing. We make it callable anyway in this case because it cuts 1126e369cd5SRichard Tran Mills * down on littering the code with #ifdefs. */ 11345fbe478SRichard Tran Mills PetscFunctionBegin; 1143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1156e369cd5SRichard Tran Mills #else 116a8327b06SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 117a8327b06SKarl Rupp Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 118a8327b06SKarl Rupp PetscInt m, n; 119a8327b06SKarl Rupp MatScalar *aa; 120a8327b06SKarl Rupp PetscInt *aj, *ai; 1214a2a386eSRichard Tran Mills 122a8327b06SKarl Rupp PetscFunctionBegin; 123e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 124e626a176SRichard Tran Mills /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 125e626a176SRichard Tran Mills * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 126e626a176SRichard Tran Mills * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 127e626a176SRichard Tran Mills * mkl_sparse_optimize() later. */ 1283ba16761SJacob Faibussowitsch if (aijmkl->no_SpMV2) PetscFunctionReturn(PETSC_SUCCESS); 1294d51fa23SRichard Tran Mills #endif 1306e369cd5SRichard Tran Mills 1310632b357SRichard Tran Mills if (aijmkl->sparse_optimized) { 1320632b357SRichard Tran Mills /* Matrix has been previously assembled and optimized. Must destroy old 1330632b357SRichard Tran Mills * matrix handle before running the optimization step again. */ 134792fecdfSBarry Smith PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 1350632b357SRichard Tran Mills } 1368d3fe1b0SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 1376e369cd5SRichard Tran Mills 138c9d46305SRichard Tran Mills /* Now perform the SpMV2 setup and matrix optimization. */ 139df555b71SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 140df555b71SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 141df555b71SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 14258678438SRichard Tran Mills m = A->rmap->n; 14358678438SRichard Tran Mills n = A->cmap->n; 144df555b71SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 145df555b71SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 146df555b71SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 1471495fedeSRichard Tran Mills if (a->nz && aa && !A->structure_only) { 1488d3fe1b0SRichard Tran Mills /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 1498d3fe1b0SRichard Tran Mills * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 150bdfea6dbSBarry Smith PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa); 151792fecdfSBarry Smith PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000); 152792fecdfSBarry Smith PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE); 15348a46eb9SPierre Jolivet if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA); 1544abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 155*f4f49eeaSPierre Jolivet PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state)); 156190ae7a4SRichard Tran Mills } else { 157f3fa974cSJacob Faibussowitsch aijmkl->csrA = NULL; 158c9d46305SRichard Tran Mills } 1593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 160d995685eSRichard Tran Mills #endif 1616e369cd5SRichard Tran Mills } 1626e369cd5SRichard Tran Mills 163b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 164190ae7a4SRichard Tran Mills /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 165d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A) 166d71ae5a4SJacob Faibussowitsch { 16719afcda9SRichard Tran Mills sparse_index_base_t indexing; 168190ae7a4SRichard Tran Mills PetscInt m, n; 16945fbe478SRichard Tran Mills PetscInt *aj, *ai, *dummy; 17019afcda9SRichard Tran Mills MatScalar *aa; 17119afcda9SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 17219afcda9SRichard Tran Mills 173362febeeSStefano Zampini PetscFunctionBegin; 174190ae7a4SRichard Tran Mills if (csrA) { 17545fbe478SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 176bdfea6dbSBarry Smith PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa); 1775f80ce2aSJacob Faibussowitsch PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 178190ae7a4SRichard Tran Mills } else { 179f3fa974cSJacob Faibussowitsch aj = ai = NULL; 180f3fa974cSJacob Faibussowitsch aa = NULL; 181aab60f1bSRichard Tran Mills } 182190ae7a4SRichard Tran Mills 1839566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATSEQAIJ)); 1849566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols)); 185aab60f1bSRichard Tran Mills /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 186aab60f1bSRichard Tran Mills * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 187aab60f1bSRichard Tran Mills * they will be destroyed when the MKL handle is destroyed. 188aab60f1bSRichard Tran Mills * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 189190ae7a4SRichard Tran Mills if (csrA) { 1909566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL)); 191190ae7a4SRichard Tran Mills } else { 192190ae7a4SRichard Tran Mills /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 1939566063dSJacob Faibussowitsch PetscCall(MatSetUp(A)); 1949566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1959566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 196190ae7a4SRichard Tran Mills } 19719afcda9SRichard Tran Mills 19819afcda9SRichard Tran Mills /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 19919afcda9SRichard Tran Mills * Now turn it into a MATSEQAIJMKL. */ 2009566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A)); 2016c87cf42SRichard Tran Mills 20219afcda9SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL *)A->spptr; 20319afcda9SRichard Tran Mills aijmkl->csrA = csrA; 2046c87cf42SRichard Tran Mills 20519afcda9SRichard Tran Mills /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 20619afcda9SRichard Tran Mills * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 20719afcda9SRichard Tran Mills * and just need to be able to run the MKL optimization step. */ 208f3fd1758SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 209f3fd1758SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 210f3fd1758SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 211190ae7a4SRichard Tran Mills if (csrA) { 212792fecdfSBarry Smith PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000); 213792fecdfSBarry Smith PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE); 2141950a7e7SRichard Tran Mills } 215*f4f49eeaSPierre Jolivet PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state)); 2163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 21719afcda9SRichard Tran Mills } 218b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 219190ae7a4SRichard Tran Mills 220e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 221e8be1fc7SRichard Tran Mills * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 222e8be1fc7SRichard Tran Mills * MatMatMultNumeric(). */ 223b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 224d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 225d71ae5a4SJacob Faibussowitsch { 226e8be1fc7SRichard Tran Mills PetscInt i; 227e8be1fc7SRichard Tran Mills PetscInt nrows, ncols; 228e8be1fc7SRichard Tran Mills PetscInt nz; 229e8be1fc7SRichard Tran Mills PetscInt *ai, *aj, *dummy; 230e8be1fc7SRichard Tran Mills PetscScalar *aa; 2311495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 232e8be1fc7SRichard Tran Mills sparse_index_base_t indexing; 233e8be1fc7SRichard Tran Mills 234362febeeSStefano Zampini PetscFunctionBegin; 235190ae7a4SRichard Tran Mills /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 2363ba16761SJacob Faibussowitsch if (!aijmkl->csrA) PetscFunctionReturn(PETSC_SUCCESS); 237190ae7a4SRichard Tran Mills 238e8be1fc7SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 239bdfea6dbSBarry Smith PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa); 240e8be1fc7SRichard Tran Mills 241e8be1fc7SRichard Tran Mills /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 242e8be1fc7SRichard Tran Mills * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 243e8be1fc7SRichard Tran Mills for (i = 0; i < nrows; i++) { 244e8be1fc7SRichard Tran Mills nz = ai[i + 1] - ai[i]; 2459566063dSJacob Faibussowitsch PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES)); 246e8be1fc7SRichard Tran Mills } 247e8be1fc7SRichard Tran Mills 2489566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 2499566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 250e8be1fc7SRichard Tran Mills 251*f4f49eeaSPierre Jolivet PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state)); 252a7180b50SRichard Tran Mills /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation. 253a7180b50SRichard Tran Mills * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed 254a7180b50SRichard Tran Mills * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */ 255a7180b50SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 2563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 257e8be1fc7SRichard Tran Mills } 258b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 259e8be1fc7SRichard Tran Mills 2603849ddb2SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 261d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer) 262d71ae5a4SJacob Faibussowitsch { 2633849ddb2SRichard Tran Mills PetscInt i, j, k; 2643849ddb2SRichard Tran Mills PetscInt nrows, ncols; 2653849ddb2SRichard Tran Mills PetscInt nz; 2663849ddb2SRichard Tran Mills PetscInt *ai, *aj, *dummy; 2673849ddb2SRichard Tran Mills PetscScalar *aa; 2681495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 2693849ddb2SRichard Tran Mills sparse_index_base_t indexing; 2703849ddb2SRichard Tran Mills 271362febeeSStefano Zampini PetscFunctionBegin; 2729566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n")); 2733849ddb2SRichard Tran Mills 2743849ddb2SRichard Tran Mills /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 2753849ddb2SRichard Tran Mills if (!aijmkl->csrA) { 2769566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n")); 2773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2783849ddb2SRichard Tran Mills } 2793849ddb2SRichard Tran Mills 2803849ddb2SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 281bdfea6dbSBarry Smith PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa); 2823849ddb2SRichard Tran Mills 2833849ddb2SRichard Tran Mills k = 0; 2843849ddb2SRichard Tran Mills for (i = 0; i < nrows; i++) { 2859566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i)); 2863849ddb2SRichard Tran Mills nz = ai[i + 1] - ai[i]; 2873849ddb2SRichard Tran Mills for (j = 0; j < nz; j++) { 2883849ddb2SRichard Tran Mills if (aa) { 2899566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g) ", aj[k], PetscRealPart(aa[k]))); 2903849ddb2SRichard Tran Mills } else { 2919566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k])); 2923849ddb2SRichard Tran Mills } 2933849ddb2SRichard Tran Mills k++; 2943849ddb2SRichard Tran Mills } 2959566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 2963849ddb2SRichard Tran Mills } 2973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2983849ddb2SRichard Tran Mills } 2993849ddb2SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 3003849ddb2SRichard Tran Mills 30166976f2fSJacob Faibussowitsch static PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 302d71ae5a4SJacob Faibussowitsch { 3031495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 3046e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl_dest; 3056e369cd5SRichard Tran Mills 3066e369cd5SRichard Tran Mills PetscFunctionBegin; 3079566063dSJacob Faibussowitsch PetscCall(MatDuplicate_SeqAIJ(A, op, M)); 3086e369cd5SRichard Tran Mills aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr; 3099566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1)); 3106e369cd5SRichard Tran Mills aijmkl_dest->sparse_optimized = PETSC_FALSE; 3111baa6e33SBarry Smith if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 3123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3136e369cd5SRichard Tran Mills } 3146e369cd5SRichard Tran Mills 31566976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 316d71ae5a4SJacob Faibussowitsch { 3176e369cd5SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3185b49642aSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 3196e369cd5SRichard Tran Mills 3206e369cd5SRichard Tran Mills PetscFunctionBegin; 3213ba16761SJacob Faibussowitsch if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 3226e369cd5SRichard Tran Mills 3236e369cd5SRichard Tran Mills /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 3246e369cd5SRichard Tran Mills * extra information and some different methods, call the AssemblyEnd 3256e369cd5SRichard Tran Mills * routine for a MATSEQAIJ. 3266e369cd5SRichard Tran Mills * I'm not sure if this is the best way to do this, but it avoids 327d96e85feSRichard Tran Mills * a lot of code duplication. */ 3286e369cd5SRichard Tran Mills a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 3299566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 3306e369cd5SRichard Tran Mills 3315b49642aSRichard Tran Mills /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 3325b49642aSRichard Tran Mills * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 3335b49642aSRichard Tran Mills aijmkl = (Mat_SeqAIJMKL *)A->spptr; 3341baa6e33SBarry Smith if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 3353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3364a2a386eSRichard Tran Mills } 3374a2a386eSRichard Tran Mills 338e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 33966976f2fSJacob Faibussowitsch static PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy) 340d71ae5a4SJacob Faibussowitsch { 3414a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3424a2a386eSRichard Tran Mills const PetscScalar *x; 3434a2a386eSRichard Tran Mills PetscScalar *y; 3444a2a386eSRichard Tran Mills const MatScalar *aa; 3454a2a386eSRichard Tran Mills PetscInt m = A->rmap->n; 346db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 347db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 348db63039fSRichard Tran Mills PetscScalar beta = 0.0; 3494a2a386eSRichard Tran Mills const PetscInt *aj, *ai; 350db63039fSRichard Tran Mills char matdescra[6]; 351db63039fSRichard Tran Mills 3524a2a386eSRichard Tran Mills /* Variables not in MatMult_SeqAIJ. */ 353ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 354ff03dc53SRichard Tran Mills 355ff03dc53SRichard Tran Mills PetscFunctionBegin; 356db63039fSRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 357db63039fSRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 3589566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3599566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 360ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 361ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 362ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 363ff03dc53SRichard Tran Mills 364ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 365db63039fSRichard Tran Mills mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y); 366ff03dc53SRichard Tran Mills 3679566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 3689566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3699566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 3703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 371ff03dc53SRichard Tran Mills } 3721950a7e7SRichard Tran Mills #endif 373ff03dc53SRichard Tran Mills 374ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 375d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) 376d71ae5a4SJacob Faibussowitsch { 377df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 378df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 379df555b71SRichard Tran Mills const PetscScalar *x; 380df555b71SRichard Tran Mills PetscScalar *y; 381551aa5c8SRichard Tran Mills PetscObjectState state; 382df555b71SRichard Tran Mills 383df555b71SRichard Tran Mills PetscFunctionBegin; 38438987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 38538987b35SRichard Tran Mills if (!a->nz) { 3869566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 3879566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(y, A->rmap->n)); 3889566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 3893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 39038987b35SRichard Tran Mills } 391f36dfe3fSRichard Tran Mills 3929566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3939566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 394df555b71SRichard Tran Mills 3953fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 3963fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 3973fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 3989566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 3999566063dSJacob Faibussowitsch if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 4003fa15762SRichard Tran Mills 401df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMult. */ 402792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y); 403df555b71SRichard Tran Mills 4049566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 4059566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4069566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 4073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 408df555b71SRichard Tran Mills } 409d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 410df555b71SRichard Tran Mills 411e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 41266976f2fSJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy) 413d71ae5a4SJacob Faibussowitsch { 414ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 415ff03dc53SRichard Tran Mills const PetscScalar *x; 416ff03dc53SRichard Tran Mills PetscScalar *y; 417ff03dc53SRichard Tran Mills const MatScalar *aa; 418ff03dc53SRichard Tran Mills PetscInt m = A->rmap->n; 419db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 420db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 421db63039fSRichard Tran Mills PetscScalar beta = 0.0; 422ff03dc53SRichard Tran Mills const PetscInt *aj, *ai; 423db63039fSRichard Tran Mills char matdescra[6]; 424ff03dc53SRichard Tran Mills 425ff03dc53SRichard Tran Mills /* Variables not in MatMultTranspose_SeqAIJ. */ 426ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 4274a2a386eSRichard Tran Mills 4284a2a386eSRichard Tran Mills PetscFunctionBegin; 429969800c5SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 430969800c5SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 4319566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4329566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 4334a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 4344a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 4354a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 4364a2a386eSRichard Tran Mills 4374a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 438db63039fSRichard Tran Mills mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y); 4394a2a386eSRichard Tran Mills 4409566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 4419566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4429566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 4433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4444a2a386eSRichard Tran Mills } 4451950a7e7SRichard Tran Mills #endif 4464a2a386eSRichard Tran Mills 447ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 448d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) 449d71ae5a4SJacob Faibussowitsch { 450df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 451df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 452df555b71SRichard Tran Mills const PetscScalar *x; 453df555b71SRichard Tran Mills PetscScalar *y; 454551aa5c8SRichard Tran Mills PetscObjectState state; 455df555b71SRichard Tran Mills 456df555b71SRichard Tran Mills PetscFunctionBegin; 45738987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 45838987b35SRichard Tran Mills if (!a->nz) { 4599566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 4609566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(y, A->cmap->n)); 4619566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 4623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 46338987b35SRichard Tran Mills } 464f36dfe3fSRichard Tran Mills 4659566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4669566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 467df555b71SRichard Tran Mills 4683fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 4693fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 4703fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 4719566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 4729566063dSJacob Faibussowitsch if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 4733fa15762SRichard Tran Mills 474df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 475792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y); 476df555b71SRichard Tran Mills 4779566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 4789566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4799566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 4803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 481df555b71SRichard Tran Mills } 482d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 483df555b71SRichard Tran Mills 484e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 48566976f2fSJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) 486d71ae5a4SJacob Faibussowitsch { 4874a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 4884a2a386eSRichard Tran Mills const PetscScalar *x; 4894a2a386eSRichard Tran Mills PetscScalar *y, *z; 4904a2a386eSRichard Tran Mills const MatScalar *aa; 4914a2a386eSRichard Tran Mills PetscInt m = A->rmap->n; 492db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 4934a2a386eSRichard Tran Mills const PetscInt *aj, *ai; 4944a2a386eSRichard Tran Mills PetscInt i; 4954a2a386eSRichard Tran Mills 496ff03dc53SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 497ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 498a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 499db63039fSRichard Tran Mills PetscScalar beta; 500a84739b8SRichard Tran Mills char matdescra[6]; 501ff03dc53SRichard Tran Mills 502ff03dc53SRichard Tran Mills PetscFunctionBegin; 503a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 504a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 505a84739b8SRichard Tran Mills 5069566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 5079566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 508ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 509ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 510ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 511ff03dc53SRichard Tran Mills 512ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 513a84739b8SRichard Tran Mills if (zz == yy) { 514a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 515db63039fSRichard Tran Mills beta = 1.0; 516db63039fSRichard Tran Mills mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 517a84739b8SRichard Tran Mills } else { 518db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 519db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 520db63039fSRichard Tran Mills beta = 0.0; 521db63039fSRichard Tran Mills mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 522ad540459SPierre Jolivet for (i = 0; i < m; i++) z[i] += y[i]; 523a84739b8SRichard Tran Mills } 524ff03dc53SRichard Tran Mills 5259566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 5269566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 5279566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 5283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 529ff03dc53SRichard Tran Mills } 5301950a7e7SRichard Tran Mills #endif 531ff03dc53SRichard Tran Mills 532ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 533d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) 534d71ae5a4SJacob Faibussowitsch { 535df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 536df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 537df555b71SRichard Tran Mills const PetscScalar *x; 538df555b71SRichard Tran Mills PetscScalar *y, *z; 539df555b71SRichard Tran Mills PetscInt m = A->rmap->n; 540df555b71SRichard Tran Mills PetscInt i; 541df555b71SRichard Tran Mills 542df555b71SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 543551aa5c8SRichard Tran Mills PetscObjectState state; 544df555b71SRichard Tran Mills 545df555b71SRichard Tran Mills PetscFunctionBegin; 54638987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 54738987b35SRichard Tran Mills if (!a->nz) { 5489566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 5499566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(z, y, m)); 5509566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 5513ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 55238987b35SRichard Tran Mills } 553df555b71SRichard Tran Mills 5549566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 5559566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 556df555b71SRichard Tran Mills 5573fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 5583fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 5593fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 5609566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 5619566063dSJacob Faibussowitsch if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 5623fa15762SRichard Tran Mills 563df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 564df555b71SRichard Tran Mills if (zz == yy) { 565df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 566df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 567792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z); 568df555b71SRichard Tran Mills } else { 569df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 570df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 571792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z); 5725f80ce2aSJacob Faibussowitsch for (i = 0; i < m; i++) z[i] += y[i]; 573df555b71SRichard Tran Mills } 574df555b71SRichard Tran Mills 5759566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 5769566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 5779566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 5783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 579df555b71SRichard Tran Mills } 580d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 581df555b71SRichard Tran Mills 582e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 58366976f2fSJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) 584d71ae5a4SJacob Faibussowitsch { 585ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 586ff03dc53SRichard Tran Mills const PetscScalar *x; 587ff03dc53SRichard Tran Mills PetscScalar *y, *z; 588ff03dc53SRichard Tran Mills const MatScalar *aa; 589ff03dc53SRichard Tran Mills PetscInt m = A->rmap->n; 590db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 591ff03dc53SRichard Tran Mills const PetscInt *aj, *ai; 592ff03dc53SRichard Tran Mills PetscInt i; 593ff03dc53SRichard Tran Mills 594ff03dc53SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 595ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 596a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 597db63039fSRichard Tran Mills PetscScalar beta; 598a84739b8SRichard Tran Mills char matdescra[6]; 5994a2a386eSRichard Tran Mills 6004a2a386eSRichard Tran Mills PetscFunctionBegin; 601a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 602a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 603a84739b8SRichard Tran Mills 6049566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 6059566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 6064a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 6074a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 6084a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 6094a2a386eSRichard Tran Mills 6104a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 611a84739b8SRichard Tran Mills if (zz == yy) { 612a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 613db63039fSRichard Tran Mills beta = 1.0; 614969800c5SRichard Tran Mills mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 615a84739b8SRichard Tran Mills } else { 616db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 617db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 618db63039fSRichard Tran Mills beta = 0.0; 619db63039fSRichard Tran Mills mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 620ad540459SPierre Jolivet for (i = 0; i < n; i++) z[i] += y[i]; 621a84739b8SRichard Tran Mills } 6224a2a386eSRichard Tran Mills 6239566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 6249566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 6259566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 6263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6274a2a386eSRichard Tran Mills } 6281950a7e7SRichard Tran Mills #endif 6294a2a386eSRichard Tran Mills 630ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 631d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) 632d71ae5a4SJacob Faibussowitsch { 633df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 634df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 635df555b71SRichard Tran Mills const PetscScalar *x; 636df555b71SRichard Tran Mills PetscScalar *y, *z; 637969800c5SRichard Tran Mills PetscInt n = A->cmap->n; 638df555b71SRichard Tran Mills PetscInt i; 639551aa5c8SRichard Tran Mills PetscObjectState state; 640df555b71SRichard Tran Mills 641df555b71SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 642df555b71SRichard Tran Mills 643df555b71SRichard Tran Mills PetscFunctionBegin; 64438987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 64538987b35SRichard Tran Mills if (!a->nz) { 6469566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 6479566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(z, y, n)); 6489566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 6493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 65038987b35SRichard Tran Mills } 651f36dfe3fSRichard Tran Mills 6529566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 6539566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 654df555b71SRichard Tran Mills 6553fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 6563fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 6573fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 6589566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 6593ba16761SJacob Faibussowitsch if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 6603fa15762SRichard Tran Mills 661df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 662df555b71SRichard Tran Mills if (zz == yy) { 663df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 664df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 665792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z); 666df555b71SRichard Tran Mills } else { 667df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 668df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 669792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z); 6705f80ce2aSJacob Faibussowitsch for (i = 0; i < n; i++) z[i] += y[i]; 671df555b71SRichard Tran Mills } 672df555b71SRichard Tran Mills 6739566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 6749566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 6759566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 6763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 677df555b71SRichard Tran Mills } 678d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 679df555b71SRichard Tran Mills 6808a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 681d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) 682d71ae5a4SJacob Faibussowitsch { 6831495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr; 684431879ecSRichard Tran Mills sparse_matrix_t csrA, csrB, csrC; 685190ae7a4SRichard Tran Mills PetscInt nrows, ncols; 686431879ecSRichard Tran Mills struct matrix_descr descr_type_gen; 687431879ecSRichard Tran Mills PetscObjectState state; 688431879ecSRichard Tran Mills 689431879ecSRichard Tran Mills PetscFunctionBegin; 690190ae7a4SRichard Tran Mills /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 691190ae7a4SRichard Tran Mills * not handle sparse matrices with zero rows or columns. */ 692190ae7a4SRichard Tran Mills if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 693190ae7a4SRichard Tran Mills else nrows = A->cmap->N; 694190ae7a4SRichard Tran Mills if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 695190ae7a4SRichard Tran Mills else ncols = B->rmap->N; 696190ae7a4SRichard Tran Mills 6979566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 6989566063dSJacob Faibussowitsch if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 6999566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)B, &state)); 7009566063dSJacob Faibussowitsch if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 701431879ecSRichard Tran Mills csrA = a->csrA; 702431879ecSRichard Tran Mills csrB = b->csrA; 703431879ecSRichard Tran Mills descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 704431879ecSRichard Tran Mills 705190ae7a4SRichard Tran Mills if (csrA && csrB) { 706792fecdfSBarry Smith PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC); 707190ae7a4SRichard Tran Mills } else { 708f3fa974cSJacob Faibussowitsch csrC = NULL; 709190ae7a4SRichard Tran Mills } 710190ae7a4SRichard Tran Mills 7119566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C)); 7123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 713431879ecSRichard Tran Mills } 714431879ecSRichard Tran Mills 715d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) 716d71ae5a4SJacob Faibussowitsch { 7171495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr; 718e8be1fc7SRichard Tran Mills sparse_matrix_t csrA, csrB, csrC; 719e8be1fc7SRichard Tran Mills struct matrix_descr descr_type_gen; 720e8be1fc7SRichard Tran Mills PetscObjectState state; 721e8be1fc7SRichard Tran Mills 722e8be1fc7SRichard Tran Mills PetscFunctionBegin; 7239566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 7249566063dSJacob Faibussowitsch if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 7259566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)B, &state)); 7269566063dSJacob Faibussowitsch if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 727e8be1fc7SRichard Tran Mills csrA = a->csrA; 728e8be1fc7SRichard Tran Mills csrB = b->csrA; 729e8be1fc7SRichard Tran Mills csrC = c->csrA; 730e8be1fc7SRichard Tran Mills descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 731e8be1fc7SRichard Tran Mills 732190ae7a4SRichard Tran Mills if (csrA && csrB) { 733792fecdfSBarry Smith PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC); 734190ae7a4SRichard Tran Mills } else { 735f3fa974cSJacob Faibussowitsch csrC = NULL; 736190ae7a4SRichard Tran Mills } 737e8be1fc7SRichard Tran Mills 738e8be1fc7SRichard Tran Mills /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 7399566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 7403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 741e8be1fc7SRichard Tran Mills } 742e8be1fc7SRichard Tran Mills 743d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) 744d71ae5a4SJacob Faibussowitsch { 745190ae7a4SRichard Tran Mills PetscFunctionBegin; 7469566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 7473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 748190ae7a4SRichard Tran Mills } 749190ae7a4SRichard Tran Mills 750d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) 751d71ae5a4SJacob Faibussowitsch { 752190ae7a4SRichard Tran Mills PetscFunctionBegin; 7539566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 7543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 755190ae7a4SRichard Tran Mills } 756190ae7a4SRichard Tran Mills 757d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) 758d71ae5a4SJacob Faibussowitsch { 759190ae7a4SRichard Tran Mills PetscFunctionBegin; 7609566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 7613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 762190ae7a4SRichard Tran Mills } 763190ae7a4SRichard Tran Mills 764d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) 765d71ae5a4SJacob Faibussowitsch { 766190ae7a4SRichard Tran Mills PetscFunctionBegin; 7679566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 7683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 769190ae7a4SRichard Tran Mills } 770190ae7a4SRichard Tran Mills 771d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) 772d71ae5a4SJacob Faibussowitsch { 773190ae7a4SRichard Tran Mills PetscFunctionBegin; 7749566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C)); 7753ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 776190ae7a4SRichard Tran Mills } 777190ae7a4SRichard Tran Mills 778d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) 779d71ae5a4SJacob Faibussowitsch { 780190ae7a4SRichard Tran Mills PetscFunctionBegin; 7819566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C)); 7823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 783190ae7a4SRichard Tran Mills } 784190ae7a4SRichard Tran Mills 785d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 786d71ae5a4SJacob Faibussowitsch { 787190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 788190ae7a4SRichard Tran Mills Mat A = product->A, B = product->B; 789190ae7a4SRichard Tran Mills 790190ae7a4SRichard Tran Mills PetscFunctionBegin; 7919566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C)); 7923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 793190ae7a4SRichard Tran Mills } 794190ae7a4SRichard Tran Mills 795d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 796d71ae5a4SJacob Faibussowitsch { 797190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 798190ae7a4SRichard Tran Mills Mat A = product->A, B = product->B; 799190ae7a4SRichard Tran Mills PetscReal fill = product->fill; 800190ae7a4SRichard Tran Mills 801190ae7a4SRichard Tran Mills PetscFunctionBegin; 8029566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C)); 803190ae7a4SRichard Tran Mills C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 8043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 805190ae7a4SRichard Tran Mills } 806190ae7a4SRichard Tran Mills 807d71ae5a4SJacob Faibussowitsch PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C) 808d71ae5a4SJacob Faibussowitsch { 809190ae7a4SRichard Tran Mills Mat Ct; 810190ae7a4SRichard Tran Mills Vec zeros; 8111495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr; 8124f53af40SRichard Tran Mills sparse_matrix_t csrA, csrP, csrC; 8134f53af40SRichard Tran Mills PetscBool set, flag; 814b9e1dd46SRichard Tran Mills struct matrix_descr descr_type_sym; 8154f53af40SRichard Tran Mills PetscObjectState state; 8164f53af40SRichard Tran Mills 8174f53af40SRichard Tran Mills PetscFunctionBegin; 8189566063dSJacob Faibussowitsch PetscCall(MatIsSymmetricKnown(A, &set, &flag)); 819b94d7dedSBarry Smith PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 8204f53af40SRichard Tran Mills 8219566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 8229566063dSJacob Faibussowitsch if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 8239566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)P, &state)); 8249566063dSJacob Faibussowitsch if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 8254f53af40SRichard Tran Mills csrA = a->csrA; 8264f53af40SRichard Tran Mills csrP = p->csrA; 8274f53af40SRichard Tran Mills csrC = c->csrA; 828b9e1dd46SRichard Tran Mills descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 829190ae7a4SRichard Tran Mills descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 830b9e1dd46SRichard Tran Mills descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 8314f53af40SRichard Tran Mills 8322fe279fdSBarry Smith /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 833792fecdfSBarry Smith PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT); 8344f53af40SRichard Tran Mills 835190ae7a4SRichard Tran Mills /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 836190ae7a4SRichard Tran Mills * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 837190ae7a4SRichard Tran Mills * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 8387301b172SPierre Jolivet * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY 839190ae7a4SRichard Tran Mills * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 840190ae7a4SRichard Tran Mills * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 841190ae7a4SRichard Tran Mills * the full matrix. */ 8429566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 8439566063dSJacob Faibussowitsch PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct)); 8449566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(C, &zeros, NULL)); 8459566063dSJacob Faibussowitsch PetscCall(VecSetFromOptions(zeros)); 8469566063dSJacob Faibussowitsch PetscCall(VecZeroEntries(zeros)); 8479566063dSJacob Faibussowitsch PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES)); 8489566063dSJacob Faibussowitsch PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN)); 849190ae7a4SRichard Tran Mills /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 850f3fa974cSJacob Faibussowitsch PetscCall(MatProductCreateWithMat(A, P, NULL, C)); 8519566063dSJacob Faibussowitsch PetscCall(MatProductSetType(C, MATPRODUCT_PtAP)); 8529566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_create_mkl_handle(C)); 8539566063dSJacob Faibussowitsch PetscCall(VecDestroy(&zeros)); 8549566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Ct)); 8553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8564f53af40SRichard Tran Mills } 857190ae7a4SRichard Tran Mills 858d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 859d71ae5a4SJacob Faibussowitsch { 860190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 861190ae7a4SRichard Tran Mills Mat A = product->A, P = product->B; 8621495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr; 863190ae7a4SRichard Tran Mills sparse_matrix_t csrA, csrP, csrC; 864190ae7a4SRichard Tran Mills struct matrix_descr descr_type_sym; 865190ae7a4SRichard Tran Mills PetscObjectState state; 866190ae7a4SRichard Tran Mills 867190ae7a4SRichard Tran Mills PetscFunctionBegin; 8689566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 8699566063dSJacob Faibussowitsch if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 8709566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)P, &state)); 8719566063dSJacob Faibussowitsch if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 872190ae7a4SRichard Tran Mills csrA = a->csrA; 873190ae7a4SRichard Tran Mills csrP = p->csrA; 874190ae7a4SRichard Tran Mills descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 875190ae7a4SRichard Tran Mills descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 876190ae7a4SRichard Tran Mills descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 877190ae7a4SRichard Tran Mills 8782fe279fdSBarry Smith /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 879190ae7a4SRichard Tran Mills if (csrP && csrA) { 880792fecdfSBarry Smith PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL); 881190ae7a4SRichard Tran Mills } else { 882f3fa974cSJacob Faibussowitsch csrC = NULL; 883190ae7a4SRichard Tran Mills } 884190ae7a4SRichard Tran Mills 885190ae7a4SRichard Tran Mills /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 886190ae7a4SRichard Tran Mills * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 88749ba5396SRichard Tran Mills * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 88849ba5396SRichard Tran Mills * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 88949ba5396SRichard Tran Mills * is guaranteed. */ 8909566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C)); 891190ae7a4SRichard Tran Mills 892190ae7a4SRichard Tran Mills C->ops->productnumeric = MatProductNumeric_PtAP; 8933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 894190ae7a4SRichard Tran Mills } 895190ae7a4SRichard Tran Mills 896d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 897d71ae5a4SJacob Faibussowitsch { 898190ae7a4SRichard Tran Mills PetscFunctionBegin; 899190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_AB; 900190ae7a4SRichard Tran Mills C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 9013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 902190ae7a4SRichard Tran Mills } 903190ae7a4SRichard Tran Mills 904d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 905d71ae5a4SJacob Faibussowitsch { 906190ae7a4SRichard Tran Mills PetscFunctionBegin; 907190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 9083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 909190ae7a4SRichard Tran Mills } 910190ae7a4SRichard Tran Mills 911d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 912d71ae5a4SJacob Faibussowitsch { 913190ae7a4SRichard Tran Mills PetscFunctionBegin; 914190ae7a4SRichard Tran Mills C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 915190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_ABt; 9163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 917190ae7a4SRichard Tran Mills } 918190ae7a4SRichard Tran Mills 919d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 920d71ae5a4SJacob Faibussowitsch { 921190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 922190ae7a4SRichard Tran Mills Mat A = product->A; 923190ae7a4SRichard Tran Mills PetscBool set, flag; 924190ae7a4SRichard Tran Mills 925190ae7a4SRichard Tran Mills PetscFunctionBegin; 926a3d67537SPierre Jolivet if (PetscDefined(USE_COMPLEX)) { 9272ab6f6a8SStefano Zampini /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used. 9282ab6f6a8SStefano Zampini * We do this in several other locations in this file. This works for the time being, but these 929190ae7a4SRichard Tran Mills * routines are considered unsafe and may be removed from the MatProduct code in the future. 9302ab6f6a8SStefano Zampini * TODO: Add proper MATSEQAIJMKL implementations */ 931190ae7a4SRichard Tran Mills C->ops->productsymbolic = NULL; 932a3d67537SPierre Jolivet } else { 933190ae7a4SRichard Tran Mills /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 9349566063dSJacob Faibussowitsch PetscCall(MatIsSymmetricKnown(A, &set, &flag)); 935a3d67537SPierre Jolivet if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 936a3d67537SPierre Jolivet else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 9372fe279fdSBarry Smith /* we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 938190ae7a4SRichard Tran Mills * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 939a3d67537SPierre Jolivet } 9403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 941190ae7a4SRichard Tran Mills } 942190ae7a4SRichard Tran Mills 943d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 944d71ae5a4SJacob Faibussowitsch { 945190ae7a4SRichard Tran Mills PetscFunctionBegin; 9462ab6f6a8SStefano Zampini C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 9473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 948190ae7a4SRichard Tran Mills } 949190ae7a4SRichard Tran Mills 950d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 951d71ae5a4SJacob Faibussowitsch { 952190ae7a4SRichard Tran Mills PetscFunctionBegin; 9532ab6f6a8SStefano Zampini C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 9543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 955190ae7a4SRichard Tran Mills } 956190ae7a4SRichard Tran Mills 957d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 958d71ae5a4SJacob Faibussowitsch { 959190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 960190ae7a4SRichard Tran Mills 961190ae7a4SRichard Tran Mills PetscFunctionBegin; 962190ae7a4SRichard Tran Mills switch (product->type) { 963d71ae5a4SJacob Faibussowitsch case MATPRODUCT_AB: 964d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C)); 965d71ae5a4SJacob Faibussowitsch break; 966d71ae5a4SJacob Faibussowitsch case MATPRODUCT_AtB: 967d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C)); 968d71ae5a4SJacob Faibussowitsch break; 969d71ae5a4SJacob Faibussowitsch case MATPRODUCT_ABt: 970d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C)); 971d71ae5a4SJacob Faibussowitsch break; 972d71ae5a4SJacob Faibussowitsch case MATPRODUCT_PtAP: 973d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C)); 974d71ae5a4SJacob Faibussowitsch break; 975d71ae5a4SJacob Faibussowitsch case MATPRODUCT_RARt: 976d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C)); 977d71ae5a4SJacob Faibussowitsch break; 978d71ae5a4SJacob Faibussowitsch case MATPRODUCT_ABC: 979d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C)); 980d71ae5a4SJacob Faibussowitsch break; 981d71ae5a4SJacob Faibussowitsch default: 982d71ae5a4SJacob Faibussowitsch break; 983190ae7a4SRichard Tran Mills } 9843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 985190ae7a4SRichard Tran Mills } 986431879ecSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 9874f53af40SRichard Tran Mills 9884a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 989510b72f4SRichard Tran Mills * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 9904a2a386eSRichard Tran Mills * routine, but can also be used to convert an assembled SeqAIJ matrix 9914a2a386eSRichard Tran Mills * into a SeqAIJMKL one. */ 992d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) 993d71ae5a4SJacob Faibussowitsch { 9944a2a386eSRichard Tran Mills Mat B = *newmat; 9954a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 996c9d46305SRichard Tran Mills PetscBool set; 997e9c94282SRichard Tran Mills PetscBool sametype; 9984a2a386eSRichard Tran Mills 9994a2a386eSRichard Tran Mills PetscFunctionBegin; 10009566063dSJacob Faibussowitsch if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 10014a2a386eSRichard Tran Mills 10029566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype)); 10033ba16761SJacob Faibussowitsch if (sametype) PetscFunctionReturn(PETSC_SUCCESS); 1004e9c94282SRichard Tran Mills 10054dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&aijmkl)); 10064a2a386eSRichard Tran Mills B->spptr = (void *)aijmkl; 10074a2a386eSRichard Tran Mills 1008df555b71SRichard Tran Mills /* Set function pointers for methods that we inherit from AIJ but override. 1009969800c5SRichard Tran Mills * We also parse some command line options below, since those determine some of the methods we point to. */ 10104a2a386eSRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJMKL; 10114a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 10124a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJMKL; 1013c9d46305SRichard Tran Mills 10144abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 1015ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1016d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1017a8327b06SKarl Rupp #else 1018d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 1019d995685eSRichard Tran Mills #endif 10205b49642aSRichard Tran Mills aijmkl->eager_inspection = PETSC_FALSE; 10214abfa3b3SRichard Tran Mills 10224abfa3b3SRichard Tran Mills /* Parse command line options. */ 1023d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat"); 10249566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set)); 10259566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set)); 1026d0609cedSBarry Smith PetscOptionsEnd(); 1027ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1028d995685eSRichard Tran Mills if (!aijmkl->no_SpMV2) { 10299566063dSJacob Faibussowitsch PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n")); 1030d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 1031d995685eSRichard Tran Mills } 1032d995685eSRichard Tran Mills #endif 1033c9d46305SRichard Tran Mills 1034ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1035df555b71SRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1036969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1037df555b71SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1038969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 10398a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1040190ae7a4SRichard Tran Mills B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1041190ae7a4SRichard Tran Mills B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1042190ae7a4SRichard Tran Mills B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1043190ae7a4SRichard Tran Mills B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1044190ae7a4SRichard Tran Mills B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1045ffcab697SRichard Tran Mills #if !defined(PETSC_USE_COMPLEX) 104649ba5396SRichard Tran Mills B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1047190ae7a4SRichard Tran Mills #else 1048190ae7a4SRichard Tran Mills B->ops->ptapnumeric = NULL; 10494f53af40SRichard Tran Mills #endif 1050e8be1fc7SRichard Tran Mills #endif 10511950a7e7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 10521950a7e7SRichard Tran Mills 1053213898a2SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1054213898a2SRichard Tran Mills /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1055213898a2SRichard Tran Mills * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1056213898a2SRichard Tran Mills * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1057213898a2SRichard Tran Mills * versions in which the older interface has not been deprecated, we use the old interface. */ 10581950a7e7SRichard Tran Mills if (aijmkl->no_SpMV2) { 10594a2a386eSRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL; 1060969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 10614a2a386eSRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL; 1062969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1063c9d46305SRichard Tran Mills } 10641950a7e7SRichard Tran Mills #endif 10654a2a386eSRichard Tran Mills 10669566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ)); 10674a2a386eSRichard Tran Mills 10689566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL)); 10694a2a386eSRichard Tran Mills *newmat = B; 10703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 10714a2a386eSRichard Tran Mills } 10724a2a386eSRichard Tran Mills 10734a2a386eSRichard Tran Mills /*@C 107411a5261eSBarry Smith MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`. 107590147e49SRichard Tran Mills 1076d083f849SBarry Smith Collective 10774a2a386eSRichard Tran Mills 10784a2a386eSRichard Tran Mills Input Parameters: 107911a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF` 10804a2a386eSRichard Tran Mills . m - number of rows 10814a2a386eSRichard Tran Mills . n - number of columns 10824a2a386eSRichard Tran Mills . nz - number of nonzeros per row (same for all rows) 10834a2a386eSRichard Tran Mills - nnz - array containing the number of nonzeros in the various rows 10842ef1f0ffSBarry Smith (possibly different for each row) or `NULL` 10854a2a386eSRichard Tran Mills 10864a2a386eSRichard Tran Mills Output Parameter: 10874a2a386eSRichard Tran Mills . A - the matrix 10884a2a386eSRichard Tran Mills 108990147e49SRichard Tran Mills Options Database Keys: 109066b7eeb6SRichard Tran Mills + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 10912ef1f0ffSBarry Smith - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, 10922ef1f0ffSBarry Smith performing this step the first time the matrix is applied 10934a2a386eSRichard Tran Mills 10944a2a386eSRichard Tran Mills Level: intermediate 10954a2a386eSRichard Tran Mills 10962fe279fdSBarry Smith Notes: 10972ef1f0ffSBarry Smith If `nnz` is given then `nz` is ignored 10982ef1f0ffSBarry Smith 10992fe279fdSBarry Smith This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS 11002fe279fdSBarry Smith routines from Intel MKL whenever possible. 11012fe279fdSBarry Smith 11022fe279fdSBarry Smith If the installed version of MKL supports the "SpMV2" sparse 11032fe279fdSBarry Smith inspector-executor routines, then those are used by default. 11042fe279fdSBarry Smith 11052fe279fdSBarry Smith `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()` 11062fe279fdSBarry Smith (for symmetric A) operations are currently supported. 11072fe279fdSBarry Smith MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`. 11082fe279fdSBarry Smith 11091cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()` 11104a2a386eSRichard Tran Mills @*/ 1111d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 1112d71ae5a4SJacob Faibussowitsch { 11134a2a386eSRichard Tran Mills PetscFunctionBegin; 11149566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 11159566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, m, n)); 11169566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQAIJMKL)); 11179566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz)); 11183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 11194a2a386eSRichard Tran Mills } 11204a2a386eSRichard Tran Mills 1121d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1122d71ae5a4SJacob Faibussowitsch { 11234a2a386eSRichard Tran Mills PetscFunctionBegin; 11249566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATSEQAIJ)); 11259566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A)); 11263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 11274a2a386eSRichard Tran Mills } 1128