1b9e7e5c1SBarry Smith 24a2a386eSRichard Tran Mills /* 34a2a386eSRichard Tran Mills Defines basic operations for the MATSEQAIJMKL matrix class. 44a2a386eSRichard Tran Mills This class is derived from the MATSEQAIJ class and retains the 54a2a386eSRichard Tran Mills compressed row storage (aka Yale sparse matrix format) but uses 64a2a386eSRichard Tran Mills sparse BLAS operations from the Intel Math Kernel Library (MKL) 74a2a386eSRichard Tran Mills wherever possible. 84a2a386eSRichard Tran Mills */ 94a2a386eSRichard Tran Mills 104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h> 114a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12b9e7e5c1SBarry Smith #include <mkl_spblas.h> 134a2a386eSRichard Tran Mills 144a2a386eSRichard Tran Mills typedef struct { 15c9d46305SRichard Tran Mills PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 165b49642aSRichard Tran Mills PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 174abfa3b3SRichard Tran Mills PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18551aa5c8SRichard Tran Mills PetscObjectState state; 19ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20df555b71SRichard Tran Mills sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21df555b71SRichard Tran Mills struct matrix_descr descr; 22b8cbc1fbSRichard Tran Mills #endif 234a2a386eSRichard Tran Mills } Mat_SeqAIJMKL; 244a2a386eSRichard Tran Mills 254a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType); 264a2a386eSRichard Tran Mills 27*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat) 28*d71ae5a4SJacob Faibussowitsch { 294a2a386eSRichard Tran Mills /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 304a2a386eSRichard Tran Mills /* so we will ignore 'MatType type'. */ 314a2a386eSRichard Tran Mills Mat B = *newmat; 32ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 334a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 34c1d5218aSRichard Tran Mills #endif 354a2a386eSRichard Tran Mills 364a2a386eSRichard Tran Mills PetscFunctionBegin; 379566063dSJacob Faibussowitsch if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 384a2a386eSRichard Tran Mills 394a2a386eSRichard Tran Mills /* Reset the original function pointers. */ 4054871a98SRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJ; 414a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 424a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJ; 4354871a98SRichard Tran Mills B->ops->mult = MatMult_SeqAIJ; 44ff03dc53SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJ; 4554871a98SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJ; 46ff03dc53SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 47190ae7a4SRichard Tran Mills B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 48431879ecSRichard Tran Mills B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 49e8be1fc7SRichard Tran Mills B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 50190ae7a4SRichard Tran Mills B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 51190ae7a4SRichard Tran Mills B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 524f53af40SRichard Tran Mills B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 534a2a386eSRichard Tran Mills 549566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL)); 554222ddf1SHong Zhang 56ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 574abfa3b3SRichard Tran Mills /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 58e9c94282SRichard Tran Mills * simply involves destroying the MKL sparse matrix handle and then freeing 59e9c94282SRichard Tran Mills * the spptr pointer. */ 60a8327b06SKarl Rupp if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr; 61a8327b06SKarl Rupp 62792fecdfSBarry Smith if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 63ddf6f99aSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 649566063dSJacob Faibussowitsch PetscCall(PetscFree(B->spptr)); 654a2a386eSRichard Tran Mills 664a2a386eSRichard Tran Mills /* Change the type of B to MATSEQAIJ. */ 679566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ)); 684a2a386eSRichard Tran Mills 694a2a386eSRichard Tran Mills *newmat = B; 704a2a386eSRichard Tran Mills PetscFunctionReturn(0); 714a2a386eSRichard Tran Mills } 724a2a386eSRichard Tran Mills 73*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 74*d71ae5a4SJacob Faibussowitsch { 754a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 764a2a386eSRichard Tran Mills 774a2a386eSRichard Tran Mills PetscFunctionBegin; 78e9c94282SRichard Tran Mills 79edc89de7SRichard Tran Mills /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */ 80e9c94282SRichard Tran Mills if (aijmkl) { 814a2a386eSRichard Tran Mills /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 82ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 83792fecdfSBarry Smith if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 844abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 859566063dSJacob Faibussowitsch PetscCall(PetscFree(A->spptr)); 86e9c94282SRichard Tran Mills } 874a2a386eSRichard Tran Mills 884a2a386eSRichard Tran Mills /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 894a2a386eSRichard Tran Mills * to destroy everything that remains. */ 909566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ)); 914a2a386eSRichard Tran Mills /* Note that I don't call MatSetType(). I believe this is because that 924a2a386eSRichard Tran Mills * is only to be called when *building* a matrix. I could be wrong, but 934a2a386eSRichard Tran Mills * that is how things work for the SuperLU matrix class. */ 942e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL)); 959566063dSJacob Faibussowitsch PetscCall(MatDestroy_SeqAIJ(A)); 964a2a386eSRichard Tran Mills PetscFunctionReturn(0); 974a2a386eSRichard Tran Mills } 984a2a386eSRichard Tran Mills 99190ae7a4SRichard Tran Mills /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 1005b49642aSRichard Tran Mills * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 1015b49642aSRichard Tran Mills * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 1025b49642aSRichard Tran Mills * handle, creates a new one, and then calls mkl_sparse_optimize(). 1035b49642aSRichard Tran Mills * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 1045b49642aSRichard Tran Mills * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 1055b49642aSRichard Tran Mills * an unoptimized matrix handle here. */ 106*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 107*d71ae5a4SJacob Faibussowitsch { 108ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1096e369cd5SRichard Tran Mills /* If the MKL library does not have mkl_sparse_optimize(), then this routine 1106e369cd5SRichard Tran Mills * does nothing. We make it callable anyway in this case because it cuts 1116e369cd5SRichard Tran Mills * down on littering the code with #ifdefs. */ 11245fbe478SRichard Tran Mills PetscFunctionBegin; 1136e369cd5SRichard Tran Mills PetscFunctionReturn(0); 1146e369cd5SRichard Tran Mills #else 115a8327b06SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 116a8327b06SKarl Rupp Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 117a8327b06SKarl Rupp PetscInt m, n; 118a8327b06SKarl Rupp MatScalar *aa; 119a8327b06SKarl Rupp PetscInt *aj, *ai; 1204a2a386eSRichard Tran Mills 121a8327b06SKarl Rupp PetscFunctionBegin; 122e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 123e626a176SRichard Tran Mills /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 124e626a176SRichard Tran Mills * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 125e626a176SRichard Tran Mills * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 126e626a176SRichard Tran Mills * mkl_sparse_optimize() later. */ 1276e369cd5SRichard Tran Mills if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 1284d51fa23SRichard Tran Mills #endif 1296e369cd5SRichard Tran Mills 1300632b357SRichard Tran Mills if (aijmkl->sparse_optimized) { 1310632b357SRichard Tran Mills /* Matrix has been previously assembled and optimized. Must destroy old 1320632b357SRichard Tran Mills * matrix handle before running the optimization step again. */ 133792fecdfSBarry Smith PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 1340632b357SRichard Tran Mills } 1358d3fe1b0SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 1366e369cd5SRichard Tran Mills 137c9d46305SRichard Tran Mills /* Now perform the SpMV2 setup and matrix optimization. */ 138df555b71SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 139df555b71SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 140df555b71SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 14158678438SRichard Tran Mills m = A->rmap->n; 14258678438SRichard Tran Mills n = A->cmap->n; 143df555b71SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 144df555b71SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 145df555b71SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 1461495fedeSRichard Tran Mills if (a->nz && aa && !A->structure_only) { 1478d3fe1b0SRichard Tran Mills /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 1488d3fe1b0SRichard Tran Mills * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 149792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, m, n, ai, ai + 1, aj, aa); 150792fecdfSBarry Smith PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000); 151792fecdfSBarry Smith PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE); 15248a46eb9SPierre Jolivet if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA); 1534abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 1549566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state))); 155190ae7a4SRichard Tran Mills } else { 156190ae7a4SRichard Tran Mills aijmkl->csrA = PETSC_NULL; 157c9d46305SRichard Tran Mills } 1586e369cd5SRichard Tran Mills 1596e369cd5SRichard Tran Mills PetscFunctionReturn(0); 160d995685eSRichard Tran Mills #endif 1616e369cd5SRichard Tran Mills } 1626e369cd5SRichard Tran Mills 163b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 164190ae7a4SRichard Tran Mills /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 165*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A) 166*d71ae5a4SJacob Faibussowitsch { 16719afcda9SRichard Tran Mills sparse_index_base_t indexing; 168190ae7a4SRichard Tran Mills PetscInt m, n; 16945fbe478SRichard Tran Mills PetscInt *aj, *ai, *dummy; 17019afcda9SRichard Tran Mills MatScalar *aa; 17119afcda9SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 17219afcda9SRichard Tran Mills 173362febeeSStefano Zampini PetscFunctionBegin; 174190ae7a4SRichard Tran Mills if (csrA) { 17545fbe478SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 176792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, &m, &n, &ai, &dummy, &aj, &aa); 1775f80ce2aSJacob Faibussowitsch PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 178190ae7a4SRichard Tran Mills } else { 179190ae7a4SRichard Tran Mills aj = ai = PETSC_NULL; 180190ae7a4SRichard Tran Mills aa = PETSC_NULL; 181aab60f1bSRichard Tran Mills } 182190ae7a4SRichard Tran Mills 1839566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATSEQAIJ)); 1849566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols)); 185aab60f1bSRichard Tran Mills /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 186aab60f1bSRichard Tran Mills * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 187aab60f1bSRichard Tran Mills * they will be destroyed when the MKL handle is destroyed. 188aab60f1bSRichard Tran Mills * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 189190ae7a4SRichard Tran Mills if (csrA) { 1909566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL)); 191190ae7a4SRichard Tran Mills } else { 192190ae7a4SRichard Tran Mills /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 1939566063dSJacob Faibussowitsch PetscCall(MatSetUp(A)); 1949566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1959566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 196190ae7a4SRichard Tran Mills } 19719afcda9SRichard Tran Mills 19819afcda9SRichard Tran Mills /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 19919afcda9SRichard Tran Mills * Now turn it into a MATSEQAIJMKL. */ 2009566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A)); 2016c87cf42SRichard Tran Mills 20219afcda9SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL *)A->spptr; 20319afcda9SRichard Tran Mills aijmkl->csrA = csrA; 2046c87cf42SRichard Tran Mills 20519afcda9SRichard Tran Mills /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 20619afcda9SRichard Tran Mills * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 20719afcda9SRichard Tran Mills * and just need to be able to run the MKL optimization step. */ 208f3fd1758SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 209f3fd1758SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 210f3fd1758SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 211190ae7a4SRichard Tran Mills if (csrA) { 212792fecdfSBarry Smith PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000); 213792fecdfSBarry Smith PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE); 2141950a7e7SRichard Tran Mills } 2159566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state))); 21619afcda9SRichard Tran Mills PetscFunctionReturn(0); 21719afcda9SRichard Tran Mills } 218b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 219190ae7a4SRichard Tran Mills 220e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 221e8be1fc7SRichard Tran Mills * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 222e8be1fc7SRichard Tran Mills * MatMatMultNumeric(). */ 223b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 224*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 225*d71ae5a4SJacob Faibussowitsch { 226e8be1fc7SRichard Tran Mills PetscInt i; 227e8be1fc7SRichard Tran Mills PetscInt nrows, ncols; 228e8be1fc7SRichard Tran Mills PetscInt nz; 229e8be1fc7SRichard Tran Mills PetscInt *ai, *aj, *dummy; 230e8be1fc7SRichard Tran Mills PetscScalar *aa; 2311495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 232e8be1fc7SRichard Tran Mills sparse_index_base_t indexing; 233e8be1fc7SRichard Tran Mills 234362febeeSStefano Zampini PetscFunctionBegin; 235190ae7a4SRichard Tran Mills /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 236190ae7a4SRichard Tran Mills if (!aijmkl->csrA) PetscFunctionReturn(0); 237190ae7a4SRichard Tran Mills 238e8be1fc7SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 239792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa); 240e8be1fc7SRichard Tran Mills 241e8be1fc7SRichard Tran Mills /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 242e8be1fc7SRichard Tran Mills * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 243e8be1fc7SRichard Tran Mills for (i = 0; i < nrows; i++) { 244e8be1fc7SRichard Tran Mills nz = ai[i + 1] - ai[i]; 2459566063dSJacob Faibussowitsch PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES)); 246e8be1fc7SRichard Tran Mills } 247e8be1fc7SRichard Tran Mills 2489566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 2499566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 250e8be1fc7SRichard Tran Mills 2519566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state))); 252a7180b50SRichard Tran Mills /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation. 253a7180b50SRichard Tran Mills * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed 254a7180b50SRichard Tran Mills * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */ 255a7180b50SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 256e8be1fc7SRichard Tran Mills PetscFunctionReturn(0); 257e8be1fc7SRichard Tran Mills } 258b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 259e8be1fc7SRichard Tran Mills 2603849ddb2SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 261*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer) 262*d71ae5a4SJacob Faibussowitsch { 2633849ddb2SRichard Tran Mills PetscInt i, j, k; 2643849ddb2SRichard Tran Mills PetscInt nrows, ncols; 2653849ddb2SRichard Tran Mills PetscInt nz; 2663849ddb2SRichard Tran Mills PetscInt *ai, *aj, *dummy; 2673849ddb2SRichard Tran Mills PetscScalar *aa; 2681495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 2693849ddb2SRichard Tran Mills sparse_index_base_t indexing; 2703849ddb2SRichard Tran Mills 271362febeeSStefano Zampini PetscFunctionBegin; 2729566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n")); 2733849ddb2SRichard Tran Mills 2743849ddb2SRichard Tran Mills /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 2753849ddb2SRichard Tran Mills if (!aijmkl->csrA) { 2769566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n")); 2773849ddb2SRichard Tran Mills PetscFunctionReturn(0); 2783849ddb2SRichard Tran Mills } 2793849ddb2SRichard Tran Mills 2803849ddb2SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 281792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa); 2823849ddb2SRichard Tran Mills 2833849ddb2SRichard Tran Mills k = 0; 2843849ddb2SRichard Tran Mills for (i = 0; i < nrows; i++) { 2859566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i)); 2863849ddb2SRichard Tran Mills nz = ai[i + 1] - ai[i]; 2873849ddb2SRichard Tran Mills for (j = 0; j < nz; j++) { 2883849ddb2SRichard Tran Mills if (aa) { 2899566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g) ", aj[k], PetscRealPart(aa[k]))); 2903849ddb2SRichard Tran Mills } else { 2919566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k])); 2923849ddb2SRichard Tran Mills } 2933849ddb2SRichard Tran Mills k++; 2943849ddb2SRichard Tran Mills } 2959566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 2963849ddb2SRichard Tran Mills } 2973849ddb2SRichard Tran Mills PetscFunctionReturn(0); 2983849ddb2SRichard Tran Mills } 2993849ddb2SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 3003849ddb2SRichard Tran Mills 301*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 302*d71ae5a4SJacob Faibussowitsch { 3031495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 3046e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl_dest; 3056e369cd5SRichard Tran Mills 3066e369cd5SRichard Tran Mills PetscFunctionBegin; 3079566063dSJacob Faibussowitsch PetscCall(MatDuplicate_SeqAIJ(A, op, M)); 3086e369cd5SRichard Tran Mills aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr; 3099566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1)); 3106e369cd5SRichard Tran Mills aijmkl_dest->sparse_optimized = PETSC_FALSE; 3111baa6e33SBarry Smith if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 3126e369cd5SRichard Tran Mills PetscFunctionReturn(0); 3136e369cd5SRichard Tran Mills } 3146e369cd5SRichard Tran Mills 315*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 316*d71ae5a4SJacob Faibussowitsch { 3176e369cd5SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3185b49642aSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 3196e369cd5SRichard Tran Mills 3206e369cd5SRichard Tran Mills PetscFunctionBegin; 3216e369cd5SRichard Tran Mills if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 3226e369cd5SRichard Tran Mills 3236e369cd5SRichard Tran Mills /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 3246e369cd5SRichard Tran Mills * extra information and some different methods, call the AssemblyEnd 3256e369cd5SRichard Tran Mills * routine for a MATSEQAIJ. 3266e369cd5SRichard Tran Mills * I'm not sure if this is the best way to do this, but it avoids 327d96e85feSRichard Tran Mills * a lot of code duplication. */ 3286e369cd5SRichard Tran Mills a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 3299566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 3306e369cd5SRichard Tran Mills 3315b49642aSRichard Tran Mills /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 3325b49642aSRichard Tran Mills * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 3335b49642aSRichard Tran Mills aijmkl = (Mat_SeqAIJMKL *)A->spptr; 3341baa6e33SBarry Smith if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 335df555b71SRichard Tran Mills 3364a2a386eSRichard Tran Mills PetscFunctionReturn(0); 3374a2a386eSRichard Tran Mills } 3384a2a386eSRichard Tran Mills 339e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 340*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy) 341*d71ae5a4SJacob Faibussowitsch { 3424a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3434a2a386eSRichard Tran Mills const PetscScalar *x; 3444a2a386eSRichard Tran Mills PetscScalar *y; 3454a2a386eSRichard Tran Mills const MatScalar *aa; 3464a2a386eSRichard Tran Mills PetscInt m = A->rmap->n; 347db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 348db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 349db63039fSRichard Tran Mills PetscScalar beta = 0.0; 3504a2a386eSRichard Tran Mills const PetscInt *aj, *ai; 351db63039fSRichard Tran Mills char matdescra[6]; 352db63039fSRichard Tran Mills 3534a2a386eSRichard Tran Mills /* Variables not in MatMult_SeqAIJ. */ 354ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 355ff03dc53SRichard Tran Mills 356ff03dc53SRichard Tran Mills PetscFunctionBegin; 357db63039fSRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 358db63039fSRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 3599566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3609566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 361ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 362ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 363ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 364ff03dc53SRichard Tran Mills 365ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 366db63039fSRichard Tran Mills mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y); 367ff03dc53SRichard Tran Mills 3689566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 3699566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3709566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 371ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 372ff03dc53SRichard Tran Mills } 3731950a7e7SRichard Tran Mills #endif 374ff03dc53SRichard Tran Mills 375ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 376*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) 377*d71ae5a4SJacob Faibussowitsch { 378df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 379df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 380df555b71SRichard Tran Mills const PetscScalar *x; 381df555b71SRichard Tran Mills PetscScalar *y; 382551aa5c8SRichard Tran Mills PetscObjectState state; 383df555b71SRichard Tran Mills 384df555b71SRichard Tran Mills PetscFunctionBegin; 385df555b71SRichard Tran Mills 38638987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 38738987b35SRichard Tran Mills if (!a->nz) { 3889566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 3899566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(y, A->rmap->n)); 3909566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 39138987b35SRichard Tran Mills PetscFunctionReturn(0); 39238987b35SRichard Tran Mills } 393f36dfe3fSRichard Tran Mills 3949566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3959566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 396df555b71SRichard Tran Mills 3973fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 3983fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 3993fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 4009566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 4019566063dSJacob Faibussowitsch if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 4023fa15762SRichard Tran Mills 403df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMult. */ 404792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y); 405df555b71SRichard Tran Mills 4069566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 4079566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4089566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 409df555b71SRichard Tran Mills PetscFunctionReturn(0); 410df555b71SRichard Tran Mills } 411d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 412df555b71SRichard Tran Mills 413e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 414*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy) 415*d71ae5a4SJacob Faibussowitsch { 416ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 417ff03dc53SRichard Tran Mills const PetscScalar *x; 418ff03dc53SRichard Tran Mills PetscScalar *y; 419ff03dc53SRichard Tran Mills const MatScalar *aa; 420ff03dc53SRichard Tran Mills PetscInt m = A->rmap->n; 421db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 422db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 423db63039fSRichard Tran Mills PetscScalar beta = 0.0; 424ff03dc53SRichard Tran Mills const PetscInt *aj, *ai; 425db63039fSRichard Tran Mills char matdescra[6]; 426ff03dc53SRichard Tran Mills 427ff03dc53SRichard Tran Mills /* Variables not in MatMultTranspose_SeqAIJ. */ 428ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 4294a2a386eSRichard Tran Mills 4304a2a386eSRichard Tran Mills PetscFunctionBegin; 431969800c5SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 432969800c5SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 4339566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4349566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 4354a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 4364a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 4374a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 4384a2a386eSRichard Tran Mills 4394a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 440db63039fSRichard Tran Mills mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y); 4414a2a386eSRichard Tran Mills 4429566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 4439566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4449566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 4454a2a386eSRichard Tran Mills PetscFunctionReturn(0); 4464a2a386eSRichard Tran Mills } 4471950a7e7SRichard Tran Mills #endif 4484a2a386eSRichard Tran Mills 449ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 450*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) 451*d71ae5a4SJacob Faibussowitsch { 452df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 453df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 454df555b71SRichard Tran Mills const PetscScalar *x; 455df555b71SRichard Tran Mills PetscScalar *y; 456551aa5c8SRichard Tran Mills PetscObjectState state; 457df555b71SRichard Tran Mills 458df555b71SRichard Tran Mills PetscFunctionBegin; 459df555b71SRichard Tran Mills 46038987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 46138987b35SRichard Tran Mills if (!a->nz) { 4629566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 4639566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(y, A->cmap->n)); 4649566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 46538987b35SRichard Tran Mills PetscFunctionReturn(0); 46638987b35SRichard Tran Mills } 467f36dfe3fSRichard Tran Mills 4689566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4699566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 470df555b71SRichard Tran Mills 4713fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 4723fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 4733fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 4749566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 4759566063dSJacob Faibussowitsch if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 4763fa15762SRichard Tran Mills 477df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 478792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y); 479df555b71SRichard Tran Mills 4809566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 4819566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4829566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 483df555b71SRichard Tran Mills PetscFunctionReturn(0); 484df555b71SRichard Tran Mills } 485d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 486df555b71SRichard Tran Mills 487e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 488*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) 489*d71ae5a4SJacob Faibussowitsch { 4904a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 4914a2a386eSRichard Tran Mills const PetscScalar *x; 4924a2a386eSRichard Tran Mills PetscScalar *y, *z; 4934a2a386eSRichard Tran Mills const MatScalar *aa; 4944a2a386eSRichard Tran Mills PetscInt m = A->rmap->n; 495db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 4964a2a386eSRichard Tran Mills const PetscInt *aj, *ai; 4974a2a386eSRichard Tran Mills PetscInt i; 4984a2a386eSRichard Tran Mills 499ff03dc53SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 500ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 501a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 502db63039fSRichard Tran Mills PetscScalar beta; 503a84739b8SRichard Tran Mills char matdescra[6]; 504ff03dc53SRichard Tran Mills 505ff03dc53SRichard Tran Mills PetscFunctionBegin; 506a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 507a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 508a84739b8SRichard Tran Mills 5099566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 5109566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 511ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 512ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 513ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 514ff03dc53SRichard Tran Mills 515ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 516a84739b8SRichard Tran Mills if (zz == yy) { 517a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 518db63039fSRichard Tran Mills beta = 1.0; 519db63039fSRichard Tran Mills mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 520a84739b8SRichard Tran Mills } else { 521db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 522db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 523db63039fSRichard Tran Mills beta = 0.0; 524db63039fSRichard Tran Mills mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 525ad540459SPierre Jolivet for (i = 0; i < m; i++) z[i] += y[i]; 526a84739b8SRichard Tran Mills } 527ff03dc53SRichard Tran Mills 5289566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 5299566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 5309566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 531ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 532ff03dc53SRichard Tran Mills } 5331950a7e7SRichard Tran Mills #endif 534ff03dc53SRichard Tran Mills 535ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 536*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) 537*d71ae5a4SJacob Faibussowitsch { 538df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 539df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 540df555b71SRichard Tran Mills const PetscScalar *x; 541df555b71SRichard Tran Mills PetscScalar *y, *z; 542df555b71SRichard Tran Mills PetscInt m = A->rmap->n; 543df555b71SRichard Tran Mills PetscInt i; 544df555b71SRichard Tran Mills 545df555b71SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 546551aa5c8SRichard Tran Mills PetscObjectState state; 547df555b71SRichard Tran Mills 548df555b71SRichard Tran Mills PetscFunctionBegin; 549df555b71SRichard Tran Mills 55038987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 55138987b35SRichard Tran Mills if (!a->nz) { 5529566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 5539566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(z, y, m)); 5549566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 55538987b35SRichard Tran Mills PetscFunctionReturn(0); 55638987b35SRichard Tran Mills } 557df555b71SRichard Tran Mills 5589566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 5599566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 560df555b71SRichard Tran Mills 5613fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 5623fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 5633fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 5649566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 5659566063dSJacob Faibussowitsch if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 5663fa15762SRichard Tran Mills 567df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 568df555b71SRichard Tran Mills if (zz == yy) { 569df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 570df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 571792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z); 572df555b71SRichard Tran Mills } else { 573df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 574df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 575792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z); 5765f80ce2aSJacob Faibussowitsch for (i = 0; i < m; i++) z[i] += y[i]; 577df555b71SRichard Tran Mills } 578df555b71SRichard Tran Mills 5799566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 5809566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 5819566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 582df555b71SRichard Tran Mills PetscFunctionReturn(0); 583df555b71SRichard Tran Mills } 584d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 585df555b71SRichard Tran Mills 586e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 587*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) 588*d71ae5a4SJacob Faibussowitsch { 589ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 590ff03dc53SRichard Tran Mills const PetscScalar *x; 591ff03dc53SRichard Tran Mills PetscScalar *y, *z; 592ff03dc53SRichard Tran Mills const MatScalar *aa; 593ff03dc53SRichard Tran Mills PetscInt m = A->rmap->n; 594db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 595ff03dc53SRichard Tran Mills const PetscInt *aj, *ai; 596ff03dc53SRichard Tran Mills PetscInt i; 597ff03dc53SRichard Tran Mills 598ff03dc53SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 599ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 600a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 601db63039fSRichard Tran Mills PetscScalar beta; 602a84739b8SRichard Tran Mills char matdescra[6]; 6034a2a386eSRichard Tran Mills 6044a2a386eSRichard Tran Mills PetscFunctionBegin; 605a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 606a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 607a84739b8SRichard Tran Mills 6089566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 6099566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 6104a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 6114a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 6124a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 6134a2a386eSRichard Tran Mills 6144a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 615a84739b8SRichard Tran Mills if (zz == yy) { 616a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 617db63039fSRichard Tran Mills beta = 1.0; 618969800c5SRichard Tran Mills mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 619a84739b8SRichard Tran Mills } else { 620db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 621db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 622db63039fSRichard Tran Mills beta = 0.0; 623db63039fSRichard Tran Mills mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 624ad540459SPierre Jolivet for (i = 0; i < n; i++) z[i] += y[i]; 625a84739b8SRichard Tran Mills } 6264a2a386eSRichard Tran Mills 6279566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 6289566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 6299566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 6304a2a386eSRichard Tran Mills PetscFunctionReturn(0); 6314a2a386eSRichard Tran Mills } 6321950a7e7SRichard Tran Mills #endif 6334a2a386eSRichard Tran Mills 634ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 635*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) 636*d71ae5a4SJacob Faibussowitsch { 637df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 638df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 639df555b71SRichard Tran Mills const PetscScalar *x; 640df555b71SRichard Tran Mills PetscScalar *y, *z; 641969800c5SRichard Tran Mills PetscInt n = A->cmap->n; 642df555b71SRichard Tran Mills PetscInt i; 643551aa5c8SRichard Tran Mills PetscObjectState state; 644df555b71SRichard Tran Mills 645df555b71SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 646df555b71SRichard Tran Mills 647df555b71SRichard Tran Mills PetscFunctionBegin; 648df555b71SRichard Tran Mills 64938987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 65038987b35SRichard Tran Mills if (!a->nz) { 6519566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 6529566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(z, y, n)); 6539566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 65438987b35SRichard Tran Mills PetscFunctionReturn(0); 65538987b35SRichard Tran Mills } 656f36dfe3fSRichard Tran Mills 6579566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 6589566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 659df555b71SRichard Tran Mills 6603fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 6613fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 6623fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 6639566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 6645f80ce2aSJacob Faibussowitsch if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A); 6653fa15762SRichard Tran Mills 666df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 667df555b71SRichard Tran Mills if (zz == yy) { 668df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 669df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 670792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z); 671df555b71SRichard Tran Mills } else { 672df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 673df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 674792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z); 6755f80ce2aSJacob Faibussowitsch for (i = 0; i < n; i++) z[i] += y[i]; 676df555b71SRichard Tran Mills } 677df555b71SRichard Tran Mills 6789566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 6799566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 6809566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 681df555b71SRichard Tran Mills PetscFunctionReturn(0); 682df555b71SRichard Tran Mills } 683d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 684df555b71SRichard Tran Mills 685190ae7a4SRichard Tran Mills /* -------------------------- MatProduct code -------------------------- */ 6868a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 687*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) 688*d71ae5a4SJacob Faibussowitsch { 6891495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr; 690431879ecSRichard Tran Mills sparse_matrix_t csrA, csrB, csrC; 691190ae7a4SRichard Tran Mills PetscInt nrows, ncols; 692431879ecSRichard Tran Mills struct matrix_descr descr_type_gen; 693431879ecSRichard Tran Mills PetscObjectState state; 694431879ecSRichard Tran Mills 695431879ecSRichard Tran Mills PetscFunctionBegin; 696190ae7a4SRichard Tran Mills /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 697190ae7a4SRichard Tran Mills * not handle sparse matrices with zero rows or columns. */ 698190ae7a4SRichard Tran Mills if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 699190ae7a4SRichard Tran Mills else nrows = A->cmap->N; 700190ae7a4SRichard Tran Mills if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 701190ae7a4SRichard Tran Mills else ncols = B->rmap->N; 702190ae7a4SRichard Tran Mills 7039566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 7049566063dSJacob Faibussowitsch if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 7059566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)B, &state)); 7069566063dSJacob Faibussowitsch if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 707431879ecSRichard Tran Mills csrA = a->csrA; 708431879ecSRichard Tran Mills csrB = b->csrA; 709431879ecSRichard Tran Mills descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 710431879ecSRichard Tran Mills 711190ae7a4SRichard Tran Mills if (csrA && csrB) { 712792fecdfSBarry Smith PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC); 713190ae7a4SRichard Tran Mills } else { 714190ae7a4SRichard Tran Mills csrC = PETSC_NULL; 715190ae7a4SRichard Tran Mills } 716190ae7a4SRichard Tran Mills 7179566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C)); 718431879ecSRichard Tran Mills 719431879ecSRichard Tran Mills PetscFunctionReturn(0); 720431879ecSRichard Tran Mills } 721431879ecSRichard Tran Mills 722*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) 723*d71ae5a4SJacob Faibussowitsch { 7241495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr; 725e8be1fc7SRichard Tran Mills sparse_matrix_t csrA, csrB, csrC; 726e8be1fc7SRichard Tran Mills struct matrix_descr descr_type_gen; 727e8be1fc7SRichard Tran Mills PetscObjectState state; 728e8be1fc7SRichard Tran Mills 729e8be1fc7SRichard Tran Mills PetscFunctionBegin; 7309566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 7319566063dSJacob Faibussowitsch if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 7329566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)B, &state)); 7339566063dSJacob Faibussowitsch if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 734e8be1fc7SRichard Tran Mills csrA = a->csrA; 735e8be1fc7SRichard Tran Mills csrB = b->csrA; 736e8be1fc7SRichard Tran Mills csrC = c->csrA; 737e8be1fc7SRichard Tran Mills descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 738e8be1fc7SRichard Tran Mills 739190ae7a4SRichard Tran Mills if (csrA && csrB) { 740792fecdfSBarry Smith PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC); 741190ae7a4SRichard Tran Mills } else { 742190ae7a4SRichard Tran Mills csrC = PETSC_NULL; 743190ae7a4SRichard Tran Mills } 744e8be1fc7SRichard Tran Mills 745e8be1fc7SRichard Tran Mills /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 7469566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 747e8be1fc7SRichard Tran Mills 748e8be1fc7SRichard Tran Mills PetscFunctionReturn(0); 749e8be1fc7SRichard Tran Mills } 750e8be1fc7SRichard Tran Mills 751*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) 752*d71ae5a4SJacob Faibussowitsch { 753190ae7a4SRichard Tran Mills PetscFunctionBegin; 7549566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 755190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 756190ae7a4SRichard Tran Mills } 757190ae7a4SRichard Tran Mills 758*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) 759*d71ae5a4SJacob Faibussowitsch { 760190ae7a4SRichard Tran Mills PetscFunctionBegin; 7619566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 762190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 763190ae7a4SRichard Tran Mills } 764190ae7a4SRichard Tran Mills 765*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) 766*d71ae5a4SJacob Faibussowitsch { 767190ae7a4SRichard Tran Mills PetscFunctionBegin; 7689566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 769190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 770190ae7a4SRichard Tran Mills } 771190ae7a4SRichard Tran Mills 772*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) 773*d71ae5a4SJacob Faibussowitsch { 774190ae7a4SRichard Tran Mills PetscFunctionBegin; 7759566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 776190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 777190ae7a4SRichard Tran Mills } 778190ae7a4SRichard Tran Mills 779*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) 780*d71ae5a4SJacob Faibussowitsch { 781190ae7a4SRichard Tran Mills PetscFunctionBegin; 7829566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C)); 783190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 784190ae7a4SRichard Tran Mills } 785190ae7a4SRichard Tran Mills 786*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) 787*d71ae5a4SJacob Faibussowitsch { 788190ae7a4SRichard Tran Mills PetscFunctionBegin; 7899566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C)); 790190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 791190ae7a4SRichard Tran Mills } 792190ae7a4SRichard Tran Mills 793*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 794*d71ae5a4SJacob Faibussowitsch { 795190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 796190ae7a4SRichard Tran Mills Mat A = product->A, B = product->B; 797190ae7a4SRichard Tran Mills 798190ae7a4SRichard Tran Mills PetscFunctionBegin; 7999566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C)); 800190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 801190ae7a4SRichard Tran Mills } 802190ae7a4SRichard Tran Mills 803*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 804*d71ae5a4SJacob Faibussowitsch { 805190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 806190ae7a4SRichard Tran Mills Mat A = product->A, B = product->B; 807190ae7a4SRichard Tran Mills PetscReal fill = product->fill; 808190ae7a4SRichard Tran Mills 809190ae7a4SRichard Tran Mills PetscFunctionBegin; 8109566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C)); 811190ae7a4SRichard Tran Mills C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 812190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 813190ae7a4SRichard Tran Mills } 814190ae7a4SRichard Tran Mills 815*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C) 816*d71ae5a4SJacob Faibussowitsch { 817190ae7a4SRichard Tran Mills Mat Ct; 818190ae7a4SRichard Tran Mills Vec zeros; 8191495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr; 8204f53af40SRichard Tran Mills sparse_matrix_t csrA, csrP, csrC; 8214f53af40SRichard Tran Mills PetscBool set, flag; 822b9e1dd46SRichard Tran Mills struct matrix_descr descr_type_sym; 8234f53af40SRichard Tran Mills PetscObjectState state; 8244f53af40SRichard Tran Mills 8254f53af40SRichard Tran Mills PetscFunctionBegin; 8269566063dSJacob Faibussowitsch PetscCall(MatIsSymmetricKnown(A, &set, &flag)); 827b94d7dedSBarry Smith PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 8284f53af40SRichard Tran Mills 8299566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 8309566063dSJacob Faibussowitsch if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 8319566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)P, &state)); 8329566063dSJacob Faibussowitsch if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 8334f53af40SRichard Tran Mills csrA = a->csrA; 8344f53af40SRichard Tran Mills csrP = p->csrA; 8354f53af40SRichard Tran Mills csrC = c->csrA; 836b9e1dd46SRichard Tran Mills descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 837190ae7a4SRichard Tran Mills descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 838b9e1dd46SRichard Tran Mills descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 8394f53af40SRichard Tran Mills 840f8990b4aSRichard Tran Mills /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 841792fecdfSBarry Smith PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT); 8424f53af40SRichard Tran Mills 843190ae7a4SRichard Tran Mills /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 844190ae7a4SRichard Tran Mills * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 845190ae7a4SRichard Tran Mills * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 8467301b172SPierre Jolivet * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY 847190ae7a4SRichard Tran Mills * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 848190ae7a4SRichard Tran Mills * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 849190ae7a4SRichard Tran Mills * the full matrix. */ 8509566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 8519566063dSJacob Faibussowitsch PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct)); 8529566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(C, &zeros, NULL)); 8539566063dSJacob Faibussowitsch PetscCall(VecSetFromOptions(zeros)); 8549566063dSJacob Faibussowitsch PetscCall(VecZeroEntries(zeros)); 8559566063dSJacob Faibussowitsch PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES)); 8569566063dSJacob Faibussowitsch PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN)); 857190ae7a4SRichard Tran Mills /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 8589566063dSJacob Faibussowitsch PetscCall(MatProductCreateWithMat(A, P, PETSC_NULL, C)); 8599566063dSJacob Faibussowitsch PetscCall(MatProductSetType(C, MATPRODUCT_PtAP)); 8609566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_create_mkl_handle(C)); 8619566063dSJacob Faibussowitsch PetscCall(VecDestroy(&zeros)); 8629566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Ct)); 8634f53af40SRichard Tran Mills PetscFunctionReturn(0); 8644f53af40SRichard Tran Mills } 865190ae7a4SRichard Tran Mills 866*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 867*d71ae5a4SJacob Faibussowitsch { 868190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 869190ae7a4SRichard Tran Mills Mat A = product->A, P = product->B; 8701495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr; 871190ae7a4SRichard Tran Mills sparse_matrix_t csrA, csrP, csrC; 872190ae7a4SRichard Tran Mills struct matrix_descr descr_type_sym; 873190ae7a4SRichard Tran Mills PetscObjectState state; 874190ae7a4SRichard Tran Mills 875190ae7a4SRichard Tran Mills PetscFunctionBegin; 8769566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 8779566063dSJacob Faibussowitsch if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 8789566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)P, &state)); 8799566063dSJacob Faibussowitsch if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 880190ae7a4SRichard Tran Mills csrA = a->csrA; 881190ae7a4SRichard Tran Mills csrP = p->csrA; 882190ae7a4SRichard Tran Mills descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 883190ae7a4SRichard Tran Mills descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 884190ae7a4SRichard Tran Mills descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 885190ae7a4SRichard Tran Mills 886190ae7a4SRichard Tran Mills /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 887190ae7a4SRichard Tran Mills if (csrP && csrA) { 888792fecdfSBarry Smith PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL); 889190ae7a4SRichard Tran Mills } else { 890190ae7a4SRichard Tran Mills csrC = PETSC_NULL; 891190ae7a4SRichard Tran Mills } 892190ae7a4SRichard Tran Mills 893190ae7a4SRichard Tran Mills /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 894190ae7a4SRichard Tran Mills * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 89549ba5396SRichard Tran Mills * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 89649ba5396SRichard Tran Mills * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 89749ba5396SRichard Tran Mills * is guaranteed. */ 8989566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C)); 899190ae7a4SRichard Tran Mills 900190ae7a4SRichard Tran Mills C->ops->productnumeric = MatProductNumeric_PtAP; 901190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 902190ae7a4SRichard Tran Mills } 903190ae7a4SRichard Tran Mills 904*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 905*d71ae5a4SJacob Faibussowitsch { 906190ae7a4SRichard Tran Mills PetscFunctionBegin; 907190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_AB; 908190ae7a4SRichard Tran Mills C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 909190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 910190ae7a4SRichard Tran Mills } 911190ae7a4SRichard Tran Mills 912*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 913*d71ae5a4SJacob Faibussowitsch { 914190ae7a4SRichard Tran Mills PetscFunctionBegin; 915190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 916190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 917190ae7a4SRichard Tran Mills } 918190ae7a4SRichard Tran Mills 919*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 920*d71ae5a4SJacob Faibussowitsch { 921190ae7a4SRichard Tran Mills PetscFunctionBegin; 922190ae7a4SRichard Tran Mills C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 923190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_ABt; 924190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 925190ae7a4SRichard Tran Mills } 926190ae7a4SRichard Tran Mills 927*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 928*d71ae5a4SJacob Faibussowitsch { 929190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 930190ae7a4SRichard Tran Mills Mat A = product->A; 931190ae7a4SRichard Tran Mills PetscBool set, flag; 932190ae7a4SRichard Tran Mills 933190ae7a4SRichard Tran Mills PetscFunctionBegin; 934a3d67537SPierre Jolivet if (PetscDefined(USE_COMPLEX)) { 9352ab6f6a8SStefano Zampini /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used. 9362ab6f6a8SStefano Zampini * We do this in several other locations in this file. This works for the time being, but these 937190ae7a4SRichard Tran Mills * routines are considered unsafe and may be removed from the MatProduct code in the future. 9382ab6f6a8SStefano Zampini * TODO: Add proper MATSEQAIJMKL implementations */ 939190ae7a4SRichard Tran Mills C->ops->productsymbolic = NULL; 940a3d67537SPierre Jolivet } else { 941190ae7a4SRichard Tran Mills /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 9429566063dSJacob Faibussowitsch PetscCall(MatIsSymmetricKnown(A, &set, &flag)); 943a3d67537SPierre Jolivet if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 944a3d67537SPierre Jolivet else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 9451495fedeSRichard Tran Mills /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 946190ae7a4SRichard Tran Mills * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 947a3d67537SPierre Jolivet } 948190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 949190ae7a4SRichard Tran Mills } 950190ae7a4SRichard Tran Mills 951*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 952*d71ae5a4SJacob Faibussowitsch { 953190ae7a4SRichard Tran Mills PetscFunctionBegin; 9542ab6f6a8SStefano Zampini C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 955190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 956190ae7a4SRichard Tran Mills } 957190ae7a4SRichard Tran Mills 958*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 959*d71ae5a4SJacob Faibussowitsch { 960190ae7a4SRichard Tran Mills PetscFunctionBegin; 9612ab6f6a8SStefano Zampini C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 962190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 963190ae7a4SRichard Tran Mills } 964190ae7a4SRichard Tran Mills 965*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 966*d71ae5a4SJacob Faibussowitsch { 967190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 968190ae7a4SRichard Tran Mills 969190ae7a4SRichard Tran Mills PetscFunctionBegin; 970190ae7a4SRichard Tran Mills switch (product->type) { 971*d71ae5a4SJacob Faibussowitsch case MATPRODUCT_AB: 972*d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C)); 973*d71ae5a4SJacob Faibussowitsch break; 974*d71ae5a4SJacob Faibussowitsch case MATPRODUCT_AtB: 975*d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C)); 976*d71ae5a4SJacob Faibussowitsch break; 977*d71ae5a4SJacob Faibussowitsch case MATPRODUCT_ABt: 978*d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C)); 979*d71ae5a4SJacob Faibussowitsch break; 980*d71ae5a4SJacob Faibussowitsch case MATPRODUCT_PtAP: 981*d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C)); 982*d71ae5a4SJacob Faibussowitsch break; 983*d71ae5a4SJacob Faibussowitsch case MATPRODUCT_RARt: 984*d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C)); 985*d71ae5a4SJacob Faibussowitsch break; 986*d71ae5a4SJacob Faibussowitsch case MATPRODUCT_ABC: 987*d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C)); 988*d71ae5a4SJacob Faibussowitsch break; 989*d71ae5a4SJacob Faibussowitsch default: 990*d71ae5a4SJacob Faibussowitsch break; 991190ae7a4SRichard Tran Mills } 992190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 993190ae7a4SRichard Tran Mills } 994431879ecSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 995190ae7a4SRichard Tran Mills /* ------------------------ End MatProduct code ------------------------ */ 9964f53af40SRichard Tran Mills 9974a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 998510b72f4SRichard Tran Mills * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 9994a2a386eSRichard Tran Mills * routine, but can also be used to convert an assembled SeqAIJ matrix 10004a2a386eSRichard Tran Mills * into a SeqAIJMKL one. */ 1001*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) 1002*d71ae5a4SJacob Faibussowitsch { 10034a2a386eSRichard Tran Mills Mat B = *newmat; 10044a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 1005c9d46305SRichard Tran Mills PetscBool set; 1006e9c94282SRichard Tran Mills PetscBool sametype; 10074a2a386eSRichard Tran Mills 10084a2a386eSRichard Tran Mills PetscFunctionBegin; 10099566063dSJacob Faibussowitsch if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 10104a2a386eSRichard Tran Mills 10119566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype)); 1012e9c94282SRichard Tran Mills if (sametype) PetscFunctionReturn(0); 1013e9c94282SRichard Tran Mills 10144dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&aijmkl)); 10154a2a386eSRichard Tran Mills B->spptr = (void *)aijmkl; 10164a2a386eSRichard Tran Mills 1017df555b71SRichard Tran Mills /* Set function pointers for methods that we inherit from AIJ but override. 1018969800c5SRichard Tran Mills * We also parse some command line options below, since those determine some of the methods we point to. */ 10194a2a386eSRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJMKL; 10204a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 10214a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJMKL; 1022c9d46305SRichard Tran Mills 10234abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 1024ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1025d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1026a8327b06SKarl Rupp #else 1027d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 1028d995685eSRichard Tran Mills #endif 10295b49642aSRichard Tran Mills aijmkl->eager_inspection = PETSC_FALSE; 10304abfa3b3SRichard Tran Mills 10314abfa3b3SRichard Tran Mills /* Parse command line options. */ 1032d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat"); 10339566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set)); 10349566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set)); 1035d0609cedSBarry Smith PetscOptionsEnd(); 1036ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1037d995685eSRichard Tran Mills if (!aijmkl->no_SpMV2) { 10389566063dSJacob Faibussowitsch PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n")); 1039d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 1040d995685eSRichard Tran Mills } 1041d995685eSRichard Tran Mills #endif 1042c9d46305SRichard Tran Mills 1043ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1044df555b71SRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1045969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1046df555b71SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1047969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 10488a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1049190ae7a4SRichard Tran Mills B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1050190ae7a4SRichard Tran Mills B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1051190ae7a4SRichard Tran Mills B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1052190ae7a4SRichard Tran Mills B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1053190ae7a4SRichard Tran Mills B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1054ffcab697SRichard Tran Mills #if !defined(PETSC_USE_COMPLEX) 105549ba5396SRichard Tran Mills B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1056190ae7a4SRichard Tran Mills #else 1057190ae7a4SRichard Tran Mills B->ops->ptapnumeric = NULL; 10584f53af40SRichard Tran Mills #endif 1059e8be1fc7SRichard Tran Mills #endif 10601950a7e7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 10611950a7e7SRichard Tran Mills 1062213898a2SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1063213898a2SRichard Tran Mills /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1064213898a2SRichard Tran Mills * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1065213898a2SRichard Tran Mills * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1066213898a2SRichard Tran Mills * versions in which the older interface has not been deprecated, we use the old interface. */ 10671950a7e7SRichard Tran Mills if (aijmkl->no_SpMV2) { 10684a2a386eSRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL; 1069969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 10704a2a386eSRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL; 1071969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1072c9d46305SRichard Tran Mills } 10731950a7e7SRichard Tran Mills #endif 10744a2a386eSRichard Tran Mills 10759566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ)); 10764a2a386eSRichard Tran Mills 10779566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL)); 10784a2a386eSRichard Tran Mills *newmat = B; 10794a2a386eSRichard Tran Mills PetscFunctionReturn(0); 10804a2a386eSRichard Tran Mills } 10814a2a386eSRichard Tran Mills 10824a2a386eSRichard Tran Mills /*@C 108311a5261eSBarry Smith MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`. 108411a5261eSBarry Smith This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS 10854a2a386eSRichard Tran Mills routines from Intel MKL whenever possible. 108690147e49SRichard Tran Mills If the installed version of MKL supports the "SpMV2" sparse 108790147e49SRichard Tran Mills inspector-executor routines, then those are used by default. 108811a5261eSBarry Smith `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()` 108911a5261eSBarry Smith (for symmetric A) operations are currently supported. 109011a5261eSBarry Smith Note that MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`. 109190147e49SRichard Tran Mills 1092d083f849SBarry Smith Collective 10934a2a386eSRichard Tran Mills 10944a2a386eSRichard Tran Mills Input Parameters: 109511a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF` 10964a2a386eSRichard Tran Mills . m - number of rows 10974a2a386eSRichard Tran Mills . n - number of columns 10984a2a386eSRichard Tran Mills . nz - number of nonzeros per row (same for all rows) 10994a2a386eSRichard Tran Mills - nnz - array containing the number of nonzeros in the various rows 11004a2a386eSRichard Tran Mills (possibly different for each row) or NULL 11014a2a386eSRichard Tran Mills 11024a2a386eSRichard Tran Mills Output Parameter: 11034a2a386eSRichard Tran Mills . A - the matrix 11044a2a386eSRichard Tran Mills 110590147e49SRichard Tran Mills Options Database Keys: 110666b7eeb6SRichard Tran Mills + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 110766b7eeb6SRichard Tran Mills - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 110890147e49SRichard Tran Mills 110911a5261eSBarry Smith Note: 11104a2a386eSRichard Tran Mills If nnz is given then nz is ignored 11114a2a386eSRichard Tran Mills 11124a2a386eSRichard Tran Mills Level: intermediate 11134a2a386eSRichard Tran Mills 1114db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()` 11154a2a386eSRichard Tran Mills @*/ 1116*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 1117*d71ae5a4SJacob Faibussowitsch { 11184a2a386eSRichard Tran Mills PetscFunctionBegin; 11199566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 11209566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, m, n)); 11219566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQAIJMKL)); 11229566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz)); 11234a2a386eSRichard Tran Mills PetscFunctionReturn(0); 11244a2a386eSRichard Tran Mills } 11254a2a386eSRichard Tran Mills 1126*d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1127*d71ae5a4SJacob Faibussowitsch { 11284a2a386eSRichard Tran Mills PetscFunctionBegin; 11299566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATSEQAIJ)); 11309566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A)); 11314a2a386eSRichard Tran Mills PetscFunctionReturn(0); 11324a2a386eSRichard Tran Mills } 1133