1b9e7e5c1SBarry Smith 24a2a386eSRichard Tran Mills /* 34a2a386eSRichard Tran Mills Defines basic operations for the MATSEQAIJMKL matrix class. 44a2a386eSRichard Tran Mills This class is derived from the MATSEQAIJ class and retains the 54a2a386eSRichard Tran Mills compressed row storage (aka Yale sparse matrix format) but uses 64a2a386eSRichard Tran Mills sparse BLAS operations from the Intel Math Kernel Library (MKL) 74a2a386eSRichard Tran Mills wherever possible. 84a2a386eSRichard Tran Mills */ 94a2a386eSRichard Tran Mills 104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h> 114a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12b9e7e5c1SBarry Smith #include <mkl_spblas.h> 134a2a386eSRichard Tran Mills 144a2a386eSRichard Tran Mills typedef struct { 15c9d46305SRichard Tran Mills PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 165b49642aSRichard Tran Mills PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 174abfa3b3SRichard Tran Mills PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18551aa5c8SRichard Tran Mills PetscObjectState state; 19ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20df555b71SRichard Tran Mills sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21df555b71SRichard Tran Mills struct matrix_descr descr; 22b8cbc1fbSRichard Tran Mills #endif 234a2a386eSRichard Tran Mills } Mat_SeqAIJMKL; 244a2a386eSRichard Tran Mills 254a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType); 264a2a386eSRichard Tran Mills 279371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat) { 284a2a386eSRichard Tran Mills /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 294a2a386eSRichard Tran Mills /* so we will ignore 'MatType type'. */ 304a2a386eSRichard Tran Mills Mat B = *newmat; 31ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 324a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 33c1d5218aSRichard Tran Mills #endif 344a2a386eSRichard Tran Mills 354a2a386eSRichard Tran Mills PetscFunctionBegin; 369566063dSJacob Faibussowitsch if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 374a2a386eSRichard Tran Mills 384a2a386eSRichard Tran Mills /* Reset the original function pointers. */ 3954871a98SRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJ; 404a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 414a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJ; 4254871a98SRichard Tran Mills B->ops->mult = MatMult_SeqAIJ; 43ff03dc53SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJ; 4454871a98SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJ; 45ff03dc53SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 46190ae7a4SRichard Tran Mills B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 47431879ecSRichard Tran Mills B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 48e8be1fc7SRichard Tran Mills B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 49190ae7a4SRichard Tran Mills B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 50190ae7a4SRichard Tran Mills B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 514f53af40SRichard Tran Mills B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 524a2a386eSRichard Tran Mills 539566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL)); 544222ddf1SHong Zhang 55ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 564abfa3b3SRichard Tran Mills /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 57e9c94282SRichard Tran Mills * simply involves destroying the MKL sparse matrix handle and then freeing 58e9c94282SRichard Tran Mills * the spptr pointer. */ 59a8327b06SKarl Rupp if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr; 60a8327b06SKarl Rupp 61792fecdfSBarry Smith if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 62ddf6f99aSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 639566063dSJacob Faibussowitsch PetscCall(PetscFree(B->spptr)); 644a2a386eSRichard Tran Mills 654a2a386eSRichard Tran Mills /* Change the type of B to MATSEQAIJ. */ 669566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ)); 674a2a386eSRichard Tran Mills 684a2a386eSRichard Tran Mills *newmat = B; 694a2a386eSRichard Tran Mills PetscFunctionReturn(0); 704a2a386eSRichard Tran Mills } 714a2a386eSRichard Tran Mills 729371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) { 734a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 744a2a386eSRichard Tran Mills 754a2a386eSRichard Tran Mills PetscFunctionBegin; 76e9c94282SRichard Tran Mills 77edc89de7SRichard Tran Mills /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */ 78e9c94282SRichard Tran Mills if (aijmkl) { 794a2a386eSRichard Tran Mills /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 80ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 81792fecdfSBarry Smith if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 824abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 839566063dSJacob Faibussowitsch PetscCall(PetscFree(A->spptr)); 84e9c94282SRichard Tran Mills } 854a2a386eSRichard Tran Mills 864a2a386eSRichard Tran Mills /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 874a2a386eSRichard Tran Mills * to destroy everything that remains. */ 889566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ)); 894a2a386eSRichard Tran Mills /* Note that I don't call MatSetType(). I believe this is because that 904a2a386eSRichard Tran Mills * is only to be called when *building* a matrix. I could be wrong, but 914a2a386eSRichard Tran Mills * that is how things work for the SuperLU matrix class. */ 922e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL)); 939566063dSJacob Faibussowitsch PetscCall(MatDestroy_SeqAIJ(A)); 944a2a386eSRichard Tran Mills PetscFunctionReturn(0); 954a2a386eSRichard Tran Mills } 964a2a386eSRichard Tran Mills 97190ae7a4SRichard Tran Mills /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 985b49642aSRichard Tran Mills * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 995b49642aSRichard Tran Mills * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 1005b49642aSRichard Tran Mills * handle, creates a new one, and then calls mkl_sparse_optimize(). 1015b49642aSRichard Tran Mills * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 1025b49642aSRichard Tran Mills * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 1035b49642aSRichard Tran Mills * an unoptimized matrix handle here. */ 1049371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) { 105ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1066e369cd5SRichard Tran Mills /* If the MKL library does not have mkl_sparse_optimize(), then this routine 1076e369cd5SRichard Tran Mills * does nothing. We make it callable anyway in this case because it cuts 1086e369cd5SRichard Tran Mills * down on littering the code with #ifdefs. */ 10945fbe478SRichard Tran Mills PetscFunctionBegin; 1106e369cd5SRichard Tran Mills PetscFunctionReturn(0); 1116e369cd5SRichard Tran Mills #else 112a8327b06SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 113a8327b06SKarl Rupp Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 114a8327b06SKarl Rupp PetscInt m, n; 115a8327b06SKarl Rupp MatScalar *aa; 116a8327b06SKarl Rupp PetscInt *aj, *ai; 1174a2a386eSRichard Tran Mills 118a8327b06SKarl Rupp PetscFunctionBegin; 119e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 120e626a176SRichard Tran Mills /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 121e626a176SRichard Tran Mills * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 122e626a176SRichard Tran Mills * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 123e626a176SRichard Tran Mills * mkl_sparse_optimize() later. */ 1246e369cd5SRichard Tran Mills if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 1254d51fa23SRichard Tran Mills #endif 1266e369cd5SRichard Tran Mills 1270632b357SRichard Tran Mills if (aijmkl->sparse_optimized) { 1280632b357SRichard Tran Mills /* Matrix has been previously assembled and optimized. Must destroy old 1290632b357SRichard Tran Mills * matrix handle before running the optimization step again. */ 130792fecdfSBarry Smith PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 1310632b357SRichard Tran Mills } 1328d3fe1b0SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 1336e369cd5SRichard Tran Mills 134c9d46305SRichard Tran Mills /* Now perform the SpMV2 setup and matrix optimization. */ 135df555b71SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 136df555b71SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 137df555b71SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 13858678438SRichard Tran Mills m = A->rmap->n; 13958678438SRichard Tran Mills n = A->cmap->n; 140df555b71SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 141df555b71SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 142df555b71SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 1431495fedeSRichard Tran Mills if (a->nz && aa && !A->structure_only) { 1448d3fe1b0SRichard Tran Mills /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 1458d3fe1b0SRichard Tran Mills * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 146792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, m, n, ai, ai + 1, aj, aa); 147792fecdfSBarry Smith PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000); 148792fecdfSBarry Smith PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE); 149*48a46eb9SPierre Jolivet if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA); 1504abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 1519566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state))); 152190ae7a4SRichard Tran Mills } else { 153190ae7a4SRichard Tran Mills aijmkl->csrA = PETSC_NULL; 154c9d46305SRichard Tran Mills } 1556e369cd5SRichard Tran Mills 1566e369cd5SRichard Tran Mills PetscFunctionReturn(0); 157d995685eSRichard Tran Mills #endif 1586e369cd5SRichard Tran Mills } 1596e369cd5SRichard Tran Mills 160b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 161190ae7a4SRichard Tran Mills /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 1629371c9d4SSatish Balay static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A) { 16319afcda9SRichard Tran Mills sparse_index_base_t indexing; 164190ae7a4SRichard Tran Mills PetscInt m, n; 16545fbe478SRichard Tran Mills PetscInt *aj, *ai, *dummy; 16619afcda9SRichard Tran Mills MatScalar *aa; 16719afcda9SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 16819afcda9SRichard Tran Mills 169362febeeSStefano Zampini PetscFunctionBegin; 170190ae7a4SRichard Tran Mills if (csrA) { 17145fbe478SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 172792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, &m, &n, &ai, &dummy, &aj, &aa); 1735f80ce2aSJacob Faibussowitsch PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 174190ae7a4SRichard Tran Mills } else { 175190ae7a4SRichard Tran Mills aj = ai = PETSC_NULL; 176190ae7a4SRichard Tran Mills aa = PETSC_NULL; 177aab60f1bSRichard Tran Mills } 178190ae7a4SRichard Tran Mills 1799566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATSEQAIJ)); 1809566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols)); 181aab60f1bSRichard Tran Mills /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 182aab60f1bSRichard Tran Mills * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 183aab60f1bSRichard Tran Mills * they will be destroyed when the MKL handle is destroyed. 184aab60f1bSRichard Tran Mills * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 185190ae7a4SRichard Tran Mills if (csrA) { 1869566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL)); 187190ae7a4SRichard Tran Mills } else { 188190ae7a4SRichard Tran Mills /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 1899566063dSJacob Faibussowitsch PetscCall(MatSetUp(A)); 1909566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1919566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 192190ae7a4SRichard Tran Mills } 19319afcda9SRichard Tran Mills 19419afcda9SRichard Tran Mills /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 19519afcda9SRichard Tran Mills * Now turn it into a MATSEQAIJMKL. */ 1969566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A)); 1976c87cf42SRichard Tran Mills 19819afcda9SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL *)A->spptr; 19919afcda9SRichard Tran Mills aijmkl->csrA = csrA; 2006c87cf42SRichard Tran Mills 20119afcda9SRichard Tran Mills /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 20219afcda9SRichard Tran Mills * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 20319afcda9SRichard Tran Mills * and just need to be able to run the MKL optimization step. */ 204f3fd1758SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 205f3fd1758SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 206f3fd1758SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 207190ae7a4SRichard Tran Mills if (csrA) { 208792fecdfSBarry Smith PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000); 209792fecdfSBarry Smith PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE); 2101950a7e7SRichard Tran Mills } 2119566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state))); 21219afcda9SRichard Tran Mills PetscFunctionReturn(0); 21319afcda9SRichard Tran Mills } 214b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 215190ae7a4SRichard Tran Mills 216e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 217e8be1fc7SRichard Tran Mills * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 218e8be1fc7SRichard Tran Mills * MatMatMultNumeric(). */ 219b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 2209371c9d4SSatish Balay static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) { 221e8be1fc7SRichard Tran Mills PetscInt i; 222e8be1fc7SRichard Tran Mills PetscInt nrows, ncols; 223e8be1fc7SRichard Tran Mills PetscInt nz; 224e8be1fc7SRichard Tran Mills PetscInt *ai, *aj, *dummy; 225e8be1fc7SRichard Tran Mills PetscScalar *aa; 2261495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 227e8be1fc7SRichard Tran Mills sparse_index_base_t indexing; 228e8be1fc7SRichard Tran Mills 229362febeeSStefano Zampini PetscFunctionBegin; 230190ae7a4SRichard Tran Mills /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 231190ae7a4SRichard Tran Mills if (!aijmkl->csrA) PetscFunctionReturn(0); 232190ae7a4SRichard Tran Mills 233e8be1fc7SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 234792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa); 235e8be1fc7SRichard Tran Mills 236e8be1fc7SRichard Tran Mills /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 237e8be1fc7SRichard Tran Mills * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 238e8be1fc7SRichard Tran Mills for (i = 0; i < nrows; i++) { 239e8be1fc7SRichard Tran Mills nz = ai[i + 1] - ai[i]; 2409566063dSJacob Faibussowitsch PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES)); 241e8be1fc7SRichard Tran Mills } 242e8be1fc7SRichard Tran Mills 2439566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 2449566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 245e8be1fc7SRichard Tran Mills 2469566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state))); 247a7180b50SRichard Tran Mills /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation. 248a7180b50SRichard Tran Mills * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed 249a7180b50SRichard Tran Mills * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */ 250a7180b50SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 251e8be1fc7SRichard Tran Mills PetscFunctionReturn(0); 252e8be1fc7SRichard Tran Mills } 253b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 254e8be1fc7SRichard Tran Mills 2553849ddb2SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 2569371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer) { 2573849ddb2SRichard Tran Mills PetscInt i, j, k; 2583849ddb2SRichard Tran Mills PetscInt nrows, ncols; 2593849ddb2SRichard Tran Mills PetscInt nz; 2603849ddb2SRichard Tran Mills PetscInt *ai, *aj, *dummy; 2613849ddb2SRichard Tran Mills PetscScalar *aa; 2621495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 2633849ddb2SRichard Tran Mills sparse_index_base_t indexing; 2643849ddb2SRichard Tran Mills 265362febeeSStefano Zampini PetscFunctionBegin; 2669566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n")); 2673849ddb2SRichard Tran Mills 2683849ddb2SRichard Tran Mills /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 2693849ddb2SRichard Tran Mills if (!aijmkl->csrA) { 2709566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n")); 2713849ddb2SRichard Tran Mills PetscFunctionReturn(0); 2723849ddb2SRichard Tran Mills } 2733849ddb2SRichard Tran Mills 2743849ddb2SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 275792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa); 2763849ddb2SRichard Tran Mills 2773849ddb2SRichard Tran Mills k = 0; 2783849ddb2SRichard Tran Mills for (i = 0; i < nrows; i++) { 2799566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i)); 2803849ddb2SRichard Tran Mills nz = ai[i + 1] - ai[i]; 2813849ddb2SRichard Tran Mills for (j = 0; j < nz; j++) { 2823849ddb2SRichard Tran Mills if (aa) { 2839566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g) ", aj[k], PetscRealPart(aa[k]))); 2843849ddb2SRichard Tran Mills } else { 2859566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k])); 2863849ddb2SRichard Tran Mills } 2873849ddb2SRichard Tran Mills k++; 2883849ddb2SRichard Tran Mills } 2899566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 2903849ddb2SRichard Tran Mills } 2913849ddb2SRichard Tran Mills PetscFunctionReturn(0); 2923849ddb2SRichard Tran Mills } 2933849ddb2SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 2943849ddb2SRichard Tran Mills 2959371c9d4SSatish Balay PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) { 2961495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 2976e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl_dest; 2986e369cd5SRichard Tran Mills 2996e369cd5SRichard Tran Mills PetscFunctionBegin; 3009566063dSJacob Faibussowitsch PetscCall(MatDuplicate_SeqAIJ(A, op, M)); 3016e369cd5SRichard Tran Mills aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr; 3029566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1)); 3036e369cd5SRichard Tran Mills aijmkl_dest->sparse_optimized = PETSC_FALSE; 3041baa6e33SBarry Smith if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 3056e369cd5SRichard Tran Mills PetscFunctionReturn(0); 3066e369cd5SRichard Tran Mills } 3076e369cd5SRichard Tran Mills 3089371c9d4SSatish Balay PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) { 3096e369cd5SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3105b49642aSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 3116e369cd5SRichard Tran Mills 3126e369cd5SRichard Tran Mills PetscFunctionBegin; 3136e369cd5SRichard Tran Mills if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 3146e369cd5SRichard Tran Mills 3156e369cd5SRichard Tran Mills /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 3166e369cd5SRichard Tran Mills * extra information and some different methods, call the AssemblyEnd 3176e369cd5SRichard Tran Mills * routine for a MATSEQAIJ. 3186e369cd5SRichard Tran Mills * I'm not sure if this is the best way to do this, but it avoids 319d96e85feSRichard Tran Mills * a lot of code duplication. */ 3206e369cd5SRichard Tran Mills a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 3219566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 3226e369cd5SRichard Tran Mills 3235b49642aSRichard Tran Mills /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 3245b49642aSRichard Tran Mills * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 3255b49642aSRichard Tran Mills aijmkl = (Mat_SeqAIJMKL *)A->spptr; 3261baa6e33SBarry Smith if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 327df555b71SRichard Tran Mills 3284a2a386eSRichard Tran Mills PetscFunctionReturn(0); 3294a2a386eSRichard Tran Mills } 3304a2a386eSRichard Tran Mills 331e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 3329371c9d4SSatish Balay PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy) { 3334a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3344a2a386eSRichard Tran Mills const PetscScalar *x; 3354a2a386eSRichard Tran Mills PetscScalar *y; 3364a2a386eSRichard Tran Mills const MatScalar *aa; 3374a2a386eSRichard Tran Mills PetscInt m = A->rmap->n; 338db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 339db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 340db63039fSRichard Tran Mills PetscScalar beta = 0.0; 3414a2a386eSRichard Tran Mills const PetscInt *aj, *ai; 342db63039fSRichard Tran Mills char matdescra[6]; 343db63039fSRichard Tran Mills 3444a2a386eSRichard Tran Mills /* Variables not in MatMult_SeqAIJ. */ 345ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 346ff03dc53SRichard Tran Mills 347ff03dc53SRichard Tran Mills PetscFunctionBegin; 348db63039fSRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 349db63039fSRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 3509566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3519566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 352ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 353ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 354ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 355ff03dc53SRichard Tran Mills 356ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 357db63039fSRichard Tran Mills mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y); 358ff03dc53SRichard Tran Mills 3599566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 3609566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3619566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 362ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 363ff03dc53SRichard Tran Mills } 3641950a7e7SRichard Tran Mills #endif 365ff03dc53SRichard Tran Mills 366ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 3679371c9d4SSatish Balay PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) { 368df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 369df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 370df555b71SRichard Tran Mills const PetscScalar *x; 371df555b71SRichard Tran Mills PetscScalar *y; 372551aa5c8SRichard Tran Mills PetscObjectState state; 373df555b71SRichard Tran Mills 374df555b71SRichard Tran Mills PetscFunctionBegin; 375df555b71SRichard Tran Mills 37638987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 37738987b35SRichard Tran Mills if (!a->nz) { 3789566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 3799566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(y, A->rmap->n)); 3809566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 38138987b35SRichard Tran Mills PetscFunctionReturn(0); 38238987b35SRichard Tran Mills } 383f36dfe3fSRichard Tran Mills 3849566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 3859566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 386df555b71SRichard Tran Mills 3873fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 3883fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 3893fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 3909566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 3919566063dSJacob Faibussowitsch if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 3923fa15762SRichard Tran Mills 393df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMult. */ 394792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y); 395df555b71SRichard Tran Mills 3969566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 3979566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 3989566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 399df555b71SRichard Tran Mills PetscFunctionReturn(0); 400df555b71SRichard Tran Mills } 401d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 402df555b71SRichard Tran Mills 403e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 4049371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy) { 405ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 406ff03dc53SRichard Tran Mills const PetscScalar *x; 407ff03dc53SRichard Tran Mills PetscScalar *y; 408ff03dc53SRichard Tran Mills const MatScalar *aa; 409ff03dc53SRichard Tran Mills PetscInt m = A->rmap->n; 410db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 411db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 412db63039fSRichard Tran Mills PetscScalar beta = 0.0; 413ff03dc53SRichard Tran Mills const PetscInt *aj, *ai; 414db63039fSRichard Tran Mills char matdescra[6]; 415ff03dc53SRichard Tran Mills 416ff03dc53SRichard Tran Mills /* Variables not in MatMultTranspose_SeqAIJ. */ 417ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 4184a2a386eSRichard Tran Mills 4194a2a386eSRichard Tran Mills PetscFunctionBegin; 420969800c5SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 421969800c5SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 4229566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4239566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 4244a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 4254a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 4264a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 4274a2a386eSRichard Tran Mills 4284a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 429db63039fSRichard Tran Mills mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y); 4304a2a386eSRichard Tran Mills 4319566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 4329566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4339566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 4344a2a386eSRichard Tran Mills PetscFunctionReturn(0); 4354a2a386eSRichard Tran Mills } 4361950a7e7SRichard Tran Mills #endif 4374a2a386eSRichard Tran Mills 438ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 4399371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) { 440df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 441df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 442df555b71SRichard Tran Mills const PetscScalar *x; 443df555b71SRichard Tran Mills PetscScalar *y; 444551aa5c8SRichard Tran Mills PetscObjectState state; 445df555b71SRichard Tran Mills 446df555b71SRichard Tran Mills PetscFunctionBegin; 447df555b71SRichard Tran Mills 44838987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 44938987b35SRichard Tran Mills if (!a->nz) { 4509566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 4519566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(y, A->cmap->n)); 4529566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 45338987b35SRichard Tran Mills PetscFunctionReturn(0); 45438987b35SRichard Tran Mills } 455f36dfe3fSRichard Tran Mills 4569566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4579566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 458df555b71SRichard Tran Mills 4593fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 4603fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 4613fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 4629566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 4639566063dSJacob Faibussowitsch if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 4643fa15762SRichard Tran Mills 465df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 466792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y); 467df555b71SRichard Tran Mills 4689566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 4699566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 4709566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 471df555b71SRichard Tran Mills PetscFunctionReturn(0); 472df555b71SRichard Tran Mills } 473d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 474df555b71SRichard Tran Mills 475e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 4769371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) { 4774a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 4784a2a386eSRichard Tran Mills const PetscScalar *x; 4794a2a386eSRichard Tran Mills PetscScalar *y, *z; 4804a2a386eSRichard Tran Mills const MatScalar *aa; 4814a2a386eSRichard Tran Mills PetscInt m = A->rmap->n; 482db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 4834a2a386eSRichard Tran Mills const PetscInt *aj, *ai; 4844a2a386eSRichard Tran Mills PetscInt i; 4854a2a386eSRichard Tran Mills 486ff03dc53SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 487ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 488a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 489db63039fSRichard Tran Mills PetscScalar beta; 490a84739b8SRichard Tran Mills char matdescra[6]; 491ff03dc53SRichard Tran Mills 492ff03dc53SRichard Tran Mills PetscFunctionBegin; 493a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 494a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 495a84739b8SRichard Tran Mills 4969566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 4979566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 498ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 499ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 500ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 501ff03dc53SRichard Tran Mills 502ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 503a84739b8SRichard Tran Mills if (zz == yy) { 504a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 505db63039fSRichard Tran Mills beta = 1.0; 506db63039fSRichard Tran Mills mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 507a84739b8SRichard Tran Mills } else { 508db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 509db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 510db63039fSRichard Tran Mills beta = 0.0; 511db63039fSRichard Tran Mills mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 5129371c9d4SSatish Balay for (i = 0; i < m; i++) { z[i] += y[i]; } 513a84739b8SRichard Tran Mills } 514ff03dc53SRichard Tran Mills 5159566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 5169566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 5179566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 518ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 519ff03dc53SRichard Tran Mills } 5201950a7e7SRichard Tran Mills #endif 521ff03dc53SRichard Tran Mills 522ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 5239371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) { 524df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 525df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 526df555b71SRichard Tran Mills const PetscScalar *x; 527df555b71SRichard Tran Mills PetscScalar *y, *z; 528df555b71SRichard Tran Mills PetscInt m = A->rmap->n; 529df555b71SRichard Tran Mills PetscInt i; 530df555b71SRichard Tran Mills 531df555b71SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 532551aa5c8SRichard Tran Mills PetscObjectState state; 533df555b71SRichard Tran Mills 534df555b71SRichard Tran Mills PetscFunctionBegin; 535df555b71SRichard Tran Mills 53638987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 53738987b35SRichard Tran Mills if (!a->nz) { 5389566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 5399566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(z, y, m)); 5409566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 54138987b35SRichard Tran Mills PetscFunctionReturn(0); 54238987b35SRichard Tran Mills } 543df555b71SRichard Tran Mills 5449566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 5459566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 546df555b71SRichard Tran Mills 5473fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 5483fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 5493fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 5509566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 5519566063dSJacob Faibussowitsch if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 5523fa15762SRichard Tran Mills 553df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 554df555b71SRichard Tran Mills if (zz == yy) { 555df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 556df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 557792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z); 558df555b71SRichard Tran Mills } else { 559df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 560df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 561792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z); 5625f80ce2aSJacob Faibussowitsch for (i = 0; i < m; i++) z[i] += y[i]; 563df555b71SRichard Tran Mills } 564df555b71SRichard Tran Mills 5659566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 5669566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 5679566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 568df555b71SRichard Tran Mills PetscFunctionReturn(0); 569df555b71SRichard Tran Mills } 570d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 571df555b71SRichard Tran Mills 572e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 5739371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) { 574ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 575ff03dc53SRichard Tran Mills const PetscScalar *x; 576ff03dc53SRichard Tran Mills PetscScalar *y, *z; 577ff03dc53SRichard Tran Mills const MatScalar *aa; 578ff03dc53SRichard Tran Mills PetscInt m = A->rmap->n; 579db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 580ff03dc53SRichard Tran Mills const PetscInt *aj, *ai; 581ff03dc53SRichard Tran Mills PetscInt i; 582ff03dc53SRichard Tran Mills 583ff03dc53SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 584ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 585a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 586db63039fSRichard Tran Mills PetscScalar beta; 587a84739b8SRichard Tran Mills char matdescra[6]; 5884a2a386eSRichard Tran Mills 5894a2a386eSRichard Tran Mills PetscFunctionBegin; 590a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 591a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 592a84739b8SRichard Tran Mills 5939566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 5949566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 5954a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 5964a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 5974a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 5984a2a386eSRichard Tran Mills 5994a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 600a84739b8SRichard Tran Mills if (zz == yy) { 601a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 602db63039fSRichard Tran Mills beta = 1.0; 603969800c5SRichard Tran Mills mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 604a84739b8SRichard Tran Mills } else { 605db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 606db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 607db63039fSRichard Tran Mills beta = 0.0; 608db63039fSRichard Tran Mills mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 6099371c9d4SSatish Balay for (i = 0; i < n; i++) { z[i] += y[i]; } 610a84739b8SRichard Tran Mills } 6114a2a386eSRichard Tran Mills 6129566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 6139566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 6149566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 6154a2a386eSRichard Tran Mills PetscFunctionReturn(0); 6164a2a386eSRichard Tran Mills } 6171950a7e7SRichard Tran Mills #endif 6184a2a386eSRichard Tran Mills 619ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 6209371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) { 621df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 622df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 623df555b71SRichard Tran Mills const PetscScalar *x; 624df555b71SRichard Tran Mills PetscScalar *y, *z; 625969800c5SRichard Tran Mills PetscInt n = A->cmap->n; 626df555b71SRichard Tran Mills PetscInt i; 627551aa5c8SRichard Tran Mills PetscObjectState state; 628df555b71SRichard Tran Mills 629df555b71SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 630df555b71SRichard Tran Mills 631df555b71SRichard Tran Mills PetscFunctionBegin; 632df555b71SRichard Tran Mills 63338987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 63438987b35SRichard Tran Mills if (!a->nz) { 6359566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 6369566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(z, y, n)); 6379566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 63838987b35SRichard Tran Mills PetscFunctionReturn(0); 63938987b35SRichard Tran Mills } 640f36dfe3fSRichard Tran Mills 6419566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 6429566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 643df555b71SRichard Tran Mills 6443fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 6453fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 6463fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 6479566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 6485f80ce2aSJacob Faibussowitsch if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A); 6493fa15762SRichard Tran Mills 650df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 651df555b71SRichard Tran Mills if (zz == yy) { 652df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 653df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 654792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z); 655df555b71SRichard Tran Mills } else { 656df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 657df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 658792fecdfSBarry Smith PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z); 6595f80ce2aSJacob Faibussowitsch for (i = 0; i < n; i++) z[i] += y[i]; 660df555b71SRichard Tran Mills } 661df555b71SRichard Tran Mills 6629566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 6639566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 6649566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 665df555b71SRichard Tran Mills PetscFunctionReturn(0); 666df555b71SRichard Tran Mills } 667d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 668df555b71SRichard Tran Mills 669190ae7a4SRichard Tran Mills /* -------------------------- MatProduct code -------------------------- */ 6708a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 6719371c9d4SSatish Balay static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) { 6721495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr; 673431879ecSRichard Tran Mills sparse_matrix_t csrA, csrB, csrC; 674190ae7a4SRichard Tran Mills PetscInt nrows, ncols; 675431879ecSRichard Tran Mills struct matrix_descr descr_type_gen; 676431879ecSRichard Tran Mills PetscObjectState state; 677431879ecSRichard Tran Mills 678431879ecSRichard Tran Mills PetscFunctionBegin; 679190ae7a4SRichard Tran Mills /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 680190ae7a4SRichard Tran Mills * not handle sparse matrices with zero rows or columns. */ 681190ae7a4SRichard Tran Mills if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 682190ae7a4SRichard Tran Mills else nrows = A->cmap->N; 683190ae7a4SRichard Tran Mills if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 684190ae7a4SRichard Tran Mills else ncols = B->rmap->N; 685190ae7a4SRichard Tran Mills 6869566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 6879566063dSJacob Faibussowitsch if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 6889566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)B, &state)); 6899566063dSJacob Faibussowitsch if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 690431879ecSRichard Tran Mills csrA = a->csrA; 691431879ecSRichard Tran Mills csrB = b->csrA; 692431879ecSRichard Tran Mills descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 693431879ecSRichard Tran Mills 694190ae7a4SRichard Tran Mills if (csrA && csrB) { 695792fecdfSBarry Smith PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC); 696190ae7a4SRichard Tran Mills } else { 697190ae7a4SRichard Tran Mills csrC = PETSC_NULL; 698190ae7a4SRichard Tran Mills } 699190ae7a4SRichard Tran Mills 7009566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C)); 701431879ecSRichard Tran Mills 702431879ecSRichard Tran Mills PetscFunctionReturn(0); 703431879ecSRichard Tran Mills } 704431879ecSRichard Tran Mills 7059371c9d4SSatish Balay PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) { 7061495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr; 707e8be1fc7SRichard Tran Mills sparse_matrix_t csrA, csrB, csrC; 708e8be1fc7SRichard Tran Mills struct matrix_descr descr_type_gen; 709e8be1fc7SRichard Tran Mills PetscObjectState state; 710e8be1fc7SRichard Tran Mills 711e8be1fc7SRichard Tran Mills PetscFunctionBegin; 7129566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 7139566063dSJacob Faibussowitsch if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 7149566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)B, &state)); 7159566063dSJacob Faibussowitsch if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 716e8be1fc7SRichard Tran Mills csrA = a->csrA; 717e8be1fc7SRichard Tran Mills csrB = b->csrA; 718e8be1fc7SRichard Tran Mills csrC = c->csrA; 719e8be1fc7SRichard Tran Mills descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 720e8be1fc7SRichard Tran Mills 721190ae7a4SRichard Tran Mills if (csrA && csrB) { 722792fecdfSBarry Smith PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC); 723190ae7a4SRichard Tran Mills } else { 724190ae7a4SRichard Tran Mills csrC = PETSC_NULL; 725190ae7a4SRichard Tran Mills } 726e8be1fc7SRichard Tran Mills 727e8be1fc7SRichard Tran Mills /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 7289566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 729e8be1fc7SRichard Tran Mills 730e8be1fc7SRichard Tran Mills PetscFunctionReturn(0); 731e8be1fc7SRichard Tran Mills } 732e8be1fc7SRichard Tran Mills 7339371c9d4SSatish Balay PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) { 734190ae7a4SRichard Tran Mills PetscFunctionBegin; 7359566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 736190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 737190ae7a4SRichard Tran Mills } 738190ae7a4SRichard Tran Mills 7399371c9d4SSatish Balay PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) { 740190ae7a4SRichard Tran Mills PetscFunctionBegin; 7419566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 742190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 743190ae7a4SRichard Tran Mills } 744190ae7a4SRichard Tran Mills 7459371c9d4SSatish Balay PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) { 746190ae7a4SRichard Tran Mills PetscFunctionBegin; 7479566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 748190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 749190ae7a4SRichard Tran Mills } 750190ae7a4SRichard Tran Mills 7519371c9d4SSatish Balay PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) { 752190ae7a4SRichard Tran Mills PetscFunctionBegin; 7539566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 754190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 755190ae7a4SRichard Tran Mills } 756190ae7a4SRichard Tran Mills 7579371c9d4SSatish Balay PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) { 758190ae7a4SRichard Tran Mills PetscFunctionBegin; 7599566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C)); 760190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 761190ae7a4SRichard Tran Mills } 762190ae7a4SRichard Tran Mills 7639371c9d4SSatish Balay PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) { 764190ae7a4SRichard Tran Mills PetscFunctionBegin; 7659566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C)); 766190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 767190ae7a4SRichard Tran Mills } 768190ae7a4SRichard Tran Mills 7699371c9d4SSatish Balay static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) { 770190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 771190ae7a4SRichard Tran Mills Mat A = product->A, B = product->B; 772190ae7a4SRichard Tran Mills 773190ae7a4SRichard Tran Mills PetscFunctionBegin; 7749566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C)); 775190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 776190ae7a4SRichard Tran Mills } 777190ae7a4SRichard Tran Mills 7789371c9d4SSatish Balay static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) { 779190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 780190ae7a4SRichard Tran Mills Mat A = product->A, B = product->B; 781190ae7a4SRichard Tran Mills PetscReal fill = product->fill; 782190ae7a4SRichard Tran Mills 783190ae7a4SRichard Tran Mills PetscFunctionBegin; 7849566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C)); 785190ae7a4SRichard Tran Mills C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 786190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 787190ae7a4SRichard Tran Mills } 788190ae7a4SRichard Tran Mills 7899371c9d4SSatish Balay PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C) { 790190ae7a4SRichard Tran Mills Mat Ct; 791190ae7a4SRichard Tran Mills Vec zeros; 7921495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr; 7934f53af40SRichard Tran Mills sparse_matrix_t csrA, csrP, csrC; 7944f53af40SRichard Tran Mills PetscBool set, flag; 795b9e1dd46SRichard Tran Mills struct matrix_descr descr_type_sym; 7964f53af40SRichard Tran Mills PetscObjectState state; 7974f53af40SRichard Tran Mills 7984f53af40SRichard Tran Mills PetscFunctionBegin; 7999566063dSJacob Faibussowitsch PetscCall(MatIsSymmetricKnown(A, &set, &flag)); 800b94d7dedSBarry Smith PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 8014f53af40SRichard Tran Mills 8029566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 8039566063dSJacob Faibussowitsch if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 8049566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)P, &state)); 8059566063dSJacob Faibussowitsch if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 8064f53af40SRichard Tran Mills csrA = a->csrA; 8074f53af40SRichard Tran Mills csrP = p->csrA; 8084f53af40SRichard Tran Mills csrC = c->csrA; 809b9e1dd46SRichard Tran Mills descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 810190ae7a4SRichard Tran Mills descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 811b9e1dd46SRichard Tran Mills descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 8124f53af40SRichard Tran Mills 813f8990b4aSRichard Tran Mills /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 814792fecdfSBarry Smith PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT); 8154f53af40SRichard Tran Mills 816190ae7a4SRichard Tran Mills /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 817190ae7a4SRichard Tran Mills * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 818190ae7a4SRichard Tran Mills * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 8197301b172SPierre Jolivet * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY 820190ae7a4SRichard Tran Mills * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 821190ae7a4SRichard Tran Mills * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 822190ae7a4SRichard Tran Mills * the full matrix. */ 8239566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 8249566063dSJacob Faibussowitsch PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct)); 8259566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(C, &zeros, NULL)); 8269566063dSJacob Faibussowitsch PetscCall(VecSetFromOptions(zeros)); 8279566063dSJacob Faibussowitsch PetscCall(VecZeroEntries(zeros)); 8289566063dSJacob Faibussowitsch PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES)); 8299566063dSJacob Faibussowitsch PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN)); 830190ae7a4SRichard Tran Mills /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 8319566063dSJacob Faibussowitsch PetscCall(MatProductCreateWithMat(A, P, PETSC_NULL, C)); 8329566063dSJacob Faibussowitsch PetscCall(MatProductSetType(C, MATPRODUCT_PtAP)); 8339566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_create_mkl_handle(C)); 8349566063dSJacob Faibussowitsch PetscCall(VecDestroy(&zeros)); 8359566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Ct)); 8364f53af40SRichard Tran Mills PetscFunctionReturn(0); 8374f53af40SRichard Tran Mills } 838190ae7a4SRichard Tran Mills 8399371c9d4SSatish Balay PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) { 840190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 841190ae7a4SRichard Tran Mills Mat A = product->A, P = product->B; 8421495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr; 843190ae7a4SRichard Tran Mills sparse_matrix_t csrA, csrP, csrC; 844190ae7a4SRichard Tran Mills struct matrix_descr descr_type_sym; 845190ae7a4SRichard Tran Mills PetscObjectState state; 846190ae7a4SRichard Tran Mills 847190ae7a4SRichard Tran Mills PetscFunctionBegin; 8489566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 8499566063dSJacob Faibussowitsch if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 8509566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)P, &state)); 8519566063dSJacob Faibussowitsch if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 852190ae7a4SRichard Tran Mills csrA = a->csrA; 853190ae7a4SRichard Tran Mills csrP = p->csrA; 854190ae7a4SRichard Tran Mills descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 855190ae7a4SRichard Tran Mills descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 856190ae7a4SRichard Tran Mills descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 857190ae7a4SRichard Tran Mills 858190ae7a4SRichard Tran Mills /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 859190ae7a4SRichard Tran Mills if (csrP && csrA) { 860792fecdfSBarry Smith PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL); 861190ae7a4SRichard Tran Mills } else { 862190ae7a4SRichard Tran Mills csrC = PETSC_NULL; 863190ae7a4SRichard Tran Mills } 864190ae7a4SRichard Tran Mills 865190ae7a4SRichard Tran Mills /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 866190ae7a4SRichard Tran Mills * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 86749ba5396SRichard Tran Mills * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 86849ba5396SRichard Tran Mills * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 86949ba5396SRichard Tran Mills * is guaranteed. */ 8709566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C)); 871190ae7a4SRichard Tran Mills 872190ae7a4SRichard Tran Mills C->ops->productnumeric = MatProductNumeric_PtAP; 873190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 874190ae7a4SRichard Tran Mills } 875190ae7a4SRichard Tran Mills 8769371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) { 877190ae7a4SRichard Tran Mills PetscFunctionBegin; 878190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_AB; 879190ae7a4SRichard Tran Mills C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 880190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 881190ae7a4SRichard Tran Mills } 882190ae7a4SRichard Tran Mills 8839371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) { 884190ae7a4SRichard Tran Mills PetscFunctionBegin; 885190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 886190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 887190ae7a4SRichard Tran Mills } 888190ae7a4SRichard Tran Mills 8899371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) { 890190ae7a4SRichard Tran Mills PetscFunctionBegin; 891190ae7a4SRichard Tran Mills C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 892190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_ABt; 893190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 894190ae7a4SRichard Tran Mills } 895190ae7a4SRichard Tran Mills 8969371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) { 897190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 898190ae7a4SRichard Tran Mills Mat A = product->A; 899190ae7a4SRichard Tran Mills PetscBool set, flag; 900190ae7a4SRichard Tran Mills 901190ae7a4SRichard Tran Mills PetscFunctionBegin; 902a3d67537SPierre Jolivet if (PetscDefined(USE_COMPLEX)) { 9032ab6f6a8SStefano Zampini /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used. 9042ab6f6a8SStefano Zampini * We do this in several other locations in this file. This works for the time being, but these 905190ae7a4SRichard Tran Mills * routines are considered unsafe and may be removed from the MatProduct code in the future. 9062ab6f6a8SStefano Zampini * TODO: Add proper MATSEQAIJMKL implementations */ 907190ae7a4SRichard Tran Mills C->ops->productsymbolic = NULL; 908a3d67537SPierre Jolivet } else { 909190ae7a4SRichard Tran Mills /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 9109566063dSJacob Faibussowitsch PetscCall(MatIsSymmetricKnown(A, &set, &flag)); 911a3d67537SPierre Jolivet if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 912a3d67537SPierre Jolivet else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 9131495fedeSRichard Tran Mills /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 914190ae7a4SRichard Tran Mills * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 915a3d67537SPierre Jolivet } 916190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 917190ae7a4SRichard Tran Mills } 918190ae7a4SRichard Tran Mills 9199371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) { 920190ae7a4SRichard Tran Mills PetscFunctionBegin; 9212ab6f6a8SStefano Zampini C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 922190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 923190ae7a4SRichard Tran Mills } 924190ae7a4SRichard Tran Mills 9259371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) { 926190ae7a4SRichard Tran Mills PetscFunctionBegin; 9272ab6f6a8SStefano Zampini C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 928190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 929190ae7a4SRichard Tran Mills } 930190ae7a4SRichard Tran Mills 9319371c9d4SSatish Balay PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) { 932190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 933190ae7a4SRichard Tran Mills 934190ae7a4SRichard Tran Mills PetscFunctionBegin; 935190ae7a4SRichard Tran Mills switch (product->type) { 9369371c9d4SSatish Balay case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C)); break; 9379371c9d4SSatish Balay case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C)); break; 9389371c9d4SSatish Balay case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C)); break; 9399371c9d4SSatish Balay case MATPRODUCT_PtAP: PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C)); break; 9409371c9d4SSatish Balay case MATPRODUCT_RARt: PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C)); break; 9419371c9d4SSatish Balay case MATPRODUCT_ABC: PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C)); break; 9429371c9d4SSatish Balay default: break; 943190ae7a4SRichard Tran Mills } 944190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 945190ae7a4SRichard Tran Mills } 946431879ecSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 947190ae7a4SRichard Tran Mills /* ------------------------ End MatProduct code ------------------------ */ 9484f53af40SRichard Tran Mills 9494a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 950510b72f4SRichard Tran Mills * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 9514a2a386eSRichard Tran Mills * routine, but can also be used to convert an assembled SeqAIJ matrix 9524a2a386eSRichard Tran Mills * into a SeqAIJMKL one. */ 9539371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) { 9544a2a386eSRichard Tran Mills Mat B = *newmat; 9554a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 956c9d46305SRichard Tran Mills PetscBool set; 957e9c94282SRichard Tran Mills PetscBool sametype; 9584a2a386eSRichard Tran Mills 9594a2a386eSRichard Tran Mills PetscFunctionBegin; 9609566063dSJacob Faibussowitsch if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 9614a2a386eSRichard Tran Mills 9629566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype)); 963e9c94282SRichard Tran Mills if (sametype) PetscFunctionReturn(0); 964e9c94282SRichard Tran Mills 9659566063dSJacob Faibussowitsch PetscCall(PetscNewLog(B, &aijmkl)); 9664a2a386eSRichard Tran Mills B->spptr = (void *)aijmkl; 9674a2a386eSRichard Tran Mills 968df555b71SRichard Tran Mills /* Set function pointers for methods that we inherit from AIJ but override. 969969800c5SRichard Tran Mills * We also parse some command line options below, since those determine some of the methods we point to. */ 9704a2a386eSRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJMKL; 9714a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 9724a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJMKL; 973c9d46305SRichard Tran Mills 9744abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 975ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 976d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 977a8327b06SKarl Rupp #else 978d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 979d995685eSRichard Tran Mills #endif 9805b49642aSRichard Tran Mills aijmkl->eager_inspection = PETSC_FALSE; 9814abfa3b3SRichard Tran Mills 9824abfa3b3SRichard Tran Mills /* Parse command line options. */ 983d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat"); 9849566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set)); 9859566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set)); 986d0609cedSBarry Smith PetscOptionsEnd(); 987ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 988d995685eSRichard Tran Mills if (!aijmkl->no_SpMV2) { 9899566063dSJacob Faibussowitsch PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n")); 990d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 991d995685eSRichard Tran Mills } 992d995685eSRichard Tran Mills #endif 993c9d46305SRichard Tran Mills 994ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 995df555b71SRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 996969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 997df555b71SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 998969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 9998a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1000190ae7a4SRichard Tran Mills B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1001190ae7a4SRichard Tran Mills B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1002190ae7a4SRichard Tran Mills B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1003190ae7a4SRichard Tran Mills B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1004190ae7a4SRichard Tran Mills B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1005ffcab697SRichard Tran Mills #if !defined(PETSC_USE_COMPLEX) 100649ba5396SRichard Tran Mills B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1007190ae7a4SRichard Tran Mills #else 1008190ae7a4SRichard Tran Mills B->ops->ptapnumeric = NULL; 10094f53af40SRichard Tran Mills #endif 1010e8be1fc7SRichard Tran Mills #endif 10111950a7e7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 10121950a7e7SRichard Tran Mills 1013213898a2SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1014213898a2SRichard Tran Mills /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1015213898a2SRichard Tran Mills * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1016213898a2SRichard Tran Mills * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1017213898a2SRichard Tran Mills * versions in which the older interface has not been deprecated, we use the old interface. */ 10181950a7e7SRichard Tran Mills if (aijmkl->no_SpMV2) { 10194a2a386eSRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL; 1020969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 10214a2a386eSRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL; 1022969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1023c9d46305SRichard Tran Mills } 10241950a7e7SRichard Tran Mills #endif 10254a2a386eSRichard Tran Mills 10269566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ)); 10274a2a386eSRichard Tran Mills 10289566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL)); 10294a2a386eSRichard Tran Mills *newmat = B; 10304a2a386eSRichard Tran Mills PetscFunctionReturn(0); 10314a2a386eSRichard Tran Mills } 10324a2a386eSRichard Tran Mills 10334a2a386eSRichard Tran Mills /*@C 10344a2a386eSRichard Tran Mills MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 10354a2a386eSRichard Tran Mills This type inherits from AIJ and is largely identical, but uses sparse BLAS 10364a2a386eSRichard Tran Mills routines from Intel MKL whenever possible. 103790147e49SRichard Tran Mills If the installed version of MKL supports the "SpMV2" sparse 103890147e49SRichard Tran Mills inspector-executor routines, then those are used by default. 1039597ee276SRichard Tran Mills MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1040597ee276SRichard Tran Mills symmetric A) operations are currently supported. 1041597ee276SRichard Tran Mills Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 104290147e49SRichard Tran Mills 1043d083f849SBarry Smith Collective 10444a2a386eSRichard Tran Mills 10454a2a386eSRichard Tran Mills Input Parameters: 10464a2a386eSRichard Tran Mills + comm - MPI communicator, set to PETSC_COMM_SELF 10474a2a386eSRichard Tran Mills . m - number of rows 10484a2a386eSRichard Tran Mills . n - number of columns 10494a2a386eSRichard Tran Mills . nz - number of nonzeros per row (same for all rows) 10504a2a386eSRichard Tran Mills - nnz - array containing the number of nonzeros in the various rows 10514a2a386eSRichard Tran Mills (possibly different for each row) or NULL 10524a2a386eSRichard Tran Mills 10534a2a386eSRichard Tran Mills Output Parameter: 10544a2a386eSRichard Tran Mills . A - the matrix 10554a2a386eSRichard Tran Mills 105690147e49SRichard Tran Mills Options Database Keys: 105766b7eeb6SRichard Tran Mills + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 105866b7eeb6SRichard Tran Mills - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 105990147e49SRichard Tran Mills 10604a2a386eSRichard Tran Mills Notes: 10614a2a386eSRichard Tran Mills If nnz is given then nz is ignored 10624a2a386eSRichard Tran Mills 10634a2a386eSRichard Tran Mills Level: intermediate 10644a2a386eSRichard Tran Mills 1065db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()` 10664a2a386eSRichard Tran Mills @*/ 10679371c9d4SSatish Balay PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) { 10684a2a386eSRichard Tran Mills PetscFunctionBegin; 10699566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 10709566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, m, n)); 10719566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQAIJMKL)); 10729566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz)); 10734a2a386eSRichard Tran Mills PetscFunctionReturn(0); 10744a2a386eSRichard Tran Mills } 10754a2a386eSRichard Tran Mills 10769371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) { 10774a2a386eSRichard Tran Mills PetscFunctionBegin; 10789566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATSEQAIJ)); 10799566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A)); 10804a2a386eSRichard Tran Mills PetscFunctionReturn(0); 10814a2a386eSRichard Tran Mills } 1082