1b9e7e5c1SBarry Smith 24a2a386eSRichard Tran Mills /* 34a2a386eSRichard Tran Mills Defines basic operations for the MATSEQAIJMKL matrix class. 44a2a386eSRichard Tran Mills This class is derived from the MATSEQAIJ class and retains the 54a2a386eSRichard Tran Mills compressed row storage (aka Yale sparse matrix format) but uses 64a2a386eSRichard Tran Mills sparse BLAS operations from the Intel Math Kernel Library (MKL) 74a2a386eSRichard Tran Mills wherever possible. 84a2a386eSRichard Tran Mills */ 94a2a386eSRichard Tran Mills 104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h> 114a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12b9e7e5c1SBarry Smith #include <mkl_spblas.h> 134a2a386eSRichard Tran Mills 144a2a386eSRichard Tran Mills typedef struct { 15c9d46305SRichard Tran Mills PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 165b49642aSRichard Tran Mills PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 174abfa3b3SRichard Tran Mills PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18551aa5c8SRichard Tran Mills PetscObjectState state; 19ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20df555b71SRichard Tran Mills sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21df555b71SRichard Tran Mills struct matrix_descr descr; 22b8cbc1fbSRichard Tran Mills #endif 234a2a386eSRichard Tran Mills } Mat_SeqAIJMKL; 244a2a386eSRichard Tran Mills 254a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 264a2a386eSRichard Tran Mills 274a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 284a2a386eSRichard Tran Mills { 294a2a386eSRichard Tran Mills /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 304a2a386eSRichard Tran Mills /* so we will ignore 'MatType type'. */ 314a2a386eSRichard Tran Mills Mat B = *newmat; 32ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 334a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 34c1d5218aSRichard Tran Mills #endif 354a2a386eSRichard Tran Mills 364a2a386eSRichard Tran Mills PetscFunctionBegin; 379566063dSJacob Faibussowitsch if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 384a2a386eSRichard Tran Mills 394a2a386eSRichard Tran Mills /* Reset the original function pointers. */ 4054871a98SRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJ; 414a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 424a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJ; 4354871a98SRichard Tran Mills B->ops->mult = MatMult_SeqAIJ; 44ff03dc53SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJ; 4554871a98SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJ; 46ff03dc53SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 47190ae7a4SRichard Tran Mills B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 48431879ecSRichard Tran Mills B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 49e8be1fc7SRichard Tran Mills B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 50190ae7a4SRichard Tran Mills B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 51190ae7a4SRichard Tran Mills B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 524f53af40SRichard Tran Mills B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 534a2a386eSRichard Tran Mills 549566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL)); 554222ddf1SHong Zhang 56ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 574abfa3b3SRichard Tran Mills /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 58e9c94282SRichard Tran Mills * simply involves destroying the MKL sparse matrix handle and then freeing 59e9c94282SRichard Tran Mills * the spptr pointer. */ 60a8327b06SKarl Rupp if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 61a8327b06SKarl Rupp 625f80ce2aSJacob Faibussowitsch if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA); 63ddf6f99aSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 649566063dSJacob Faibussowitsch PetscCall(PetscFree(B->spptr)); 654a2a386eSRichard Tran Mills 664a2a386eSRichard Tran Mills /* Change the type of B to MATSEQAIJ. */ 679566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ)); 684a2a386eSRichard Tran Mills 694a2a386eSRichard Tran Mills *newmat = B; 704a2a386eSRichard Tran Mills PetscFunctionReturn(0); 714a2a386eSRichard Tran Mills } 724a2a386eSRichard Tran Mills 734a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 744a2a386eSRichard Tran Mills { 754a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 764a2a386eSRichard Tran Mills 774a2a386eSRichard Tran Mills PetscFunctionBegin; 78e9c94282SRichard Tran Mills 79edc89de7SRichard Tran Mills /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */ 80e9c94282SRichard Tran Mills if (aijmkl) { 814a2a386eSRichard Tran Mills /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 82ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 835f80ce2aSJacob Faibussowitsch if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA); 844abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 859566063dSJacob Faibussowitsch PetscCall(PetscFree(A->spptr)); 86e9c94282SRichard Tran Mills } 874a2a386eSRichard Tran Mills 884a2a386eSRichard Tran Mills /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 894a2a386eSRichard Tran Mills * to destroy everything that remains. */ 909566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ)); 914a2a386eSRichard Tran Mills /* Note that I don't call MatSetType(). I believe this is because that 924a2a386eSRichard Tran Mills * is only to be called when *building* a matrix. I could be wrong, but 934a2a386eSRichard Tran Mills * that is how things work for the SuperLU matrix class. */ 94*2e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaijmkl_seqaij_C",NULL)); 959566063dSJacob Faibussowitsch PetscCall(MatDestroy_SeqAIJ(A)); 964a2a386eSRichard Tran Mills PetscFunctionReturn(0); 974a2a386eSRichard Tran Mills } 984a2a386eSRichard Tran Mills 99190ae7a4SRichard Tran Mills /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 1005b49642aSRichard Tran Mills * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 1015b49642aSRichard Tran Mills * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 1025b49642aSRichard Tran Mills * handle, creates a new one, and then calls mkl_sparse_optimize(). 1035b49642aSRichard Tran Mills * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 1045b49642aSRichard Tran Mills * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 1055b49642aSRichard Tran Mills * an unoptimized matrix handle here. */ 1066e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 1074a2a386eSRichard Tran Mills { 108ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1096e369cd5SRichard Tran Mills /* If the MKL library does not have mkl_sparse_optimize(), then this routine 1106e369cd5SRichard Tran Mills * does nothing. We make it callable anyway in this case because it cuts 1116e369cd5SRichard Tran Mills * down on littering the code with #ifdefs. */ 11245fbe478SRichard Tran Mills PetscFunctionBegin; 1136e369cd5SRichard Tran Mills PetscFunctionReturn(0); 1146e369cd5SRichard Tran Mills #else 115a8327b06SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 116a8327b06SKarl Rupp Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 117a8327b06SKarl Rupp PetscInt m,n; 118a8327b06SKarl Rupp MatScalar *aa; 119a8327b06SKarl Rupp PetscInt *aj,*ai; 1204a2a386eSRichard Tran Mills 121a8327b06SKarl Rupp PetscFunctionBegin; 122e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 123e626a176SRichard Tran Mills /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 124e626a176SRichard Tran Mills * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 125e626a176SRichard Tran Mills * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 126e626a176SRichard Tran Mills * mkl_sparse_optimize() later. */ 1276e369cd5SRichard Tran Mills if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 1284d51fa23SRichard Tran Mills #endif 1296e369cd5SRichard Tran Mills 1300632b357SRichard Tran Mills if (aijmkl->sparse_optimized) { 1310632b357SRichard Tran Mills /* Matrix has been previously assembled and optimized. Must destroy old 1320632b357SRichard Tran Mills * matrix handle before running the optimization step again. */ 1335f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA); 1340632b357SRichard Tran Mills } 1358d3fe1b0SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 1366e369cd5SRichard Tran Mills 137c9d46305SRichard Tran Mills /* Now perform the SpMV2 setup and matrix optimization. */ 138df555b71SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 139df555b71SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 140df555b71SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 14158678438SRichard Tran Mills m = A->rmap->n; 14258678438SRichard Tran Mills n = A->cmap->n; 143df555b71SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 144df555b71SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 145df555b71SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 1461495fedeSRichard Tran Mills if (a->nz && aa && !A->structure_only) { 1478d3fe1b0SRichard Tran Mills /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 1488d3fe1b0SRichard Tran Mills * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 1495f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_x_create_csr,&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 1505f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 1515f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 1521950a7e7SRichard Tran Mills if (!aijmkl->no_SpMV2) { 1535f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_optimize,aijmkl->csrA); 1541950a7e7SRichard Tran Mills } 1554abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 1569566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state))); 157190ae7a4SRichard Tran Mills } else { 158190ae7a4SRichard Tran Mills aijmkl->csrA = PETSC_NULL; 159c9d46305SRichard Tran Mills } 1606e369cd5SRichard Tran Mills 1616e369cd5SRichard Tran Mills PetscFunctionReturn(0); 162d995685eSRichard Tran Mills #endif 1636e369cd5SRichard Tran Mills } 1646e369cd5SRichard Tran Mills 165b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 166190ae7a4SRichard Tran Mills /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 167190ae7a4SRichard Tran Mills static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A) 16819afcda9SRichard Tran Mills { 16919afcda9SRichard Tran Mills sparse_index_base_t indexing; 170190ae7a4SRichard Tran Mills PetscInt m,n; 17145fbe478SRichard Tran Mills PetscInt *aj,*ai,*dummy; 17219afcda9SRichard Tran Mills MatScalar *aa; 17319afcda9SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 17419afcda9SRichard Tran Mills 175362febeeSStefano Zampini PetscFunctionBegin; 176190ae7a4SRichard Tran Mills if (csrA) { 17745fbe478SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 1785f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_x_export_csr,csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa); 1795f80ce2aSJacob Faibussowitsch PetscCheck((m == nrows) && (n == ncols),PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 180190ae7a4SRichard Tran Mills } else { 181190ae7a4SRichard Tran Mills aj = ai = PETSC_NULL; 182190ae7a4SRichard Tran Mills aa = PETSC_NULL; 183aab60f1bSRichard Tran Mills } 184190ae7a4SRichard Tran Mills 1859566063dSJacob Faibussowitsch PetscCall(MatSetType(A,MATSEQAIJ)); 1869566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols)); 187aab60f1bSRichard Tran Mills /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 188aab60f1bSRichard Tran Mills * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 189aab60f1bSRichard Tran Mills * they will be destroyed when the MKL handle is destroyed. 190aab60f1bSRichard Tran Mills * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 191190ae7a4SRichard Tran Mills if (csrA) { 1929566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL)); 193190ae7a4SRichard Tran Mills } else { 194190ae7a4SRichard Tran Mills /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 1959566063dSJacob Faibussowitsch PetscCall(MatSetUp(A)); 1969566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 1979566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 198190ae7a4SRichard Tran Mills } 19919afcda9SRichard Tran Mills 20019afcda9SRichard Tran Mills /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 20119afcda9SRichard Tran Mills * Now turn it into a MATSEQAIJMKL. */ 2029566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A)); 2036c87cf42SRichard Tran Mills 20419afcda9SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 20519afcda9SRichard Tran Mills aijmkl->csrA = csrA; 2066c87cf42SRichard Tran Mills 20719afcda9SRichard Tran Mills /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 20819afcda9SRichard Tran Mills * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 20919afcda9SRichard Tran Mills * and just need to be able to run the MKL optimization step. */ 210f3fd1758SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 211f3fd1758SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 212f3fd1758SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 213190ae7a4SRichard Tran Mills if (csrA) { 2145f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 2155f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 2161950a7e7SRichard Tran Mills } 2179566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state))); 21819afcda9SRichard Tran Mills PetscFunctionReturn(0); 21919afcda9SRichard Tran Mills } 220b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 221190ae7a4SRichard Tran Mills 222e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 223e8be1fc7SRichard Tran Mills * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 224e8be1fc7SRichard Tran Mills * MatMatMultNumeric(). */ 225b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 226190ae7a4SRichard Tran Mills static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 227e8be1fc7SRichard Tran Mills { 228e8be1fc7SRichard Tran Mills PetscInt i; 229e8be1fc7SRichard Tran Mills PetscInt nrows,ncols; 230e8be1fc7SRichard Tran Mills PetscInt nz; 231e8be1fc7SRichard Tran Mills PetscInt *ai,*aj,*dummy; 232e8be1fc7SRichard Tran Mills PetscScalar *aa; 2331495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 234e8be1fc7SRichard Tran Mills sparse_index_base_t indexing; 235e8be1fc7SRichard Tran Mills 236362febeeSStefano Zampini PetscFunctionBegin; 237190ae7a4SRichard Tran Mills /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 238190ae7a4SRichard Tran Mills if (!aijmkl->csrA) PetscFunctionReturn(0); 239190ae7a4SRichard Tran Mills 240e8be1fc7SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 2415f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 242e8be1fc7SRichard Tran Mills 243e8be1fc7SRichard Tran Mills /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 244e8be1fc7SRichard Tran Mills * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 245e8be1fc7SRichard Tran Mills for (i=0; i<nrows; i++) { 246e8be1fc7SRichard Tran Mills nz = ai[i+1] - ai[i]; 2479566063dSJacob Faibussowitsch PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES)); 248e8be1fc7SRichard Tran Mills } 249e8be1fc7SRichard Tran Mills 2509566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 2519566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 252e8be1fc7SRichard Tran Mills 2539566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state))); 254a7180b50SRichard Tran Mills /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation. 255a7180b50SRichard Tran Mills * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed 256a7180b50SRichard Tran Mills * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */ 257a7180b50SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 258e8be1fc7SRichard Tran Mills PetscFunctionReturn(0); 259e8be1fc7SRichard Tran Mills } 260b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 261e8be1fc7SRichard Tran Mills 2623849ddb2SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 2633849ddb2SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer) 2643849ddb2SRichard Tran Mills { 2653849ddb2SRichard Tran Mills PetscInt i,j,k; 2663849ddb2SRichard Tran Mills PetscInt nrows,ncols; 2673849ddb2SRichard Tran Mills PetscInt nz; 2683849ddb2SRichard Tran Mills PetscInt *ai,*aj,*dummy; 2693849ddb2SRichard Tran Mills PetscScalar *aa; 2701495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 2713849ddb2SRichard Tran Mills sparse_index_base_t indexing; 2723849ddb2SRichard Tran Mills 273362febeeSStefano Zampini PetscFunctionBegin; 2749566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n")); 2753849ddb2SRichard Tran Mills 2763849ddb2SRichard Tran Mills /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 2773849ddb2SRichard Tran Mills if (!aijmkl->csrA) { 2789566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n")); 2793849ddb2SRichard Tran Mills PetscFunctionReturn(0); 2803849ddb2SRichard Tran Mills } 2813849ddb2SRichard Tran Mills 2823849ddb2SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 2835f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 2843849ddb2SRichard Tran Mills 2853849ddb2SRichard Tran Mills k = 0; 2863849ddb2SRichard Tran Mills for (i=0; i<nrows; i++) { 2879566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ": ",i)); 2883849ddb2SRichard Tran Mills nz = ai[i+1] - ai[i]; 2893849ddb2SRichard Tran Mills for (j=0; j<nz; j++) { 2903849ddb2SRichard Tran Mills if (aa) { 2919566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", %g) ",aj[k],PetscRealPart(aa[k]))); 2923849ddb2SRichard Tran Mills } else { 2939566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", NULL)",aj[k])); 2943849ddb2SRichard Tran Mills } 2953849ddb2SRichard Tran Mills k++; 2963849ddb2SRichard Tran Mills } 2979566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer,"\n")); 2983849ddb2SRichard Tran Mills } 2993849ddb2SRichard Tran Mills PetscFunctionReturn(0); 3003849ddb2SRichard Tran Mills } 3013849ddb2SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 3023849ddb2SRichard Tran Mills 3036e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 3046e369cd5SRichard Tran Mills { 3051495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 3066e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl_dest; 3076e369cd5SRichard Tran Mills 3086e369cd5SRichard Tran Mills PetscFunctionBegin; 3099566063dSJacob Faibussowitsch PetscCall(MatDuplicate_SeqAIJ(A,op,M)); 3106e369cd5SRichard Tran Mills aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr; 3119566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(aijmkl_dest,aijmkl,1)); 3126e369cd5SRichard Tran Mills aijmkl_dest->sparse_optimized = PETSC_FALSE; 3135b49642aSRichard Tran Mills if (aijmkl->eager_inspection) { 3149566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 3155b49642aSRichard Tran Mills } 3166e369cd5SRichard Tran Mills PetscFunctionReturn(0); 3176e369cd5SRichard Tran Mills } 3186e369cd5SRichard Tran Mills 3196e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 3206e369cd5SRichard Tran Mills { 3216e369cd5SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3225b49642aSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 3236e369cd5SRichard Tran Mills 3246e369cd5SRichard Tran Mills PetscFunctionBegin; 3256e369cd5SRichard Tran Mills if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 3266e369cd5SRichard Tran Mills 3276e369cd5SRichard Tran Mills /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 3286e369cd5SRichard Tran Mills * extra information and some different methods, call the AssemblyEnd 3296e369cd5SRichard Tran Mills * routine for a MATSEQAIJ. 3306e369cd5SRichard Tran Mills * I'm not sure if this is the best way to do this, but it avoids 331d96e85feSRichard Tran Mills * a lot of code duplication. */ 3326e369cd5SRichard Tran Mills a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 3339566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 3346e369cd5SRichard Tran Mills 3355b49642aSRichard Tran Mills /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 3365b49642aSRichard Tran Mills * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 3375b49642aSRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*)A->spptr; 3385b49642aSRichard Tran Mills if (aijmkl->eager_inspection) { 3399566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 3405b49642aSRichard Tran Mills } 341df555b71SRichard Tran Mills 3424a2a386eSRichard Tran Mills PetscFunctionReturn(0); 3434a2a386eSRichard Tran Mills } 3444a2a386eSRichard Tran Mills 345e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 3464a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 3474a2a386eSRichard Tran Mills { 3484a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3494a2a386eSRichard Tran Mills const PetscScalar *x; 3504a2a386eSRichard Tran Mills PetscScalar *y; 3514a2a386eSRichard Tran Mills const MatScalar *aa; 3524a2a386eSRichard Tran Mills PetscInt m = A->rmap->n; 353db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 354db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 355db63039fSRichard Tran Mills PetscScalar beta = 0.0; 3564a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 357db63039fSRichard Tran Mills char matdescra[6]; 358db63039fSRichard Tran Mills 3594a2a386eSRichard Tran Mills /* Variables not in MatMult_SeqAIJ. */ 360ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 361ff03dc53SRichard Tran Mills 362ff03dc53SRichard Tran Mills PetscFunctionBegin; 363db63039fSRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 364db63039fSRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 3659566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 3669566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 367ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 368ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 369ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 370ff03dc53SRichard Tran Mills 371ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 372db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 373ff03dc53SRichard Tran Mills 3749566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt)); 3759566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 3769566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 377ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 378ff03dc53SRichard Tran Mills } 3791950a7e7SRichard Tran Mills #endif 380ff03dc53SRichard Tran Mills 381ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 382df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 383df555b71SRichard Tran Mills { 384df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 385df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 386df555b71SRichard Tran Mills const PetscScalar *x; 387df555b71SRichard Tran Mills PetscScalar *y; 388551aa5c8SRichard Tran Mills PetscObjectState state; 389df555b71SRichard Tran Mills 390df555b71SRichard Tran Mills PetscFunctionBegin; 391df555b71SRichard Tran Mills 39238987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 39338987b35SRichard Tran Mills if (!a->nz) { 3949566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 3959566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(y,A->rmap->n)); 3969566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 39738987b35SRichard Tran Mills PetscFunctionReturn(0); 39838987b35SRichard Tran Mills } 399f36dfe3fSRichard Tran Mills 4009566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 4019566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 402df555b71SRichard Tran Mills 4033fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 4043fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 4053fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 4069566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 4079566063dSJacob Faibussowitsch if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 4083fa15762SRichard Tran Mills 409df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMult. */ 4105f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 411df555b71SRichard Tran Mills 4129566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt)); 4139566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 4149566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 415df555b71SRichard Tran Mills PetscFunctionReturn(0); 416df555b71SRichard Tran Mills } 417d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 418df555b71SRichard Tran Mills 419e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 420ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 421ff03dc53SRichard Tran Mills { 422ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 423ff03dc53SRichard Tran Mills const PetscScalar *x; 424ff03dc53SRichard Tran Mills PetscScalar *y; 425ff03dc53SRichard Tran Mills const MatScalar *aa; 426ff03dc53SRichard Tran Mills PetscInt m = A->rmap->n; 427db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 428db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 429db63039fSRichard Tran Mills PetscScalar beta = 0.0; 430ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 431db63039fSRichard Tran Mills char matdescra[6]; 432ff03dc53SRichard Tran Mills 433ff03dc53SRichard Tran Mills /* Variables not in MatMultTranspose_SeqAIJ. */ 434ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 4354a2a386eSRichard Tran Mills 4364a2a386eSRichard Tran Mills PetscFunctionBegin; 437969800c5SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 438969800c5SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 4399566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 4409566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 4414a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 4424a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 4434a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 4444a2a386eSRichard Tran Mills 4454a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 446db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 4474a2a386eSRichard Tran Mills 4489566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt)); 4499566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 4509566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 4514a2a386eSRichard Tran Mills PetscFunctionReturn(0); 4524a2a386eSRichard Tran Mills } 4531950a7e7SRichard Tran Mills #endif 4544a2a386eSRichard Tran Mills 455ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 456df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 457df555b71SRichard Tran Mills { 458df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 459df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 460df555b71SRichard Tran Mills const PetscScalar *x; 461df555b71SRichard Tran Mills PetscScalar *y; 462551aa5c8SRichard Tran Mills PetscObjectState state; 463df555b71SRichard Tran Mills 464df555b71SRichard Tran Mills PetscFunctionBegin; 465df555b71SRichard Tran Mills 46638987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 46738987b35SRichard Tran Mills if (!a->nz) { 4689566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 4699566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(y,A->cmap->n)); 4709566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 47138987b35SRichard Tran Mills PetscFunctionReturn(0); 47238987b35SRichard Tran Mills } 473f36dfe3fSRichard Tran Mills 4749566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 4759566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy,&y)); 476df555b71SRichard Tran Mills 4773fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 4783fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 4793fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 4809566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 4819566063dSJacob Faibussowitsch if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 4823fa15762SRichard Tran Mills 483df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 4845f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 485df555b71SRichard Tran Mills 4869566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt)); 4879566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 4889566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy,&y)); 489df555b71SRichard Tran Mills PetscFunctionReturn(0); 490df555b71SRichard Tran Mills } 491d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 492df555b71SRichard Tran Mills 493e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 4944a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 4954a2a386eSRichard Tran Mills { 4964a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4974a2a386eSRichard Tran Mills const PetscScalar *x; 4984a2a386eSRichard Tran Mills PetscScalar *y,*z; 4994a2a386eSRichard Tran Mills const MatScalar *aa; 5004a2a386eSRichard Tran Mills PetscInt m = A->rmap->n; 501db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 5024a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 5034a2a386eSRichard Tran Mills PetscInt i; 5044a2a386eSRichard Tran Mills 505ff03dc53SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 506ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 507a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 508db63039fSRichard Tran Mills PetscScalar beta; 509a84739b8SRichard Tran Mills char matdescra[6]; 510ff03dc53SRichard Tran Mills 511ff03dc53SRichard Tran Mills PetscFunctionBegin; 512a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 513a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 514a84739b8SRichard Tran Mills 5159566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 5169566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 517ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 518ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 519ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 520ff03dc53SRichard Tran Mills 521ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 522a84739b8SRichard Tran Mills if (zz == yy) { 523a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 524db63039fSRichard Tran Mills beta = 1.0; 525db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 526a84739b8SRichard Tran Mills } else { 527db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 528db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 529db63039fSRichard Tran Mills beta = 0.0; 530db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 531ff03dc53SRichard Tran Mills for (i=0; i<m; i++) { 532ff03dc53SRichard Tran Mills z[i] += y[i]; 533ff03dc53SRichard Tran Mills } 534a84739b8SRichard Tran Mills } 535ff03dc53SRichard Tran Mills 5369566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*a->nz)); 5379566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 5389566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 539ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 540ff03dc53SRichard Tran Mills } 5411950a7e7SRichard Tran Mills #endif 542ff03dc53SRichard Tran Mills 543ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 544df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 545df555b71SRichard Tran Mills { 546df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 547df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 548df555b71SRichard Tran Mills const PetscScalar *x; 549df555b71SRichard Tran Mills PetscScalar *y,*z; 550df555b71SRichard Tran Mills PetscInt m = A->rmap->n; 551df555b71SRichard Tran Mills PetscInt i; 552df555b71SRichard Tran Mills 553df555b71SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 554551aa5c8SRichard Tran Mills PetscObjectState state; 555df555b71SRichard Tran Mills 556df555b71SRichard Tran Mills PetscFunctionBegin; 557df555b71SRichard Tran Mills 55838987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 55938987b35SRichard Tran Mills if (!a->nz) { 5609566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 5619566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(z,y,m)); 5629566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 56338987b35SRichard Tran Mills PetscFunctionReturn(0); 56438987b35SRichard Tran Mills } 565df555b71SRichard Tran Mills 5669566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 5679566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 568df555b71SRichard Tran Mills 5693fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 5703fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 5713fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 5729566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 5739566063dSJacob Faibussowitsch if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 5743fa15762SRichard Tran Mills 575df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 576df555b71SRichard Tran Mills if (zz == yy) { 577df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 578df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 5795f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 580df555b71SRichard Tran Mills } else { 581df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 582df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 5835f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 5845f80ce2aSJacob Faibussowitsch for (i=0; i<m; i++) z[i] += y[i]; 585df555b71SRichard Tran Mills } 586df555b71SRichard Tran Mills 5879566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*a->nz)); 5889566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 5899566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 590df555b71SRichard Tran Mills PetscFunctionReturn(0); 591df555b71SRichard Tran Mills } 592d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 593df555b71SRichard Tran Mills 594e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 595ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 596ff03dc53SRichard Tran Mills { 597ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 598ff03dc53SRichard Tran Mills const PetscScalar *x; 599ff03dc53SRichard Tran Mills PetscScalar *y,*z; 600ff03dc53SRichard Tran Mills const MatScalar *aa; 601ff03dc53SRichard Tran Mills PetscInt m = A->rmap->n; 602db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 603ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 604ff03dc53SRichard Tran Mills PetscInt i; 605ff03dc53SRichard Tran Mills 606ff03dc53SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 607ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 608a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 609db63039fSRichard Tran Mills PetscScalar beta; 610a84739b8SRichard Tran Mills char matdescra[6]; 6114a2a386eSRichard Tran Mills 6124a2a386eSRichard Tran Mills PetscFunctionBegin; 613a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 614a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 615a84739b8SRichard Tran Mills 6169566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 6179566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 6184a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 6194a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 6204a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 6214a2a386eSRichard Tran Mills 6224a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 623a84739b8SRichard Tran Mills if (zz == yy) { 624a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 625db63039fSRichard Tran Mills beta = 1.0; 626969800c5SRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 627a84739b8SRichard Tran Mills } else { 628db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 629db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 630db63039fSRichard Tran Mills beta = 0.0; 631db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 632969800c5SRichard Tran Mills for (i=0; i<n; i++) { 6334a2a386eSRichard Tran Mills z[i] += y[i]; 6344a2a386eSRichard Tran Mills } 635a84739b8SRichard Tran Mills } 6364a2a386eSRichard Tran Mills 6379566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*a->nz)); 6389566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 6399566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 6404a2a386eSRichard Tran Mills PetscFunctionReturn(0); 6414a2a386eSRichard Tran Mills } 6421950a7e7SRichard Tran Mills #endif 6434a2a386eSRichard Tran Mills 644ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 645df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 646df555b71SRichard Tran Mills { 647df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 648df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 649df555b71SRichard Tran Mills const PetscScalar *x; 650df555b71SRichard Tran Mills PetscScalar *y,*z; 651969800c5SRichard Tran Mills PetscInt n = A->cmap->n; 652df555b71SRichard Tran Mills PetscInt i; 653551aa5c8SRichard Tran Mills PetscObjectState state; 654df555b71SRichard Tran Mills 655df555b71SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 656df555b71SRichard Tran Mills 657df555b71SRichard Tran Mills PetscFunctionBegin; 658df555b71SRichard Tran Mills 65938987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 66038987b35SRichard Tran Mills if (!a->nz) { 6619566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 6629566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(z,y,n)); 6639566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 66438987b35SRichard Tran Mills PetscFunctionReturn(0); 66538987b35SRichard Tran Mills } 666f36dfe3fSRichard Tran Mills 6679566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx,&x)); 6689566063dSJacob Faibussowitsch PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 669df555b71SRichard Tran Mills 6703fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 6713fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 6723fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 6739566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 6745f80ce2aSJacob Faibussowitsch if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A); 6753fa15762SRichard Tran Mills 676df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 677df555b71SRichard Tran Mills if (zz == yy) { 678df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 679df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 6805f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 681df555b71SRichard Tran Mills } else { 682df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 683df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 6845f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 6855f80ce2aSJacob Faibussowitsch for (i=0; i<n; i++) z[i] += y[i]; 686df555b71SRichard Tran Mills } 687df555b71SRichard Tran Mills 6889566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0*a->nz)); 6899566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx,&x)); 6909566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 691df555b71SRichard Tran Mills PetscFunctionReturn(0); 692df555b71SRichard Tran Mills } 693d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 694df555b71SRichard Tran Mills 695190ae7a4SRichard Tran Mills /* -------------------------- MatProduct code -------------------------- */ 6968a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 697190ae7a4SRichard Tran Mills static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 698431879ecSRichard Tran Mills { 6991495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr; 700431879ecSRichard Tran Mills sparse_matrix_t csrA,csrB,csrC; 701190ae7a4SRichard Tran Mills PetscInt nrows,ncols; 702431879ecSRichard Tran Mills struct matrix_descr descr_type_gen; 703431879ecSRichard Tran Mills PetscObjectState state; 704431879ecSRichard Tran Mills 705431879ecSRichard Tran Mills PetscFunctionBegin; 706190ae7a4SRichard Tran Mills /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 707190ae7a4SRichard Tran Mills * not handle sparse matrices with zero rows or columns. */ 708190ae7a4SRichard Tran Mills if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 709190ae7a4SRichard Tran Mills else nrows = A->cmap->N; 710190ae7a4SRichard Tran Mills if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 711190ae7a4SRichard Tran Mills else ncols = B->rmap->N; 712190ae7a4SRichard Tran Mills 7139566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 7149566063dSJacob Faibussowitsch if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 7159566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)B,&state)); 7169566063dSJacob Faibussowitsch if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 717431879ecSRichard Tran Mills csrA = a->csrA; 718431879ecSRichard Tran Mills csrB = b->csrA; 719431879ecSRichard Tran Mills descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 720431879ecSRichard Tran Mills 721190ae7a4SRichard Tran Mills if (csrA && csrB) { 7225f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC); 723190ae7a4SRichard Tran Mills } else { 724190ae7a4SRichard Tran Mills csrC = PETSC_NULL; 725190ae7a4SRichard Tran Mills } 726190ae7a4SRichard Tran Mills 7279566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C)); 728431879ecSRichard Tran Mills 729431879ecSRichard Tran Mills PetscFunctionReturn(0); 730431879ecSRichard Tran Mills } 731431879ecSRichard Tran Mills 732190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 733e8be1fc7SRichard Tran Mills { 7341495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 735e8be1fc7SRichard Tran Mills sparse_matrix_t csrA, csrB, csrC; 736e8be1fc7SRichard Tran Mills struct matrix_descr descr_type_gen; 737e8be1fc7SRichard Tran Mills PetscObjectState state; 738e8be1fc7SRichard Tran Mills 739e8be1fc7SRichard Tran Mills PetscFunctionBegin; 7409566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 7419566063dSJacob Faibussowitsch if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 7429566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)B,&state)); 7439566063dSJacob Faibussowitsch if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 744e8be1fc7SRichard Tran Mills csrA = a->csrA; 745e8be1fc7SRichard Tran Mills csrB = b->csrA; 746e8be1fc7SRichard Tran Mills csrC = c->csrA; 747e8be1fc7SRichard Tran Mills descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 748e8be1fc7SRichard Tran Mills 749190ae7a4SRichard Tran Mills if (csrA && csrB) { 7505f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC); 751190ae7a4SRichard Tran Mills } else { 752190ae7a4SRichard Tran Mills csrC = PETSC_NULL; 753190ae7a4SRichard Tran Mills } 754e8be1fc7SRichard Tran Mills 755e8be1fc7SRichard Tran Mills /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 7569566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 757e8be1fc7SRichard Tran Mills 758e8be1fc7SRichard Tran Mills PetscFunctionReturn(0); 759e8be1fc7SRichard Tran Mills } 760e8be1fc7SRichard Tran Mills 761190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 7624f53af40SRichard Tran Mills { 763190ae7a4SRichard Tran Mills PetscFunctionBegin; 7649566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C)); 765190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 766190ae7a4SRichard Tran Mills } 767190ae7a4SRichard Tran Mills 768190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 769190ae7a4SRichard Tran Mills { 770190ae7a4SRichard Tran Mills PetscFunctionBegin; 7719566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C)); 772190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 773190ae7a4SRichard Tran Mills } 774190ae7a4SRichard Tran Mills 775190ae7a4SRichard Tran Mills PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 776190ae7a4SRichard Tran Mills { 777190ae7a4SRichard Tran Mills PetscFunctionBegin; 7789566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C)); 779190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 780190ae7a4SRichard Tran Mills } 781190ae7a4SRichard Tran Mills 782190ae7a4SRichard Tran Mills PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 783190ae7a4SRichard Tran Mills { 784190ae7a4SRichard Tran Mills PetscFunctionBegin; 7859566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C)); 786190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 787190ae7a4SRichard Tran Mills } 788190ae7a4SRichard Tran Mills 789190ae7a4SRichard Tran Mills PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 790190ae7a4SRichard Tran Mills { 791190ae7a4SRichard Tran Mills PetscFunctionBegin; 7929566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C)); 793190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 794190ae7a4SRichard Tran Mills } 795190ae7a4SRichard Tran Mills 796190ae7a4SRichard Tran Mills PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 797190ae7a4SRichard Tran Mills { 798190ae7a4SRichard Tran Mills PetscFunctionBegin; 7999566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C)); 800190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 801190ae7a4SRichard Tran Mills } 802190ae7a4SRichard Tran Mills 803190ae7a4SRichard Tran Mills static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 804190ae7a4SRichard Tran Mills { 805190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 806190ae7a4SRichard Tran Mills Mat A = product->A,B = product->B; 807190ae7a4SRichard Tran Mills 808190ae7a4SRichard Tran Mills PetscFunctionBegin; 8099566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C)); 810190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 811190ae7a4SRichard Tran Mills } 812190ae7a4SRichard Tran Mills 813190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 814190ae7a4SRichard Tran Mills { 815190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 816190ae7a4SRichard Tran Mills Mat A = product->A,B = product->B; 817190ae7a4SRichard Tran Mills PetscReal fill = product->fill; 818190ae7a4SRichard Tran Mills 819190ae7a4SRichard Tran Mills PetscFunctionBegin; 8209566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C)); 821190ae7a4SRichard Tran Mills C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 822190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 823190ae7a4SRichard Tran Mills } 824190ae7a4SRichard Tran Mills 82549ba5396SRichard Tran Mills PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C) 826190ae7a4SRichard Tran Mills { 827190ae7a4SRichard Tran Mills Mat Ct; 828190ae7a4SRichard Tran Mills Vec zeros; 8291495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 8304f53af40SRichard Tran Mills sparse_matrix_t csrA, csrP, csrC; 8314f53af40SRichard Tran Mills PetscBool set, flag; 832b9e1dd46SRichard Tran Mills struct matrix_descr descr_type_sym; 8334f53af40SRichard Tran Mills PetscObjectState state; 8344f53af40SRichard Tran Mills 8354f53af40SRichard Tran Mills PetscFunctionBegin; 8369566063dSJacob Faibussowitsch PetscCall(MatIsSymmetricKnown(A,&set,&flag)); 83708401ef6SPierre Jolivet PetscCheck(set && !(set && !flag),PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 8384f53af40SRichard Tran Mills 8399566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 8409566063dSJacob Faibussowitsch if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 8419566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)P,&state)); 8429566063dSJacob Faibussowitsch if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 8434f53af40SRichard Tran Mills csrA = a->csrA; 8444f53af40SRichard Tran Mills csrP = p->csrA; 8454f53af40SRichard Tran Mills csrC = c->csrA; 846b9e1dd46SRichard Tran Mills descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 847190ae7a4SRichard Tran Mills descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 848b9e1dd46SRichard Tran Mills descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 8494f53af40SRichard Tran Mills 850f8990b4aSRichard Tran Mills /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 8515f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 8524f53af40SRichard Tran Mills 853190ae7a4SRichard Tran Mills /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 854190ae7a4SRichard Tran Mills * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 855190ae7a4SRichard Tran Mills * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 8567301b172SPierre Jolivet * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY 857190ae7a4SRichard Tran Mills * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 858190ae7a4SRichard Tran Mills * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 859190ae7a4SRichard Tran Mills * the full matrix. */ 8609566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 8619566063dSJacob Faibussowitsch PetscCall(MatTranspose(C,MAT_INITIAL_MATRIX,&Ct)); 8629566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(C,&zeros,NULL)); 8639566063dSJacob Faibussowitsch PetscCall(VecSetFromOptions(zeros)); 8649566063dSJacob Faibussowitsch PetscCall(VecZeroEntries(zeros)); 8659566063dSJacob Faibussowitsch PetscCall(MatDiagonalSet(Ct,zeros,INSERT_VALUES)); 8669566063dSJacob Faibussowitsch PetscCall(MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN)); 867190ae7a4SRichard Tran Mills /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 8689566063dSJacob Faibussowitsch PetscCall(MatProductCreateWithMat(A,P,PETSC_NULL,C)); 8699566063dSJacob Faibussowitsch PetscCall(MatProductSetType(C,MATPRODUCT_PtAP)); 8709566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_create_mkl_handle(C)); 8719566063dSJacob Faibussowitsch PetscCall(VecDestroy(&zeros)); 8729566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Ct)); 8734f53af40SRichard Tran Mills PetscFunctionReturn(0); 8744f53af40SRichard Tran Mills } 875190ae7a4SRichard Tran Mills 876190ae7a4SRichard Tran Mills PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 877190ae7a4SRichard Tran Mills { 878190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 879190ae7a4SRichard Tran Mills Mat A = product->A,P = product->B; 8801495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr; 881190ae7a4SRichard Tran Mills sparse_matrix_t csrA,csrP,csrC; 882190ae7a4SRichard Tran Mills struct matrix_descr descr_type_sym; 883190ae7a4SRichard Tran Mills PetscObjectState state; 884190ae7a4SRichard Tran Mills 885190ae7a4SRichard Tran Mills PetscFunctionBegin; 8869566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 8879566063dSJacob Faibussowitsch if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 8889566063dSJacob Faibussowitsch PetscCall(PetscObjectStateGet((PetscObject)P,&state)); 8899566063dSJacob Faibussowitsch if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 890190ae7a4SRichard Tran Mills csrA = a->csrA; 891190ae7a4SRichard Tran Mills csrP = p->csrA; 892190ae7a4SRichard Tran Mills descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 893190ae7a4SRichard Tran Mills descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 894190ae7a4SRichard Tran Mills descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 895190ae7a4SRichard Tran Mills 896190ae7a4SRichard Tran Mills /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 897190ae7a4SRichard Tran Mills if (csrP && csrA) { 8985f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL); 899190ae7a4SRichard Tran Mills } else { 900190ae7a4SRichard Tran Mills csrC = PETSC_NULL; 901190ae7a4SRichard Tran Mills } 902190ae7a4SRichard Tran Mills 903190ae7a4SRichard Tran Mills /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 904190ae7a4SRichard Tran Mills * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 90549ba5396SRichard Tran Mills * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 90649ba5396SRichard Tran Mills * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 90749ba5396SRichard Tran Mills * is guaranteed. */ 9089566063dSJacob Faibussowitsch PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C)); 909190ae7a4SRichard Tran Mills 910190ae7a4SRichard Tran Mills C->ops->productnumeric = MatProductNumeric_PtAP; 911190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 912190ae7a4SRichard Tran Mills } 913190ae7a4SRichard Tran Mills 914190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 915190ae7a4SRichard Tran Mills { 916190ae7a4SRichard Tran Mills PetscFunctionBegin; 917190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_AB; 918190ae7a4SRichard Tran Mills C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 919190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 920190ae7a4SRichard Tran Mills } 921190ae7a4SRichard Tran Mills 922190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 923190ae7a4SRichard Tran Mills { 924190ae7a4SRichard Tran Mills PetscFunctionBegin; 925190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 926190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 927190ae7a4SRichard Tran Mills } 928190ae7a4SRichard Tran Mills 929190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 930190ae7a4SRichard Tran Mills { 931190ae7a4SRichard Tran Mills PetscFunctionBegin; 932190ae7a4SRichard Tran Mills C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 933190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_ABt; 934190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 935190ae7a4SRichard Tran Mills } 936190ae7a4SRichard Tran Mills 937190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 938190ae7a4SRichard Tran Mills { 939190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 940190ae7a4SRichard Tran Mills Mat A = product->A; 941190ae7a4SRichard Tran Mills PetscBool set, flag; 942190ae7a4SRichard Tran Mills 943190ae7a4SRichard Tran Mills PetscFunctionBegin; 944a3d67537SPierre Jolivet if (PetscDefined(USE_COMPLEX)) { 9452ab6f6a8SStefano Zampini /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used. 9462ab6f6a8SStefano Zampini * We do this in several other locations in this file. This works for the time being, but these 947190ae7a4SRichard Tran Mills * routines are considered unsafe and may be removed from the MatProduct code in the future. 9482ab6f6a8SStefano Zampini * TODO: Add proper MATSEQAIJMKL implementations */ 949190ae7a4SRichard Tran Mills C->ops->productsymbolic = NULL; 950a3d67537SPierre Jolivet } else { 951190ae7a4SRichard Tran Mills /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 9529566063dSJacob Faibussowitsch PetscCall(MatIsSymmetricKnown(A,&set,&flag)); 953a3d67537SPierre Jolivet if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 954a3d67537SPierre Jolivet else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 9551495fedeSRichard Tran Mills /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 956190ae7a4SRichard Tran Mills * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 957a3d67537SPierre Jolivet } 958190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 959190ae7a4SRichard Tran Mills } 960190ae7a4SRichard Tran Mills 961190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 962190ae7a4SRichard Tran Mills { 963190ae7a4SRichard Tran Mills PetscFunctionBegin; 9642ab6f6a8SStefano Zampini C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 965190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 966190ae7a4SRichard Tran Mills } 967190ae7a4SRichard Tran Mills 968190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 969190ae7a4SRichard Tran Mills { 970190ae7a4SRichard Tran Mills PetscFunctionBegin; 9712ab6f6a8SStefano Zampini C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 972190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 973190ae7a4SRichard Tran Mills } 974190ae7a4SRichard Tran Mills 975190ae7a4SRichard Tran Mills PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 976190ae7a4SRichard Tran Mills { 977190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 978190ae7a4SRichard Tran Mills 979190ae7a4SRichard Tran Mills PetscFunctionBegin; 980190ae7a4SRichard Tran Mills switch (product->type) { 981190ae7a4SRichard Tran Mills case MATPRODUCT_AB: 9829566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C)); 983190ae7a4SRichard Tran Mills break; 984190ae7a4SRichard Tran Mills case MATPRODUCT_AtB: 9859566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C)); 986190ae7a4SRichard Tran Mills break; 987190ae7a4SRichard Tran Mills case MATPRODUCT_ABt: 9889566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C)); 989190ae7a4SRichard Tran Mills break; 990190ae7a4SRichard Tran Mills case MATPRODUCT_PtAP: 9919566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C)); 992190ae7a4SRichard Tran Mills break; 993190ae7a4SRichard Tran Mills case MATPRODUCT_RARt: 9949566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C)); 995190ae7a4SRichard Tran Mills break; 996190ae7a4SRichard Tran Mills case MATPRODUCT_ABC: 9979566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C)); 998190ae7a4SRichard Tran Mills break; 999190ae7a4SRichard Tran Mills default: 1000190ae7a4SRichard Tran Mills break; 1001190ae7a4SRichard Tran Mills } 1002190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 1003190ae7a4SRichard Tran Mills } 1004431879ecSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 1005190ae7a4SRichard Tran Mills /* ------------------------ End MatProduct code ------------------------ */ 10064f53af40SRichard Tran Mills 10074a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 1008510b72f4SRichard Tran Mills * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 10094a2a386eSRichard Tran Mills * routine, but can also be used to convert an assembled SeqAIJ matrix 10104a2a386eSRichard Tran Mills * into a SeqAIJMKL one. */ 10114a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 10124a2a386eSRichard Tran Mills { 10134a2a386eSRichard Tran Mills Mat B = *newmat; 10144a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 1015c9d46305SRichard Tran Mills PetscBool set; 1016e9c94282SRichard Tran Mills PetscBool sametype; 10174a2a386eSRichard Tran Mills 10184a2a386eSRichard Tran Mills PetscFunctionBegin; 10199566063dSJacob Faibussowitsch if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 10204a2a386eSRichard Tran Mills 10219566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A,type,&sametype)); 1022e9c94282SRichard Tran Mills if (sametype) PetscFunctionReturn(0); 1023e9c94282SRichard Tran Mills 10249566063dSJacob Faibussowitsch PetscCall(PetscNewLog(B,&aijmkl)); 10254a2a386eSRichard Tran Mills B->spptr = (void*) aijmkl; 10264a2a386eSRichard Tran Mills 1027df555b71SRichard Tran Mills /* Set function pointers for methods that we inherit from AIJ but override. 1028969800c5SRichard Tran Mills * We also parse some command line options below, since those determine some of the methods we point to. */ 10294a2a386eSRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJMKL; 10304a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 10314a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJMKL; 1032c9d46305SRichard Tran Mills 10334abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 1034ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1035d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1036a8327b06SKarl Rupp #else 1037d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 1038d995685eSRichard Tran Mills #endif 10395b49642aSRichard Tran Mills aijmkl->eager_inspection = PETSC_FALSE; 10404abfa3b3SRichard Tran Mills 10414abfa3b3SRichard Tran Mills /* Parse command line options. */ 1042d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat"); 10439566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set)); 10449566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set)); 1045d0609cedSBarry Smith PetscOptionsEnd(); 1046ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1047d995685eSRichard Tran Mills if (!aijmkl->no_SpMV2) { 10489566063dSJacob Faibussowitsch PetscCall(PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n")); 1049d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 1050d995685eSRichard Tran Mills } 1051d995685eSRichard Tran Mills #endif 1052c9d46305SRichard Tran Mills 1053ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1054df555b71SRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1055969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1056df555b71SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1057969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 10588a369200SRichard Tran Mills # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1059190ae7a4SRichard Tran Mills B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1060190ae7a4SRichard Tran Mills B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1061190ae7a4SRichard Tran Mills B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1062190ae7a4SRichard Tran Mills B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1063190ae7a4SRichard Tran Mills B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1064ffcab697SRichard Tran Mills # if !defined(PETSC_USE_COMPLEX) 106549ba5396SRichard Tran Mills B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1066190ae7a4SRichard Tran Mills # else 1067190ae7a4SRichard Tran Mills B->ops->ptapnumeric = NULL; 10684f53af40SRichard Tran Mills # endif 1069e8be1fc7SRichard Tran Mills # endif 10701950a7e7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 10711950a7e7SRichard Tran Mills 1072213898a2SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1073213898a2SRichard Tran Mills /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1074213898a2SRichard Tran Mills * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1075213898a2SRichard Tran Mills * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1076213898a2SRichard Tran Mills * versions in which the older interface has not been deprecated, we use the old interface. */ 10771950a7e7SRichard Tran Mills if (aijmkl->no_SpMV2) { 10784a2a386eSRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL; 1079969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 10804a2a386eSRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL; 1081969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1082c9d46305SRichard Tran Mills } 10831950a7e7SRichard Tran Mills #endif 10844a2a386eSRichard Tran Mills 10859566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ)); 10864a2a386eSRichard Tran Mills 10879566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL)); 10884a2a386eSRichard Tran Mills *newmat = B; 10894a2a386eSRichard Tran Mills PetscFunctionReturn(0); 10904a2a386eSRichard Tran Mills } 10914a2a386eSRichard Tran Mills 10924a2a386eSRichard Tran Mills /*@C 10934a2a386eSRichard Tran Mills MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 10944a2a386eSRichard Tran Mills This type inherits from AIJ and is largely identical, but uses sparse BLAS 10954a2a386eSRichard Tran Mills routines from Intel MKL whenever possible. 109690147e49SRichard Tran Mills If the installed version of MKL supports the "SpMV2" sparse 109790147e49SRichard Tran Mills inspector-executor routines, then those are used by default. 1098597ee276SRichard Tran Mills MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1099597ee276SRichard Tran Mills symmetric A) operations are currently supported. 1100597ee276SRichard Tran Mills Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 110190147e49SRichard Tran Mills 1102d083f849SBarry Smith Collective 11034a2a386eSRichard Tran Mills 11044a2a386eSRichard Tran Mills Input Parameters: 11054a2a386eSRichard Tran Mills + comm - MPI communicator, set to PETSC_COMM_SELF 11064a2a386eSRichard Tran Mills . m - number of rows 11074a2a386eSRichard Tran Mills . n - number of columns 11084a2a386eSRichard Tran Mills . nz - number of nonzeros per row (same for all rows) 11094a2a386eSRichard Tran Mills - nnz - array containing the number of nonzeros in the various rows 11104a2a386eSRichard Tran Mills (possibly different for each row) or NULL 11114a2a386eSRichard Tran Mills 11124a2a386eSRichard Tran Mills Output Parameter: 11134a2a386eSRichard Tran Mills . A - the matrix 11144a2a386eSRichard Tran Mills 111590147e49SRichard Tran Mills Options Database Keys: 111666b7eeb6SRichard Tran Mills + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 111766b7eeb6SRichard Tran Mills - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 111890147e49SRichard Tran Mills 11194a2a386eSRichard Tran Mills Notes: 11204a2a386eSRichard Tran Mills If nnz is given then nz is ignored 11214a2a386eSRichard Tran Mills 11224a2a386eSRichard Tran Mills Level: intermediate 11234a2a386eSRichard Tran Mills 1124db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()` 11254a2a386eSRichard Tran Mills @*/ 11264a2a386eSRichard Tran Mills PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 11274a2a386eSRichard Tran Mills { 11284a2a386eSRichard Tran Mills PetscFunctionBegin; 11299566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,A)); 11309566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A,m,n,m,n)); 11319566063dSJacob Faibussowitsch PetscCall(MatSetType(*A,MATSEQAIJMKL)); 11329566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz)); 11334a2a386eSRichard Tran Mills PetscFunctionReturn(0); 11344a2a386eSRichard Tran Mills } 11354a2a386eSRichard Tran Mills 11364a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 11374a2a386eSRichard Tran Mills { 11384a2a386eSRichard Tran Mills PetscFunctionBegin; 11399566063dSJacob Faibussowitsch PetscCall(MatSetType(A,MATSEQAIJ)); 11409566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A)); 11414a2a386eSRichard Tran Mills PetscFunctionReturn(0); 11424a2a386eSRichard Tran Mills } 1143