14a2a386eSRichard Tran Mills /* 24a2a386eSRichard Tran Mills Defines basic operations for the MATSEQAIJMKL matrix class. 34a2a386eSRichard Tran Mills This class is derived from the MATSEQAIJ class and retains the 44a2a386eSRichard Tran Mills compressed row storage (aka Yale sparse matrix format) but uses 54a2a386eSRichard Tran Mills sparse BLAS operations from the Intel Math Kernel Library (MKL) 64a2a386eSRichard Tran Mills wherever possible. 74a2a386eSRichard Tran Mills */ 84a2a386eSRichard Tran Mills 94a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h> 104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 114a2a386eSRichard Tran Mills 124a2a386eSRichard Tran Mills /* MKL include files. */ 134a2a386eSRichard Tran Mills #include <mkl_spblas.h> /* Sparse BLAS */ 144a2a386eSRichard Tran Mills 154a2a386eSRichard Tran Mills typedef struct { 16c9d46305SRichard Tran Mills PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 175b49642aSRichard Tran Mills PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 184abfa3b3SRichard Tran Mills PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19551aa5c8SRichard Tran Mills PetscObjectState state; 20b8cbc1fbSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 21df555b71SRichard Tran Mills sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 22df555b71SRichard Tran Mills struct matrix_descr descr; 23b8cbc1fbSRichard Tran Mills #endif 244a2a386eSRichard Tran Mills } Mat_SeqAIJMKL; 254a2a386eSRichard Tran Mills 264a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 274a2a386eSRichard Tran Mills 284a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 294a2a386eSRichard Tran Mills { 304a2a386eSRichard Tran Mills /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 314a2a386eSRichard Tran Mills /* so we will ignore 'MatType type'. */ 324a2a386eSRichard Tran Mills PetscErrorCode ierr; 334a2a386eSRichard Tran Mills Mat B = *newmat; 34c1d5218aSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 354a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 36c1d5218aSRichard Tran Mills #endif 374a2a386eSRichard Tran Mills 384a2a386eSRichard Tran Mills PetscFunctionBegin; 394a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 404a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 414a2a386eSRichard Tran Mills } 424a2a386eSRichard Tran Mills 434a2a386eSRichard Tran Mills /* Reset the original function pointers. */ 4454871a98SRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJ; 454a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 464a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJ; 4754871a98SRichard Tran Mills B->ops->mult = MatMult_SeqAIJ; 48ff03dc53SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJ; 4954871a98SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJ; 50ff03dc53SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 5145fbe478SRichard Tran Mills B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 52e8be1fc7SRichard Tran Mills B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 534f53af40SRichard Tran Mills B->ops->ptap = MatPtAP_SeqAIJ_SeqAIJ; 544f53af40SRichard Tran Mills B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 55372ec6bbSRichard Tran Mills B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ; 564a2a386eSRichard Tran Mills 57e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 58e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 59e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 60e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 6145fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 624a940b00SSatish Balay if(!aijmkl->no_SpMV2) { 6345fbe478SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 64e8be1fc7SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 65e8be1fc7SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 66e8be1fc7SRichard Tran Mills #endif 67372ec6bbSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 6845fbe478SRichard Tran Mills } 69e9c94282SRichard Tran Mills 704abfa3b3SRichard Tran Mills /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 71e9c94282SRichard Tran Mills * simply involves destroying the MKL sparse matrix handle and then freeing 72e9c94282SRichard Tran Mills * the spptr pointer. */ 73a8327b06SKarl Rupp if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 74a8327b06SKarl Rupp 754abfa3b3SRichard Tran Mills if (aijmkl->sparse_optimized) { 760632b357SRichard Tran Mills sparse_status_t stat; 774abfa3b3SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 789c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 794abfa3b3SRichard Tran Mills } 804abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 81e9c94282SRichard Tran Mills ierr = PetscFree(B->spptr);CHKERRQ(ierr); 824a2a386eSRichard Tran Mills 834a2a386eSRichard Tran Mills /* Change the type of B to MATSEQAIJ. */ 844a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 854a2a386eSRichard Tran Mills 864a2a386eSRichard Tran Mills *newmat = B; 874a2a386eSRichard Tran Mills PetscFunctionReturn(0); 884a2a386eSRichard Tran Mills } 894a2a386eSRichard Tran Mills 904a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 914a2a386eSRichard Tran Mills { 924a2a386eSRichard Tran Mills PetscErrorCode ierr; 934a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 944a2a386eSRichard Tran Mills 954a2a386eSRichard Tran Mills PetscFunctionBegin; 96e9c94282SRichard Tran Mills 97e9c94282SRichard Tran Mills /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 98e9c94282SRichard Tran Mills * spptr pointer. */ 99e9c94282SRichard Tran Mills if (aijmkl) { 1004a2a386eSRichard Tran Mills /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 1014abfa3b3SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 1024abfa3b3SRichard Tran Mills if (aijmkl->sparse_optimized) { 1034abfa3b3SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 1044abfa3b3SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 1059c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 1064abfa3b3SRichard Tran Mills } 1074abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1084a2a386eSRichard Tran Mills ierr = PetscFree(A->spptr);CHKERRQ(ierr); 109e9c94282SRichard Tran Mills } 1104a2a386eSRichard Tran Mills 1114a2a386eSRichard Tran Mills /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 1124a2a386eSRichard Tran Mills * to destroy everything that remains. */ 1134a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 1144a2a386eSRichard Tran Mills /* Note that I don't call MatSetType(). I believe this is because that 1154a2a386eSRichard Tran Mills * is only to be called when *building* a matrix. I could be wrong, but 1164a2a386eSRichard Tran Mills * that is how things work for the SuperLU matrix class. */ 1174a2a386eSRichard Tran Mills ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 1184a2a386eSRichard Tran Mills PetscFunctionReturn(0); 1194a2a386eSRichard Tran Mills } 1204a2a386eSRichard Tran Mills 1215b49642aSRichard Tran Mills /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 1225b49642aSRichard Tran Mills * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 1235b49642aSRichard Tran Mills * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 1245b49642aSRichard Tran Mills * handle, creates a new one, and then calls mkl_sparse_optimize(). 1255b49642aSRichard Tran Mills * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 1265b49642aSRichard Tran Mills * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 1275b49642aSRichard Tran Mills * an unoptimized matrix handle here. */ 1286e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 1294a2a386eSRichard Tran Mills { 1306e369cd5SRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 1316e369cd5SRichard Tran Mills /* If the MKL library does not have mkl_sparse_optimize(), then this routine 1326e369cd5SRichard Tran Mills * does nothing. We make it callable anyway in this case because it cuts 1336e369cd5SRichard Tran Mills * down on littering the code with #ifdefs. */ 13445fbe478SRichard Tran Mills PetscFunctionBegin; 1356e369cd5SRichard Tran Mills PetscFunctionReturn(0); 1366e369cd5SRichard Tran Mills #else 137a8327b06SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 138a8327b06SKarl Rupp Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 139a8327b06SKarl Rupp PetscInt m,n; 140a8327b06SKarl Rupp MatScalar *aa; 141a8327b06SKarl Rupp PetscInt *aj,*ai; 1426e369cd5SRichard Tran Mills sparse_status_t stat; 143551aa5c8SRichard Tran Mills PetscErrorCode ierr; 1444a2a386eSRichard Tran Mills 145a8327b06SKarl Rupp PetscFunctionBegin; 146*4d51fa23SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_SP2M) 147*4d51fa23SRichard Tran Mills /* Versions of MKL that don't have mkl_sparse_sp2m() still support the old, non-inspector-executor interfaces. For these versions, 148*4d51fa23SRichard Tran Mills * we simply exit. Versions that do have mkl_sparse_sp2m() (version 18, update 2 and later) have deprecated the old interfaces. 149*4d51fa23SRichard Tran Mills * In this case, we must use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by 150*4d51fa23SRichard Tran Mills * not calling mkl_sparse_optimize() later. */ 1516e369cd5SRichard Tran Mills if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 152*4d51fa23SRichard Tran Mills #endif 1536e369cd5SRichard Tran Mills 1540632b357SRichard Tran Mills if (aijmkl->sparse_optimized) { 1550632b357SRichard Tran Mills /* Matrix has been previously assembled and optimized. Must destroy old 1560632b357SRichard Tran Mills * matrix handle before running the optimization step again. */ 1570632b357SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 1589c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 1590632b357SRichard Tran Mills } 1608d3fe1b0SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 1616e369cd5SRichard Tran Mills 162c9d46305SRichard Tran Mills /* Now perform the SpMV2 setup and matrix optimization. */ 163df555b71SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 164df555b71SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 165df555b71SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 16658678438SRichard Tran Mills m = A->rmap->n; 16758678438SRichard Tran Mills n = A->cmap->n; 168df555b71SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 169df555b71SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 170df555b71SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 17180095d54SIrina Sokolova if ((a->nz!=0) & !(A->structure_only)) { 1728d3fe1b0SRichard Tran Mills /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 1738d3fe1b0SRichard Tran Mills * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 17458678438SRichard Tran Mills stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 175e8be1fc7SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle"); 176df555b71SRichard Tran Mills stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 177e8be1fc7SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 178df555b71SRichard Tran Mills stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 179e8be1fc7SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 18050a5026bSRichard Tran Mills if (!aijmkl->no_SpMV2) { 181df555b71SRichard Tran Mills stat = mkl_sparse_optimize(aijmkl->csrA); 182e8be1fc7SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 18350a5026bSRichard Tran Mills } 1844abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 185e995cf24SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 186c9d46305SRichard Tran Mills } 1876e369cd5SRichard Tran Mills 1886e369cd5SRichard Tran Mills PetscFunctionReturn(0); 189d995685eSRichard Tran Mills #endif 1906e369cd5SRichard Tran Mills } 1916e369cd5SRichard Tran Mills 19219afcda9SRichard Tran Mills /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 19319afcda9SRichard Tran Mills * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 1946c87cf42SRichard Tran Mills * matrix handle. 195aab60f1bSRichard Tran Mills * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as 196aab60f1bSRichard Tran Mills * there is no good alternative. */ 19719afcda9SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 1986c87cf42SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 19919afcda9SRichard Tran Mills { 20019afcda9SRichard Tran Mills PetscErrorCode ierr; 20119afcda9SRichard Tran Mills sparse_status_t stat; 20219afcda9SRichard Tran Mills sparse_index_base_t indexing; 20319afcda9SRichard Tran Mills PetscInt nrows, ncols; 20445fbe478SRichard Tran Mills PetscInt *aj,*ai,*dummy; 20519afcda9SRichard Tran Mills MatScalar *aa; 20619afcda9SRichard Tran Mills Mat A; 20719afcda9SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 20819afcda9SRichard Tran Mills 20945fbe478SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 21045fbe478SRichard Tran Mills stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 2119c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 2126c87cf42SRichard Tran Mills 213aab60f1bSRichard Tran Mills if (reuse == MAT_REUSE_MATRIX) { 214aab60f1bSRichard Tran Mills ierr = MatDestroy(mat);CHKERRQ(ierr); 215aab60f1bSRichard Tran Mills } 21619afcda9SRichard Tran Mills ierr = MatCreate(comm,&A);CHKERRQ(ierr); 21719afcda9SRichard Tran Mills ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 21845fbe478SRichard Tran Mills ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 219aab60f1bSRichard Tran Mills /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 220aab60f1bSRichard Tran Mills * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 221aab60f1bSRichard Tran Mills * they will be destroyed when the MKL handle is destroyed. 222aab60f1bSRichard Tran Mills * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 22319afcda9SRichard Tran Mills ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 22419afcda9SRichard Tran Mills 22519afcda9SRichard Tran Mills /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 22619afcda9SRichard Tran Mills * Now turn it into a MATSEQAIJMKL. */ 22719afcda9SRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 2286c87cf42SRichard Tran Mills 22919afcda9SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 23019afcda9SRichard Tran Mills aijmkl->csrA = csrA; 2316c87cf42SRichard Tran Mills 23219afcda9SRichard Tran Mills /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 23319afcda9SRichard Tran Mills * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 23419afcda9SRichard Tran Mills * and just need to be able to run the MKL optimization step. */ 235f3fd1758SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 236f3fd1758SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 237f3fd1758SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 23819afcda9SRichard Tran Mills stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 23951539a68SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 24019afcda9SRichard Tran Mills stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 24151539a68SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 24250a5026bSRichard Tran Mills if (!aijmkl->no_SpMV2) { 24319afcda9SRichard Tran Mills stat = mkl_sparse_optimize(aijmkl->csrA); 24451539a68SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 24550a5026bSRichard Tran Mills } 24619afcda9SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 247e995cf24SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 24819afcda9SRichard Tran Mills 24919afcda9SRichard Tran Mills *mat = A; 25019afcda9SRichard Tran Mills PetscFunctionReturn(0); 25119afcda9SRichard Tran Mills } 25219afcda9SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 25319afcda9SRichard Tran Mills 254e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 255e8be1fc7SRichard Tran Mills * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 256e8be1fc7SRichard Tran Mills * MatMatMultNumeric(). */ 257e8be1fc7SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 258e8be1fc7SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 259e8be1fc7SRichard Tran Mills { 260e8be1fc7SRichard Tran Mills PetscInt i; 261e8be1fc7SRichard Tran Mills PetscInt nrows,ncols; 262e8be1fc7SRichard Tran Mills PetscInt nz; 263e8be1fc7SRichard Tran Mills PetscInt *ai,*aj,*dummy; 264e8be1fc7SRichard Tran Mills PetscScalar *aa; 265e8be1fc7SRichard Tran Mills PetscErrorCode ierr; 266e8be1fc7SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 267e8be1fc7SRichard Tran Mills sparse_status_t stat; 268e8be1fc7SRichard Tran Mills sparse_index_base_t indexing; 269e8be1fc7SRichard Tran Mills 270e8be1fc7SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 271e8be1fc7SRichard Tran Mills 272e8be1fc7SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 273e8be1fc7SRichard Tran Mills stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 274e8be1fc7SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 275e8be1fc7SRichard Tran Mills 276e8be1fc7SRichard Tran Mills /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 277e8be1fc7SRichard Tran Mills * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 278e8be1fc7SRichard Tran Mills for (i=0; i<nrows; i++) { 279e8be1fc7SRichard Tran Mills nz = ai[i+1] - ai[i]; 280e8be1fc7SRichard Tran Mills ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 281e8be1fc7SRichard Tran Mills } 282e8be1fc7SRichard Tran Mills 283e8be1fc7SRichard Tran Mills ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 284e8be1fc7SRichard Tran Mills ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 285e8be1fc7SRichard Tran Mills 286e995cf24SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 287e995cf24SRichard Tran Mills /* We mark our matrix as having a valid, optimized MKL handle. 288e995cf24SRichard Tran Mills * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */ 289e995cf24SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 290e995cf24SRichard Tran Mills 291e8be1fc7SRichard Tran Mills PetscFunctionReturn(0); 292e8be1fc7SRichard Tran Mills } 293e8be1fc7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 294e8be1fc7SRichard Tran Mills 2956e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 2966e369cd5SRichard Tran Mills { 2976e369cd5SRichard Tran Mills PetscErrorCode ierr; 2986e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 2996e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl_dest; 3006e369cd5SRichard Tran Mills 3016e369cd5SRichard Tran Mills PetscFunctionBegin; 3026e369cd5SRichard Tran Mills ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 3036e369cd5SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 3046e369cd5SRichard Tran Mills aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 3056e369cd5SRichard Tran Mills ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 3066e369cd5SRichard Tran Mills aijmkl_dest->sparse_optimized = PETSC_FALSE; 3075b49642aSRichard Tran Mills if (aijmkl->eager_inspection) { 3086e369cd5SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 3095b49642aSRichard Tran Mills } 3106e369cd5SRichard Tran Mills PetscFunctionReturn(0); 3116e369cd5SRichard Tran Mills } 3126e369cd5SRichard Tran Mills 3136e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 3146e369cd5SRichard Tran Mills { 3156e369cd5SRichard Tran Mills PetscErrorCode ierr; 3166e369cd5SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3175b49642aSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 3186e369cd5SRichard Tran Mills 3196e369cd5SRichard Tran Mills PetscFunctionBegin; 3206e369cd5SRichard Tran Mills if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 3216e369cd5SRichard Tran Mills 3226e369cd5SRichard Tran Mills /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 3236e369cd5SRichard Tran Mills * extra information and some different methods, call the AssemblyEnd 3246e369cd5SRichard Tran Mills * routine for a MATSEQAIJ. 3256e369cd5SRichard Tran Mills * I'm not sure if this is the best way to do this, but it avoids 326d96e85feSRichard Tran Mills * a lot of code duplication. */ 3276e369cd5SRichard Tran Mills a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 3286e369cd5SRichard Tran Mills ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 3296e369cd5SRichard Tran Mills 3305b49642aSRichard Tran Mills /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 3315b49642aSRichard Tran Mills * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 3325b49642aSRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 3335b49642aSRichard Tran Mills if (aijmkl->eager_inspection) { 3346e369cd5SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 3355b49642aSRichard Tran Mills } 336df555b71SRichard Tran Mills 3374a2a386eSRichard Tran Mills PetscFunctionReturn(0); 3384a2a386eSRichard Tran Mills } 3394a2a386eSRichard Tran Mills 34050a5026bSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_SP2M 3414a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 3424a2a386eSRichard Tran Mills { 3434a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3444a2a386eSRichard Tran Mills const PetscScalar *x; 3454a2a386eSRichard Tran Mills PetscScalar *y; 3464a2a386eSRichard Tran Mills const MatScalar *aa; 3474a2a386eSRichard Tran Mills PetscErrorCode ierr; 3484a2a386eSRichard Tran Mills PetscInt m=A->rmap->n; 349db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 350db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 351db63039fSRichard Tran Mills PetscScalar beta = 0.0; 3524a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 353db63039fSRichard Tran Mills char matdescra[6]; 354db63039fSRichard Tran Mills 3554a2a386eSRichard Tran Mills 3564a2a386eSRichard Tran Mills /* Variables not in MatMult_SeqAIJ. */ 357ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 358ff03dc53SRichard Tran Mills 359ff03dc53SRichard Tran Mills PetscFunctionBegin; 360db63039fSRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 361db63039fSRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 362ff03dc53SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 363ff03dc53SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 364ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 365ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 366ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 367ff03dc53SRichard Tran Mills 368ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 369db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 370ff03dc53SRichard Tran Mills 371ff03dc53SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 372ff03dc53SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 373ff03dc53SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 374ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 375ff03dc53SRichard Tran Mills } 37650a5026bSRichard Tran Mills #endif 377ff03dc53SRichard Tran Mills 378d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 379df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 380df555b71SRichard Tran Mills { 381df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 382df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 383df555b71SRichard Tran Mills const PetscScalar *x; 384df555b71SRichard Tran Mills PetscScalar *y; 385df555b71SRichard Tran Mills PetscErrorCode ierr; 386df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 387551aa5c8SRichard Tran Mills PetscObjectState state; 388df555b71SRichard Tran Mills 389df555b71SRichard Tran Mills PetscFunctionBegin; 390df555b71SRichard Tran Mills 39138987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 39238987b35SRichard Tran Mills if(!a->nz) { 39338987b35SRichard Tran Mills PetscInt i; 39438987b35SRichard Tran Mills PetscInt m=A->rmap->n; 39538987b35SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 39638987b35SRichard Tran Mills for (i=0; i<m; i++) { 39738987b35SRichard Tran Mills y[i] = 0.0; 39838987b35SRichard Tran Mills } 39938987b35SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 40038987b35SRichard Tran Mills PetscFunctionReturn(0); 40138987b35SRichard Tran Mills } 402f36dfe3fSRichard Tran Mills 403df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 404df555b71SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 405df555b71SRichard Tran Mills 4063fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 4073fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 4083fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 409551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 410551aa5c8SRichard Tran Mills if (!aijmkl->sparse_optimized || aijmkl->state != state) { 4113fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 4123fa15762SRichard Tran Mills } 4133fa15762SRichard Tran Mills 414df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMult. */ 415df555b71SRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 4169c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 417df555b71SRichard Tran Mills 418df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 419df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 420df555b71SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 421df555b71SRichard Tran Mills PetscFunctionReturn(0); 422df555b71SRichard Tran Mills } 423d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 424df555b71SRichard Tran Mills 42550a5026bSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_SP2M 426ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 427ff03dc53SRichard Tran Mills { 428ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 429ff03dc53SRichard Tran Mills const PetscScalar *x; 430ff03dc53SRichard Tran Mills PetscScalar *y; 431ff03dc53SRichard Tran Mills const MatScalar *aa; 432ff03dc53SRichard Tran Mills PetscErrorCode ierr; 433ff03dc53SRichard Tran Mills PetscInt m=A->rmap->n; 434db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 435db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 436db63039fSRichard Tran Mills PetscScalar beta = 0.0; 437ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 438db63039fSRichard Tran Mills char matdescra[6]; 439ff03dc53SRichard Tran Mills 440ff03dc53SRichard Tran Mills /* Variables not in MatMultTranspose_SeqAIJ. */ 441ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 4424a2a386eSRichard Tran Mills 4434a2a386eSRichard Tran Mills PetscFunctionBegin; 444969800c5SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 445969800c5SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 4464a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4474a2a386eSRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 4484a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 4494a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 4504a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 4514a2a386eSRichard Tran Mills 4524a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 453db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 4544a2a386eSRichard Tran Mills 4554a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 4564a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4574a2a386eSRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 4584a2a386eSRichard Tran Mills PetscFunctionReturn(0); 4594a2a386eSRichard Tran Mills } 46050a5026bSRichard Tran Mills #endif 4614a2a386eSRichard Tran Mills 462d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 463df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 464df555b71SRichard Tran Mills { 465df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 466df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 467df555b71SRichard Tran Mills const PetscScalar *x; 468df555b71SRichard Tran Mills PetscScalar *y; 469df555b71SRichard Tran Mills PetscErrorCode ierr; 4700632b357SRichard Tran Mills sparse_status_t stat; 471551aa5c8SRichard Tran Mills PetscObjectState state; 472df555b71SRichard Tran Mills 473df555b71SRichard Tran Mills PetscFunctionBegin; 474df555b71SRichard Tran Mills 47538987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 47638987b35SRichard Tran Mills if(!a->nz) { 47738987b35SRichard Tran Mills PetscInt i; 47838987b35SRichard Tran Mills PetscInt n=A->cmap->n; 47938987b35SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 48038987b35SRichard Tran Mills for (i=0; i<n; i++) { 48138987b35SRichard Tran Mills y[i] = 0.0; 48238987b35SRichard Tran Mills } 48338987b35SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 48438987b35SRichard Tran Mills PetscFunctionReturn(0); 48538987b35SRichard Tran Mills } 486f36dfe3fSRichard Tran Mills 487df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 488df555b71SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 489df555b71SRichard Tran Mills 4903fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 4913fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 4923fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 493551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 494551aa5c8SRichard Tran Mills if (!aijmkl->sparse_optimized || aijmkl->state != state) { 4953fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 4963fa15762SRichard Tran Mills } 4973fa15762SRichard Tran Mills 498df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 499df555b71SRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 5009c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 501df555b71SRichard Tran Mills 502df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 503df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 504df555b71SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 505df555b71SRichard Tran Mills PetscFunctionReturn(0); 506df555b71SRichard Tran Mills } 507d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 508df555b71SRichard Tran Mills 50950a5026bSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_SP2M 5104a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 5114a2a386eSRichard Tran Mills { 5124a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5134a2a386eSRichard Tran Mills const PetscScalar *x; 5144a2a386eSRichard Tran Mills PetscScalar *y,*z; 5154a2a386eSRichard Tran Mills const MatScalar *aa; 5164a2a386eSRichard Tran Mills PetscErrorCode ierr; 5174a2a386eSRichard Tran Mills PetscInt m=A->rmap->n; 518db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 5194a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 5204a2a386eSRichard Tran Mills PetscInt i; 5214a2a386eSRichard Tran Mills 522ff03dc53SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 523ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 524a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 525db63039fSRichard Tran Mills PetscScalar beta; 526a84739b8SRichard Tran Mills char matdescra[6]; 527ff03dc53SRichard Tran Mills 528ff03dc53SRichard Tran Mills PetscFunctionBegin; 529a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 530a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 531a84739b8SRichard Tran Mills 532ff03dc53SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 533ff03dc53SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 534ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 535ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 536ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 537ff03dc53SRichard Tran Mills 538ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 539a84739b8SRichard Tran Mills if (zz == yy) { 540a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 541db63039fSRichard Tran Mills beta = 1.0; 542db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 543a84739b8SRichard Tran Mills } else { 544db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 545db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 546db63039fSRichard Tran Mills beta = 0.0; 547db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 548ff03dc53SRichard Tran Mills for (i=0; i<m; i++) { 549ff03dc53SRichard Tran Mills z[i] += y[i]; 550ff03dc53SRichard Tran Mills } 551a84739b8SRichard Tran Mills } 552ff03dc53SRichard Tran Mills 553ff03dc53SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 554ff03dc53SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 555ff03dc53SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 556ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 557ff03dc53SRichard Tran Mills } 55850a5026bSRichard Tran Mills #endif 559ff03dc53SRichard Tran Mills 560d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 561df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 562df555b71SRichard Tran Mills { 563df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 564df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 565df555b71SRichard Tran Mills const PetscScalar *x; 566df555b71SRichard Tran Mills PetscScalar *y,*z; 567df555b71SRichard Tran Mills PetscErrorCode ierr; 568df555b71SRichard Tran Mills PetscInt m=A->rmap->n; 569df555b71SRichard Tran Mills PetscInt i; 570df555b71SRichard Tran Mills 571df555b71SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 572df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 573551aa5c8SRichard Tran Mills PetscObjectState state; 574df555b71SRichard Tran Mills 575df555b71SRichard Tran Mills PetscFunctionBegin; 576df555b71SRichard Tran Mills 57738987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 57838987b35SRichard Tran Mills if(!a->nz) { 57938987b35SRichard Tran Mills PetscInt i; 58038987b35SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 58138987b35SRichard Tran Mills for (i=0; i<m; i++) { 58238987b35SRichard Tran Mills z[i] = y[i]; 58338987b35SRichard Tran Mills } 58438987b35SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 58538987b35SRichard Tran Mills PetscFunctionReturn(0); 58638987b35SRichard Tran Mills } 587df555b71SRichard Tran Mills 588df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 589df555b71SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 590df555b71SRichard Tran Mills 5913fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 5923fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 5933fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 594551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 595551aa5c8SRichard Tran Mills if (!aijmkl->sparse_optimized || aijmkl->state != state) { 5963fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 5973fa15762SRichard Tran Mills } 5983fa15762SRichard Tran Mills 599df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 600df555b71SRichard Tran Mills if (zz == yy) { 601df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 602df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 603db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 6049c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 605df555b71SRichard Tran Mills } else { 606df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 607df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 608db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 6099c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 610df555b71SRichard Tran Mills for (i=0; i<m; i++) { 611df555b71SRichard Tran Mills z[i] += y[i]; 612df555b71SRichard Tran Mills } 613df555b71SRichard Tran Mills } 614df555b71SRichard Tran Mills 615df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 616df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 617df555b71SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 618df555b71SRichard Tran Mills PetscFunctionReturn(0); 619df555b71SRichard Tran Mills } 620d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 621df555b71SRichard Tran Mills 62250a5026bSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_SP2M 623ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 624ff03dc53SRichard Tran Mills { 625ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 626ff03dc53SRichard Tran Mills const PetscScalar *x; 627ff03dc53SRichard Tran Mills PetscScalar *y,*z; 628ff03dc53SRichard Tran Mills const MatScalar *aa; 629ff03dc53SRichard Tran Mills PetscErrorCode ierr; 630ff03dc53SRichard Tran Mills PetscInt m=A->rmap->n; 631db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 632ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 633ff03dc53SRichard Tran Mills PetscInt i; 634ff03dc53SRichard Tran Mills 635ff03dc53SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 636ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 637a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 638db63039fSRichard Tran Mills PetscScalar beta; 639a84739b8SRichard Tran Mills char matdescra[6]; 6404a2a386eSRichard Tran Mills 6414a2a386eSRichard Tran Mills PetscFunctionBegin; 642a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 643a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 644a84739b8SRichard Tran Mills 6454a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 6464a2a386eSRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 6474a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 6484a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 6494a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 6504a2a386eSRichard Tran Mills 6514a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 652a84739b8SRichard Tran Mills if (zz == yy) { 653a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 654db63039fSRichard Tran Mills beta = 1.0; 655969800c5SRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 656a84739b8SRichard Tran Mills } else { 657db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 658db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 659db63039fSRichard Tran Mills beta = 0.0; 660db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 661969800c5SRichard Tran Mills for (i=0; i<n; i++) { 6624a2a386eSRichard Tran Mills z[i] += y[i]; 6634a2a386eSRichard Tran Mills } 664a84739b8SRichard Tran Mills } 6654a2a386eSRichard Tran Mills 6664a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 6674a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 6684a2a386eSRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 6694a2a386eSRichard Tran Mills PetscFunctionReturn(0); 6704a2a386eSRichard Tran Mills } 67150a5026bSRichard Tran Mills #endif 6724a2a386eSRichard Tran Mills 673d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 674df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 675df555b71SRichard Tran Mills { 676df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 677df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 678df555b71SRichard Tran Mills const PetscScalar *x; 679df555b71SRichard Tran Mills PetscScalar *y,*z; 680df555b71SRichard Tran Mills PetscErrorCode ierr; 681969800c5SRichard Tran Mills PetscInt n=A->cmap->n; 682df555b71SRichard Tran Mills PetscInt i; 683551aa5c8SRichard Tran Mills PetscObjectState state; 684df555b71SRichard Tran Mills 685df555b71SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 686df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 687df555b71SRichard Tran Mills 688df555b71SRichard Tran Mills PetscFunctionBegin; 689df555b71SRichard Tran Mills 69038987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 69138987b35SRichard Tran Mills if(!a->nz) { 69238987b35SRichard Tran Mills PetscInt i; 69338987b35SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 69438987b35SRichard Tran Mills for (i=0; i<n; i++) { 69538987b35SRichard Tran Mills z[i] = y[i]; 69638987b35SRichard Tran Mills } 69738987b35SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 69838987b35SRichard Tran Mills PetscFunctionReturn(0); 69938987b35SRichard Tran Mills } 700f36dfe3fSRichard Tran Mills 701df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 702df555b71SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 703df555b71SRichard Tran Mills 7043fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 7053fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 7063fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 707551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 708551aa5c8SRichard Tran Mills if (!aijmkl->sparse_optimized || aijmkl->state != state) { 7093fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 7103fa15762SRichard Tran Mills } 7113fa15762SRichard Tran Mills 712df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 713df555b71SRichard Tran Mills if (zz == yy) { 714df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 715df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 716db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 7179c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 718df555b71SRichard Tran Mills } else { 719df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 720df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 721db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 7229c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 723969800c5SRichard Tran Mills for (i=0; i<n; i++) { 724df555b71SRichard Tran Mills z[i] += y[i]; 725df555b71SRichard Tran Mills } 726df555b71SRichard Tran Mills } 727df555b71SRichard Tran Mills 728df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 729df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 730df555b71SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 731df555b71SRichard Tran Mills PetscFunctionReturn(0); 732df555b71SRichard Tran Mills } 733d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 734df555b71SRichard Tran Mills 73545fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 736aab60f1bSRichard Tran Mills /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because 737aab60f1bSRichard Tran Mills * the MatMatMult() interface code calls MatMatMultNumeric() in this case. 7383ecbffd0SRichard Tran Mills * For releases of MKL prior to version 18, update 2: 739aab60f1bSRichard Tran Mills * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the 740aab60f1bSRichard Tran Mills * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more 741aab60f1bSRichard Tran Mills * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing 742aab60f1bSRichard Tran Mills * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */ 74345fbe478SRichard Tran Mills PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 74445fbe478SRichard Tran Mills { 74545fbe478SRichard Tran Mills Mat_SeqAIJMKL *a, *b; 74645fbe478SRichard Tran Mills sparse_matrix_t csrA, csrB, csrC; 74745fbe478SRichard Tran Mills PetscErrorCode ierr; 74845fbe478SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 749551aa5c8SRichard Tran Mills PetscObjectState state; 75045fbe478SRichard Tran Mills 75145fbe478SRichard Tran Mills PetscFunctionBegin; 75245fbe478SRichard Tran Mills a = (Mat_SeqAIJMKL*)A->spptr; 75345fbe478SRichard Tran Mills b = (Mat_SeqAIJMKL*)B->spptr; 754551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 755551aa5c8SRichard Tran Mills if (!a->sparse_optimized || a->state != state) { 75645fbe478SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 75745fbe478SRichard Tran Mills } 758551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 759551aa5c8SRichard Tran Mills if (!b->sparse_optimized || b->state != state) { 76045fbe478SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(B); 76145fbe478SRichard Tran Mills } 76245fbe478SRichard Tran Mills csrA = a->csrA; 76345fbe478SRichard Tran Mills csrB = b->csrA; 76445fbe478SRichard Tran Mills 76545fbe478SRichard Tran Mills stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 7669c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 76745fbe478SRichard Tran Mills 7686c87cf42SRichard Tran Mills ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 76945fbe478SRichard Tran Mills 77045fbe478SRichard Tran Mills PetscFunctionReturn(0); 77145fbe478SRichard Tran Mills } 77245fbe478SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 77345fbe478SRichard Tran Mills 774e8be1fc7SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 775e8be1fc7SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C) 776e8be1fc7SRichard Tran Mills { 777e8be1fc7SRichard Tran Mills Mat_SeqAIJMKL *a, *b, *c; 778e8be1fc7SRichard Tran Mills sparse_matrix_t csrA, csrB, csrC; 779e8be1fc7SRichard Tran Mills PetscErrorCode ierr; 780e8be1fc7SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 781e8be1fc7SRichard Tran Mills struct matrix_descr descr_type_gen; 782e8be1fc7SRichard Tran Mills PetscObjectState state; 783e8be1fc7SRichard Tran Mills 784e8be1fc7SRichard Tran Mills PetscFunctionBegin; 785e8be1fc7SRichard Tran Mills a = (Mat_SeqAIJMKL*)A->spptr; 786e8be1fc7SRichard Tran Mills b = (Mat_SeqAIJMKL*)B->spptr; 787e8be1fc7SRichard Tran Mills c = (Mat_SeqAIJMKL*)C->spptr; 788e8be1fc7SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 789e8be1fc7SRichard Tran Mills if (!a->sparse_optimized || a->state != state) { 790e8be1fc7SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 791e8be1fc7SRichard Tran Mills } 792e8be1fc7SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 793e8be1fc7SRichard Tran Mills if (!b->sparse_optimized || b->state != state) { 794e8be1fc7SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(B); 795e8be1fc7SRichard Tran Mills } 796e8be1fc7SRichard Tran Mills csrA = a->csrA; 797e8be1fc7SRichard Tran Mills csrB = b->csrA; 798e8be1fc7SRichard Tran Mills csrC = c->csrA; 799e8be1fc7SRichard Tran Mills descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 800e8be1fc7SRichard Tran Mills 801e8be1fc7SRichard Tran Mills stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA, 802e8be1fc7SRichard Tran Mills SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB, 803e8be1fc7SRichard Tran Mills SPARSE_STAGE_FINALIZE_MULT,&csrC); 804e8be1fc7SRichard Tran Mills 805e8be1fc7SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply"); 806e8be1fc7SRichard Tran Mills 807e8be1fc7SRichard Tran Mills /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 8084f53af40SRichard Tran Mills ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 809e8be1fc7SRichard Tran Mills 810e8be1fc7SRichard Tran Mills PetscFunctionReturn(0); 811e8be1fc7SRichard Tran Mills } 812e8be1fc7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M */ 813e8be1fc7SRichard Tran Mills 814372ec6bbSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 815372ec6bbSRichard Tran Mills PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 816372ec6bbSRichard Tran Mills { 817372ec6bbSRichard Tran Mills Mat_SeqAIJMKL *a, *b; 818372ec6bbSRichard Tran Mills sparse_matrix_t csrA, csrB, csrC; 819372ec6bbSRichard Tran Mills PetscErrorCode ierr; 820372ec6bbSRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 821551aa5c8SRichard Tran Mills PetscObjectState state; 822372ec6bbSRichard Tran Mills 823372ec6bbSRichard Tran Mills PetscFunctionBegin; 824372ec6bbSRichard Tran Mills a = (Mat_SeqAIJMKL*)A->spptr; 825372ec6bbSRichard Tran Mills b = (Mat_SeqAIJMKL*)B->spptr; 826551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 827551aa5c8SRichard Tran Mills if (!a->sparse_optimized || a->state != state) { 828372ec6bbSRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 829372ec6bbSRichard Tran Mills } 830551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 831551aa5c8SRichard Tran Mills if (!b->sparse_optimized || b->state != state) { 832372ec6bbSRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(B); 833372ec6bbSRichard Tran Mills } 834372ec6bbSRichard Tran Mills csrA = a->csrA; 835372ec6bbSRichard Tran Mills csrB = b->csrA; 836372ec6bbSRichard Tran Mills 837372ec6bbSRichard Tran Mills stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC); 8389c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 839372ec6bbSRichard Tran Mills 840372ec6bbSRichard Tran Mills ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 841372ec6bbSRichard Tran Mills 842372ec6bbSRichard Tran Mills PetscFunctionReturn(0); 843372ec6bbSRichard Tran Mills } 844372ec6bbSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 845372ec6bbSRichard Tran Mills 8464f53af40SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 8474f53af40SRichard Tran Mills PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C) 8484f53af40SRichard Tran Mills { 8494f53af40SRichard Tran Mills Mat_SeqAIJMKL *a, *p, *c; 8504f53af40SRichard Tran Mills sparse_matrix_t csrA, csrP, csrC; 8514f53af40SRichard Tran Mills PetscBool set, flag; 8524f53af40SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 853b9e1dd46SRichard Tran Mills struct matrix_descr descr_type_sym; 8544f53af40SRichard Tran Mills PetscObjectState state; 8554f53af40SRichard Tran Mills PetscErrorCode ierr; 8564f53af40SRichard Tran Mills 8574f53af40SRichard Tran Mills PetscFunctionBegin; 8584f53af40SRichard Tran Mills ierr = MatIsSymmetricKnown(A,&set,&flag); 8594f53af40SRichard Tran Mills if (!set || (set && !flag)) { 8604f53af40SRichard Tran Mills ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 8614f53af40SRichard Tran Mills PetscFunctionReturn(0); 8624f53af40SRichard Tran Mills } 8634f53af40SRichard Tran Mills 8644f53af40SRichard Tran Mills a = (Mat_SeqAIJMKL*)A->spptr; 8654f53af40SRichard Tran Mills p = (Mat_SeqAIJMKL*)P->spptr; 8664f53af40SRichard Tran Mills c = (Mat_SeqAIJMKL*)C->spptr; 8674f53af40SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 8684f53af40SRichard Tran Mills if (!a->sparse_optimized || a->state != state) { 8694f53af40SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 8704f53af40SRichard Tran Mills } 8714f53af40SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 8724f53af40SRichard Tran Mills if (!p->sparse_optimized || p->state != state) { 8734f53af40SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(P); 8744f53af40SRichard Tran Mills } 8754f53af40SRichard Tran Mills csrA = a->csrA; 8764f53af40SRichard Tran Mills csrP = p->csrA; 8774f53af40SRichard Tran Mills csrC = c->csrA; 878b9e1dd46SRichard Tran Mills descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 879b9e1dd46SRichard Tran Mills descr_type_sym.mode = SPARSE_FILL_MODE_LOWER; 880b9e1dd46SRichard Tran Mills descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 8814f53af40SRichard Tran Mills 882f8990b4aSRichard Tran Mills /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 883b9e1dd46SRichard Tran Mills stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 8844f53af40SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr"); 8854f53af40SRichard Tran Mills 8864f53af40SRichard Tran Mills /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 8874f53af40SRichard Tran Mills ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 8884f53af40SRichard Tran Mills 8894f53af40SRichard Tran Mills PetscFunctionReturn(0); 8904f53af40SRichard Tran Mills } 8914f53af40SRichard Tran Mills #endif 8924f53af40SRichard Tran Mills 8934f53af40SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 8944f53af40SRichard Tran Mills PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 8954f53af40SRichard Tran Mills { 8964f53af40SRichard Tran Mills Mat_SeqAIJMKL *a, *p; 8974f53af40SRichard Tran Mills sparse_matrix_t csrA, csrP, csrC; 8984f53af40SRichard Tran Mills PetscBool set, flag; 8994f53af40SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 900b9e1dd46SRichard Tran Mills struct matrix_descr descr_type_sym; 9014f53af40SRichard Tran Mills PetscObjectState state; 9024f53af40SRichard Tran Mills PetscErrorCode ierr; 9034f53af40SRichard Tran Mills 9044f53af40SRichard Tran Mills PetscFunctionBegin; 9054f53af40SRichard Tran Mills ierr = MatIsSymmetricKnown(A,&set,&flag); 9064f53af40SRichard Tran Mills if (!set || (set && !flag)) { 9074f53af40SRichard Tran Mills ierr = MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);CHKERRQ(ierr); 9084f53af40SRichard Tran Mills PetscFunctionReturn(0); 9094f53af40SRichard Tran Mills } 9104f53af40SRichard Tran Mills 9114f53af40SRichard Tran Mills if (scall == MAT_REUSE_MATRIX) { 9124f53af40SRichard Tran Mills ierr = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);CHKERRQ(ierr); 9134f53af40SRichard Tran Mills PetscFunctionReturn(0); 9144f53af40SRichard Tran Mills } 9154f53af40SRichard Tran Mills 9164f53af40SRichard Tran Mills a = (Mat_SeqAIJMKL*)A->spptr; 9174f53af40SRichard Tran Mills p = (Mat_SeqAIJMKL*)P->spptr; 9184f53af40SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 9194f53af40SRichard Tran Mills if (!a->sparse_optimized || a->state != state) { 9204f53af40SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 9214f53af40SRichard Tran Mills } 9224f53af40SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 9234f53af40SRichard Tran Mills if (!p->sparse_optimized || p->state != state) { 9244f53af40SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(P); 9254f53af40SRichard Tran Mills } 9264f53af40SRichard Tran Mills csrA = a->csrA; 9274f53af40SRichard Tran Mills csrP = p->csrA; 928b9e1dd46SRichard Tran Mills descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 929b9e1dd46SRichard Tran Mills descr_type_sym.mode = SPARSE_FILL_MODE_LOWER; 930b9e1dd46SRichard Tran Mills descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 9314f53af40SRichard Tran Mills 932f8990b4aSRichard Tran Mills /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 933b9e1dd46SRichard Tran Mills stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT); 9344f53af40SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr"); 9354f53af40SRichard Tran Mills 9364f53af40SRichard Tran Mills ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 9374f53af40SRichard Tran Mills ierr = MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 9384f53af40SRichard Tran Mills 9394f53af40SRichard Tran Mills PetscFunctionReturn(0); 9404f53af40SRichard Tran Mills } 9414f53af40SRichard Tran Mills #endif 9424f53af40SRichard Tran Mills 9434a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 9444a2a386eSRichard Tran Mills * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 9454a2a386eSRichard Tran Mills * routine, but can also be used to convert an assembled SeqAIJ matrix 9464a2a386eSRichard Tran Mills * into a SeqAIJMKL one. */ 9474a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 9484a2a386eSRichard Tran Mills { 9494a2a386eSRichard Tran Mills PetscErrorCode ierr; 9504a2a386eSRichard Tran Mills Mat B = *newmat; 9514a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 952c9d46305SRichard Tran Mills PetscBool set; 953e9c94282SRichard Tran Mills PetscBool sametype; 9544a2a386eSRichard Tran Mills 9554a2a386eSRichard Tran Mills PetscFunctionBegin; 9564a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 9574a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 9584a2a386eSRichard Tran Mills } 9594a2a386eSRichard Tran Mills 960e9c94282SRichard Tran Mills ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 961e9c94282SRichard Tran Mills if (sametype) PetscFunctionReturn(0); 962e9c94282SRichard Tran Mills 9634a2a386eSRichard Tran Mills ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 9644a2a386eSRichard Tran Mills B->spptr = (void*) aijmkl; 9654a2a386eSRichard Tran Mills 966df555b71SRichard Tran Mills /* Set function pointers for methods that we inherit from AIJ but override. 967969800c5SRichard Tran Mills * We also parse some command line options below, since those determine some of the methods we point to. */ 9684a2a386eSRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJMKL; 9694a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 9704a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJMKL; 971c9d46305SRichard Tran Mills 9724abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 973d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 974d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 975a8327b06SKarl Rupp #else 976d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 977d995685eSRichard Tran Mills #endif 9785b49642aSRichard Tran Mills aijmkl->eager_inspection = PETSC_FALSE; 9794abfa3b3SRichard Tran Mills 9804abfa3b3SRichard Tran Mills /* Parse command line options. */ 981c9d46305SRichard Tran Mills ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 982c9d46305SRichard Tran Mills ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 9835b49642aSRichard Tran Mills ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 984c9d46305SRichard Tran Mills ierr = PetscOptionsEnd();CHKERRQ(ierr); 985d995685eSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 986d995685eSRichard Tran Mills if(!aijmkl->no_SpMV2) { 987d995685eSRichard Tran Mills ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 988d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 989d995685eSRichard Tran Mills } 990d995685eSRichard Tran Mills #endif 991c9d46305SRichard Tran Mills 992d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 993df555b71SRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 994969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 995df555b71SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 996969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 99745fbe478SRichard Tran Mills B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 998e8be1fc7SRichard Tran Mills # ifdef PETSC_HAVE_MKL_SPARSE_SP2M 999e8be1fc7SRichard Tran Mills B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 10004f53af40SRichard Tran Mills # ifndef PETSC_USE_COMPLEX 10014f53af40SRichard Tran Mills B->ops->ptap = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2; 10024f53af40SRichard Tran Mills B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 10034f53af40SRichard Tran Mills # endif 1004e8be1fc7SRichard Tran Mills # endif 1005a557fde5SRichard Tran Mills B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 100650a5026bSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 100750a5026bSRichard Tran Mills 100850a5026bSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_SP2M 100950a5026bSRichard Tran Mills /* In the same release in which MKL introduced mkl_sparse_sp2m() (version 18, update 2), the old sparse BLAS interfaces were 101050a5026bSRichard Tran Mills * marked as deprecated. If "no_SpMV2" has been specified by the user and MKL 18u2 or later is being used, we use the new 101150a5026bSRichard Tran Mills * _SpMV2 routines (set above), but do not call mkl_sparse_optimize(), which results in the old numerical kernels (without the 101250a5026bSRichard Tran Mills * inspector-executor model) being used. For versions in which the older interface has not been deprecated, we use the old 101350a5026bSRichard Tran Mills * interface. */ 101450a5026bSRichard Tran Mills if (aijmkl->no_SpMV2) { 10154a2a386eSRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL; 1016969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 10174a2a386eSRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL; 1018969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1019c9d46305SRichard Tran Mills } 102050a5026bSRichard Tran Mills #endif 10214a2a386eSRichard Tran Mills 10224a2a386eSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 1023e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 1024e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 1025e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 102645fbe478SRichard Tran Mills if(!aijmkl->no_SpMV2) { 102745fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 102845fbe478SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1029e8be1fc7SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 1030e8be1fc7SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1031e8be1fc7SRichard Tran Mills #endif 1032372ec6bbSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 103345fbe478SRichard Tran Mills #endif 103445fbe478SRichard Tran Mills } 10354a2a386eSRichard Tran Mills 10364a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 10374a2a386eSRichard Tran Mills *newmat = B; 10384a2a386eSRichard Tran Mills PetscFunctionReturn(0); 10394a2a386eSRichard Tran Mills } 10404a2a386eSRichard Tran Mills 10414a2a386eSRichard Tran Mills /*@C 10424a2a386eSRichard Tran Mills MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 10434a2a386eSRichard Tran Mills This type inherits from AIJ and is largely identical, but uses sparse BLAS 10444a2a386eSRichard Tran Mills routines from Intel MKL whenever possible. 104590147e49SRichard Tran Mills If the installed version of MKL supports the "SpMV2" sparse 104690147e49SRichard Tran Mills inspector-executor routines, then those are used by default. 1047597ee276SRichard Tran Mills MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1048597ee276SRichard Tran Mills symmetric A) operations are currently supported. 1049597ee276SRichard Tran Mills Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 105090147e49SRichard Tran Mills 10514a2a386eSRichard Tran Mills Collective on MPI_Comm 10524a2a386eSRichard Tran Mills 10534a2a386eSRichard Tran Mills Input Parameters: 10544a2a386eSRichard Tran Mills + comm - MPI communicator, set to PETSC_COMM_SELF 10554a2a386eSRichard Tran Mills . m - number of rows 10564a2a386eSRichard Tran Mills . n - number of columns 10574a2a386eSRichard Tran Mills . nz - number of nonzeros per row (same for all rows) 10584a2a386eSRichard Tran Mills - nnz - array containing the number of nonzeros in the various rows 10594a2a386eSRichard Tran Mills (possibly different for each row) or NULL 10604a2a386eSRichard Tran Mills 10614a2a386eSRichard Tran Mills Output Parameter: 10624a2a386eSRichard Tran Mills . A - the matrix 10634a2a386eSRichard Tran Mills 106490147e49SRichard Tran Mills Options Database Keys: 106566b7eeb6SRichard Tran Mills + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 106666b7eeb6SRichard Tran Mills - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 106790147e49SRichard Tran Mills 10684a2a386eSRichard Tran Mills Notes: 10694a2a386eSRichard Tran Mills If nnz is given then nz is ignored 10704a2a386eSRichard Tran Mills 10714a2a386eSRichard Tran Mills Level: intermediate 10724a2a386eSRichard Tran Mills 107390147e49SRichard Tran Mills .keywords: matrix, MKL, sparse, parallel 10744a2a386eSRichard Tran Mills 10754a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 10764a2a386eSRichard Tran Mills @*/ 10774a2a386eSRichard Tran Mills PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 10784a2a386eSRichard Tran Mills { 10794a2a386eSRichard Tran Mills PetscErrorCode ierr; 10804a2a386eSRichard Tran Mills 10814a2a386eSRichard Tran Mills PetscFunctionBegin; 10824a2a386eSRichard Tran Mills ierr = MatCreate(comm,A);CHKERRQ(ierr); 10834a2a386eSRichard Tran Mills ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 10844a2a386eSRichard Tran Mills ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 10854a2a386eSRichard Tran Mills ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 10864a2a386eSRichard Tran Mills PetscFunctionReturn(0); 10874a2a386eSRichard Tran Mills } 10884a2a386eSRichard Tran Mills 10894a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 10904a2a386eSRichard Tran Mills { 10914a2a386eSRichard Tran Mills PetscErrorCode ierr; 10924a2a386eSRichard Tran Mills 10934a2a386eSRichard Tran Mills PetscFunctionBegin; 10944a2a386eSRichard Tran Mills ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 10954a2a386eSRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 10964a2a386eSRichard Tran Mills PetscFunctionReturn(0); 10974a2a386eSRichard Tran Mills } 1098