14a2a386eSRichard Tran Mills /* 24a2a386eSRichard Tran Mills Defines basic operations for the MATSEQAIJMKL matrix class. 34a2a386eSRichard Tran Mills This class is derived from the MATSEQAIJ class and retains the 44a2a386eSRichard Tran Mills compressed row storage (aka Yale sparse matrix format) but uses 54a2a386eSRichard Tran Mills sparse BLAS operations from the Intel Math Kernel Library (MKL) 64a2a386eSRichard Tran Mills wherever possible. 74a2a386eSRichard Tran Mills */ 84a2a386eSRichard Tran Mills 94a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h> 104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 114a2a386eSRichard Tran Mills 124a2a386eSRichard Tran Mills /* MKL include files. */ 134a2a386eSRichard Tran Mills #include <mkl_spblas.h> /* Sparse BLAS */ 144a2a386eSRichard Tran Mills 154a2a386eSRichard Tran Mills typedef struct { 16c9d46305SRichard Tran Mills PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 175b49642aSRichard Tran Mills PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 184abfa3b3SRichard Tran Mills PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19*551aa5c8SRichard Tran Mills PetscObjectState state; 20b8cbc1fbSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 21df555b71SRichard Tran Mills sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 22df555b71SRichard Tran Mills struct matrix_descr descr; 23b8cbc1fbSRichard Tran Mills #endif 244a2a386eSRichard Tran Mills } Mat_SeqAIJMKL; 254a2a386eSRichard Tran Mills 264a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 274a2a386eSRichard Tran Mills 284a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 294a2a386eSRichard Tran Mills { 304a2a386eSRichard Tran Mills /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 314a2a386eSRichard Tran Mills /* so we will ignore 'MatType type'. */ 324a2a386eSRichard Tran Mills PetscErrorCode ierr; 334a2a386eSRichard Tran Mills Mat B = *newmat; 34c1d5218aSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 354a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 36c1d5218aSRichard Tran Mills #endif 374a2a386eSRichard Tran Mills 384a2a386eSRichard Tran Mills PetscFunctionBegin; 394a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 404a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 414a2a386eSRichard Tran Mills } 424a2a386eSRichard Tran Mills 434a2a386eSRichard Tran Mills /* Reset the original function pointers. */ 4454871a98SRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJ; 454a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 464a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJ; 4754871a98SRichard Tran Mills B->ops->mult = MatMult_SeqAIJ; 48ff03dc53SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJ; 4954871a98SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJ; 50ff03dc53SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 5145fbe478SRichard Tran Mills B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 52372ec6bbSRichard Tran Mills B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ; 5387c2a1d7SRichard Tran Mills B->ops->scale = MatScale_SeqAIJ; 5487c2a1d7SRichard Tran Mills B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 5587c2a1d7SRichard Tran Mills B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 5687c2a1d7SRichard Tran Mills B->ops->axpy = MatAXPY_SeqAIJ; 574a2a386eSRichard Tran Mills 58e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 59e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 60e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 61e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 6245fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 634a940b00SSatish Balay if(!aijmkl->no_SpMV2) { 6445fbe478SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 65372ec6bbSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 6645fbe478SRichard Tran Mills } 67e9c94282SRichard Tran Mills 684abfa3b3SRichard Tran Mills /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 69e9c94282SRichard Tran Mills * simply involves destroying the MKL sparse matrix handle and then freeing 70e9c94282SRichard Tran Mills * the spptr pointer. */ 71a8327b06SKarl Rupp if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 72a8327b06SKarl Rupp 734abfa3b3SRichard Tran Mills if (aijmkl->sparse_optimized) { 740632b357SRichard Tran Mills sparse_status_t stat; 754abfa3b3SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 764abfa3b3SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 774abfa3b3SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 784abfa3b3SRichard Tran Mills } 794abfa3b3SRichard Tran Mills } 804abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 81e9c94282SRichard Tran Mills ierr = PetscFree(B->spptr);CHKERRQ(ierr); 824a2a386eSRichard Tran Mills 834a2a386eSRichard Tran Mills /* Change the type of B to MATSEQAIJ. */ 844a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 854a2a386eSRichard Tran Mills 864a2a386eSRichard Tran Mills *newmat = B; 874a2a386eSRichard Tran Mills PetscFunctionReturn(0); 884a2a386eSRichard Tran Mills } 894a2a386eSRichard Tran Mills 904a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 914a2a386eSRichard Tran Mills { 924a2a386eSRichard Tran Mills PetscErrorCode ierr; 934a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 944a2a386eSRichard Tran Mills 954a2a386eSRichard Tran Mills PetscFunctionBegin; 96e9c94282SRichard Tran Mills 97e9c94282SRichard Tran Mills /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 98e9c94282SRichard Tran Mills * spptr pointer. */ 99e9c94282SRichard Tran Mills if (aijmkl) { 1004a2a386eSRichard Tran Mills /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 1014abfa3b3SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 1024abfa3b3SRichard Tran Mills if (aijmkl->sparse_optimized) { 1034abfa3b3SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 1044abfa3b3SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 1054abfa3b3SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 1064abfa3b3SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 1074abfa3b3SRichard Tran Mills } 1084abfa3b3SRichard Tran Mills } 1094abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1104a2a386eSRichard Tran Mills ierr = PetscFree(A->spptr);CHKERRQ(ierr); 111e9c94282SRichard Tran Mills } 1124a2a386eSRichard Tran Mills 1134a2a386eSRichard Tran Mills /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 1144a2a386eSRichard Tran Mills * to destroy everything that remains. */ 1154a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 1164a2a386eSRichard Tran Mills /* Note that I don't call MatSetType(). I believe this is because that 1174a2a386eSRichard Tran Mills * is only to be called when *building* a matrix. I could be wrong, but 1184a2a386eSRichard Tran Mills * that is how things work for the SuperLU matrix class. */ 1194a2a386eSRichard Tran Mills ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 1204a2a386eSRichard Tran Mills PetscFunctionReturn(0); 1214a2a386eSRichard Tran Mills } 1224a2a386eSRichard Tran Mills 1235b49642aSRichard Tran Mills /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 1245b49642aSRichard Tran Mills * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 1255b49642aSRichard Tran Mills * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 1265b49642aSRichard Tran Mills * handle, creates a new one, and then calls mkl_sparse_optimize(). 1275b49642aSRichard Tran Mills * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 1285b49642aSRichard Tran Mills * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 1295b49642aSRichard Tran Mills * an unoptimized matrix handle here. */ 1306e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 1314a2a386eSRichard Tran Mills { 1326e369cd5SRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 1336e369cd5SRichard Tran Mills /* If the MKL library does not have mkl_sparse_optimize(), then this routine 1346e369cd5SRichard Tran Mills * does nothing. We make it callable anyway in this case because it cuts 1356e369cd5SRichard Tran Mills * down on littering the code with #ifdefs. */ 13645fbe478SRichard Tran Mills PetscFunctionBegin; 1376e369cd5SRichard Tran Mills PetscFunctionReturn(0); 1386e369cd5SRichard Tran Mills #else 139a8327b06SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 140a8327b06SKarl Rupp Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 141a8327b06SKarl Rupp PetscInt m,n; 142a8327b06SKarl Rupp MatScalar *aa; 143a8327b06SKarl Rupp PetscInt *aj,*ai; 1446e369cd5SRichard Tran Mills sparse_status_t stat; 145*551aa5c8SRichard Tran Mills PetscErrorCode ierr; 146*551aa5c8SRichard Tran Mills PetscObjectState state; 1474a2a386eSRichard Tran Mills 148a8327b06SKarl Rupp PetscFunctionBegin; 1496e369cd5SRichard Tran Mills if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 1506e369cd5SRichard Tran Mills 1510632b357SRichard Tran Mills if (aijmkl->sparse_optimized) { 1520632b357SRichard Tran Mills /* Matrix has been previously assembled and optimized. Must destroy old 1530632b357SRichard Tran Mills * matrix handle before running the optimization step again. */ 1540632b357SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 1550632b357SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 1560632b357SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 1570632b357SRichard Tran Mills } 1580632b357SRichard Tran Mills } 1598d3fe1b0SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 1606e369cd5SRichard Tran Mills 161c9d46305SRichard Tran Mills /* Now perform the SpMV2 setup and matrix optimization. */ 162df555b71SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 163df555b71SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 164df555b71SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 16558678438SRichard Tran Mills m = A->rmap->n; 16658678438SRichard Tran Mills n = A->cmap->n; 167df555b71SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 168df555b71SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 169df555b71SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 17080095d54SIrina Sokolova if ((a->nz!=0) & !(A->structure_only)) { 1718d3fe1b0SRichard Tran Mills /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 1728d3fe1b0SRichard Tran Mills * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 17358678438SRichard Tran Mills stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 174df555b71SRichard Tran Mills stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 175df555b71SRichard Tran Mills stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 176df555b71SRichard Tran Mills stat = mkl_sparse_optimize(aijmkl->csrA); 177df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 178f68ad4bdSRichard Tran Mills SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 179df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 180df555b71SRichard Tran Mills } 1814abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 182c9d46305SRichard Tran Mills } 183*551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 1846e369cd5SRichard Tran Mills 1856e369cd5SRichard Tran Mills PetscFunctionReturn(0); 186d995685eSRichard Tran Mills #endif 1876e369cd5SRichard Tran Mills } 1886e369cd5SRichard Tran Mills 18919afcda9SRichard Tran Mills /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 19019afcda9SRichard Tran Mills * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 1916c87cf42SRichard Tran Mills * matrix handle. 192aab60f1bSRichard Tran Mills * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as 193aab60f1bSRichard Tran Mills * there is no good alternative. */ 19419afcda9SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 1956c87cf42SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 19619afcda9SRichard Tran Mills { 19719afcda9SRichard Tran Mills PetscErrorCode ierr; 19819afcda9SRichard Tran Mills sparse_status_t stat; 19919afcda9SRichard Tran Mills sparse_index_base_t indexing; 20019afcda9SRichard Tran Mills PetscInt nrows, ncols; 20145fbe478SRichard Tran Mills PetscInt *aj,*ai,*dummy; 20219afcda9SRichard Tran Mills MatScalar *aa; 20319afcda9SRichard Tran Mills Mat A; 2046c87cf42SRichard Tran Mills Mat_SeqAIJ *a; 20519afcda9SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 20619afcda9SRichard Tran Mills 20745fbe478SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 20845fbe478SRichard Tran Mills stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 20919afcda9SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 21019afcda9SRichard Tran Mills SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 21119afcda9SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 21219afcda9SRichard Tran Mills } 2136c87cf42SRichard Tran Mills 214aab60f1bSRichard Tran Mills if (reuse == MAT_REUSE_MATRIX) { 215aab60f1bSRichard Tran Mills ierr = MatDestroy(mat);CHKERRQ(ierr); 216aab60f1bSRichard Tran Mills } 21719afcda9SRichard Tran Mills ierr = MatCreate(comm,&A);CHKERRQ(ierr); 21819afcda9SRichard Tran Mills ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 21945fbe478SRichard Tran Mills ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 220aab60f1bSRichard Tran Mills /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 221aab60f1bSRichard Tran Mills * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 222aab60f1bSRichard Tran Mills * they will be destroyed when the MKL handle is destroyed. 223aab60f1bSRichard Tran Mills * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 22419afcda9SRichard Tran Mills ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 22519afcda9SRichard Tran Mills 22619afcda9SRichard Tran Mills /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 22719afcda9SRichard Tran Mills * Now turn it into a MATSEQAIJMKL. */ 22819afcda9SRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 2296c87cf42SRichard Tran Mills 2306c87cf42SRichard Tran Mills a = (Mat_SeqAIJ*)A->data; 23119afcda9SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 23219afcda9SRichard Tran Mills aijmkl->csrA = csrA; 2336c87cf42SRichard Tran Mills 23419afcda9SRichard Tran Mills /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 23519afcda9SRichard Tran Mills * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 23619afcda9SRichard Tran Mills * and just need to be able to run the MKL optimization step. */ 237f3fd1758SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 238f3fd1758SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 239f3fd1758SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 24019afcda9SRichard Tran Mills stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 24119afcda9SRichard Tran Mills stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 24219afcda9SRichard Tran Mills stat = mkl_sparse_optimize(aijmkl->csrA); 24319afcda9SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 24419afcda9SRichard Tran Mills SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 24519afcda9SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 24619afcda9SRichard Tran Mills } 24719afcda9SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 24819afcda9SRichard Tran Mills 24919afcda9SRichard Tran Mills *mat = A; 25019afcda9SRichard Tran Mills PetscFunctionReturn(0); 25119afcda9SRichard Tran Mills } 25219afcda9SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 25319afcda9SRichard Tran Mills 2546e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 2556e369cd5SRichard Tran Mills { 2566e369cd5SRichard Tran Mills PetscErrorCode ierr; 2576e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 2586e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl_dest; 2596e369cd5SRichard Tran Mills 2606e369cd5SRichard Tran Mills PetscFunctionBegin; 2616e369cd5SRichard Tran Mills ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 2626e369cd5SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 2636e369cd5SRichard Tran Mills aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 2646e369cd5SRichard Tran Mills ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 2656e369cd5SRichard Tran Mills aijmkl_dest->sparse_optimized = PETSC_FALSE; 2665b49642aSRichard Tran Mills if (aijmkl->eager_inspection) { 2676e369cd5SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 2685b49642aSRichard Tran Mills } 2696e369cd5SRichard Tran Mills PetscFunctionReturn(0); 2706e369cd5SRichard Tran Mills } 2716e369cd5SRichard Tran Mills 2726e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 2736e369cd5SRichard Tran Mills { 2746e369cd5SRichard Tran Mills PetscErrorCode ierr; 2756e369cd5SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2765b49642aSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 2776e369cd5SRichard Tran Mills 2786e369cd5SRichard Tran Mills PetscFunctionBegin; 2796e369cd5SRichard Tran Mills if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2806e369cd5SRichard Tran Mills 2816e369cd5SRichard Tran Mills /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 2826e369cd5SRichard Tran Mills * extra information and some different methods, call the AssemblyEnd 2836e369cd5SRichard Tran Mills * routine for a MATSEQAIJ. 2846e369cd5SRichard Tran Mills * I'm not sure if this is the best way to do this, but it avoids 285d96e85feSRichard Tran Mills * a lot of code duplication. */ 2866e369cd5SRichard Tran Mills a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 2876e369cd5SRichard Tran Mills ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 2886e369cd5SRichard Tran Mills 2895b49642aSRichard Tran Mills /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 2905b49642aSRichard Tran Mills * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 2915b49642aSRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 2925b49642aSRichard Tran Mills if (aijmkl->eager_inspection) { 2936e369cd5SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 2944a940b00SSatish Balay #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 295886913bfSRichard Tran Mills } else if (aijmkl->sparse_optimized) { 296886913bfSRichard Tran Mills /* If doing lazy inspection and there is an optimized MKL handle, we need to destroy it, so that it will be 297886913bfSRichard Tran Mills * rebuilt later when needed. Otherwise, some SeqAIJ implementations that we depend on for some operations 298886913bfSRichard Tran Mills * (such as MatMatMultNumeric()) can modify the result matrix without the matrix handle being rebuilt. 2997225e97aSRichard Tran Mills * (The SeqAIJ version MatMatMultNumeric() knows nothing about matrix handles, but it *does* call MatAssemblyEnd().) */ 300886913bfSRichard Tran Mills sparse_status_t stat = mkl_sparse_destroy(aijmkl->csrA); 301886913bfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 302886913bfSRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 303886913bfSRichard Tran Mills } 304886913bfSRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 3054a940b00SSatish Balay #endif 3065b49642aSRichard Tran Mills } 307df555b71SRichard Tran Mills 3084a2a386eSRichard Tran Mills PetscFunctionReturn(0); 3094a2a386eSRichard Tran Mills } 3104a2a386eSRichard Tran Mills 3114a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 3124a2a386eSRichard Tran Mills { 3134a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3144a2a386eSRichard Tran Mills const PetscScalar *x; 3154a2a386eSRichard Tran Mills PetscScalar *y; 3164a2a386eSRichard Tran Mills const MatScalar *aa; 3174a2a386eSRichard Tran Mills PetscErrorCode ierr; 3184a2a386eSRichard Tran Mills PetscInt m=A->rmap->n; 319db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 320db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 321db63039fSRichard Tran Mills PetscScalar beta = 0.0; 3224a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 323db63039fSRichard Tran Mills char matdescra[6]; 324db63039fSRichard Tran Mills 3254a2a386eSRichard Tran Mills 3264a2a386eSRichard Tran Mills /* Variables not in MatMult_SeqAIJ. */ 327ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 328ff03dc53SRichard Tran Mills 329ff03dc53SRichard Tran Mills PetscFunctionBegin; 330db63039fSRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 331db63039fSRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 332ff03dc53SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 333ff03dc53SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 334ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 335ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 336ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 337ff03dc53SRichard Tran Mills 338ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 339db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 340ff03dc53SRichard Tran Mills 341ff03dc53SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 342ff03dc53SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 343ff03dc53SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 344ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 345ff03dc53SRichard Tran Mills } 346ff03dc53SRichard Tran Mills 347d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 348df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 349df555b71SRichard Tran Mills { 350df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 351df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 352df555b71SRichard Tran Mills const PetscScalar *x; 353df555b71SRichard Tran Mills PetscScalar *y; 354df555b71SRichard Tran Mills PetscErrorCode ierr; 355df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 356*551aa5c8SRichard Tran Mills PetscObjectState state; 357df555b71SRichard Tran Mills 358df555b71SRichard Tran Mills PetscFunctionBegin; 359df555b71SRichard Tran Mills 36038987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 36138987b35SRichard Tran Mills if(!a->nz) { 36238987b35SRichard Tran Mills PetscInt i; 36338987b35SRichard Tran Mills PetscInt m=A->rmap->n; 36438987b35SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 36538987b35SRichard Tran Mills for (i=0; i<m; i++) { 36638987b35SRichard Tran Mills y[i] = 0.0; 36738987b35SRichard Tran Mills } 36838987b35SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 36938987b35SRichard Tran Mills PetscFunctionReturn(0); 37038987b35SRichard Tran Mills } 371f36dfe3fSRichard Tran Mills 372df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 373df555b71SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 374df555b71SRichard Tran Mills 3753fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 3763fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 3773fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 378*551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 379*551aa5c8SRichard Tran Mills if (!aijmkl->sparse_optimized || aijmkl->state != state) { 3803fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 3813fa15762SRichard Tran Mills } 3823fa15762SRichard Tran Mills 383df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMult. */ 384df555b71SRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 385df555b71SRichard Tran Mills 386df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 387df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 388df555b71SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 389df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 390df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 391df555b71SRichard Tran Mills } 392df555b71SRichard Tran Mills PetscFunctionReturn(0); 393df555b71SRichard Tran Mills } 394d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 395df555b71SRichard Tran Mills 396ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 397ff03dc53SRichard Tran Mills { 398ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 399ff03dc53SRichard Tran Mills const PetscScalar *x; 400ff03dc53SRichard Tran Mills PetscScalar *y; 401ff03dc53SRichard Tran Mills const MatScalar *aa; 402ff03dc53SRichard Tran Mills PetscErrorCode ierr; 403ff03dc53SRichard Tran Mills PetscInt m=A->rmap->n; 404db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 405db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 406db63039fSRichard Tran Mills PetscScalar beta = 0.0; 407ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 408db63039fSRichard Tran Mills char matdescra[6]; 409ff03dc53SRichard Tran Mills 410ff03dc53SRichard Tran Mills /* Variables not in MatMultTranspose_SeqAIJ. */ 411ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 4124a2a386eSRichard Tran Mills 4134a2a386eSRichard Tran Mills PetscFunctionBegin; 414969800c5SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 415969800c5SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 4164a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4174a2a386eSRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 4184a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 4194a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 4204a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 4214a2a386eSRichard Tran Mills 4224a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 423db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 4244a2a386eSRichard Tran Mills 4254a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 4264a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4274a2a386eSRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 4284a2a386eSRichard Tran Mills PetscFunctionReturn(0); 4294a2a386eSRichard Tran Mills } 4304a2a386eSRichard Tran Mills 431d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 432df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 433df555b71SRichard Tran Mills { 434df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 435df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 436df555b71SRichard Tran Mills const PetscScalar *x; 437df555b71SRichard Tran Mills PetscScalar *y; 438df555b71SRichard Tran Mills PetscErrorCode ierr; 4390632b357SRichard Tran Mills sparse_status_t stat; 440*551aa5c8SRichard Tran Mills PetscObjectState state; 441df555b71SRichard Tran Mills 442df555b71SRichard Tran Mills PetscFunctionBegin; 443df555b71SRichard Tran Mills 44438987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 44538987b35SRichard Tran Mills if(!a->nz) { 44638987b35SRichard Tran Mills PetscInt i; 44738987b35SRichard Tran Mills PetscInt n=A->cmap->n; 44838987b35SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 44938987b35SRichard Tran Mills for (i=0; i<n; i++) { 45038987b35SRichard Tran Mills y[i] = 0.0; 45138987b35SRichard Tran Mills } 45238987b35SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 45338987b35SRichard Tran Mills PetscFunctionReturn(0); 45438987b35SRichard Tran Mills } 455f36dfe3fSRichard Tran Mills 456df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 457df555b71SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 458df555b71SRichard Tran Mills 4593fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 4603fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 4613fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 462*551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 463*551aa5c8SRichard Tran Mills if (!aijmkl->sparse_optimized || aijmkl->state != state) { 4643fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 4653fa15762SRichard Tran Mills } 4663fa15762SRichard Tran Mills 467df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 468df555b71SRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 469df555b71SRichard Tran Mills 470df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 471df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 472df555b71SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 473df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 474df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 475df555b71SRichard Tran Mills } 476df555b71SRichard Tran Mills PetscFunctionReturn(0); 477df555b71SRichard Tran Mills } 478d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 479df555b71SRichard Tran Mills 4804a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 4814a2a386eSRichard Tran Mills { 4824a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4834a2a386eSRichard Tran Mills const PetscScalar *x; 4844a2a386eSRichard Tran Mills PetscScalar *y,*z; 4854a2a386eSRichard Tran Mills const MatScalar *aa; 4864a2a386eSRichard Tran Mills PetscErrorCode ierr; 4874a2a386eSRichard Tran Mills PetscInt m=A->rmap->n; 488db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 4894a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 4904a2a386eSRichard Tran Mills PetscInt i; 4914a2a386eSRichard Tran Mills 492ff03dc53SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 493ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 494a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 495db63039fSRichard Tran Mills PetscScalar beta; 496a84739b8SRichard Tran Mills char matdescra[6]; 497ff03dc53SRichard Tran Mills 498ff03dc53SRichard Tran Mills PetscFunctionBegin; 499a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 500a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 501a84739b8SRichard Tran Mills 502ff03dc53SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 503ff03dc53SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 504ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 505ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 506ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 507ff03dc53SRichard Tran Mills 508ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 509a84739b8SRichard Tran Mills if (zz == yy) { 510a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 511db63039fSRichard Tran Mills beta = 1.0; 512db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 513a84739b8SRichard Tran Mills } else { 514db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 515db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 516db63039fSRichard Tran Mills beta = 0.0; 517db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 518ff03dc53SRichard Tran Mills for (i=0; i<m; i++) { 519ff03dc53SRichard Tran Mills z[i] += y[i]; 520ff03dc53SRichard Tran Mills } 521a84739b8SRichard Tran Mills } 522ff03dc53SRichard Tran Mills 523ff03dc53SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 524ff03dc53SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 525ff03dc53SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 526ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 527ff03dc53SRichard Tran Mills } 528ff03dc53SRichard Tran Mills 529d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 530df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 531df555b71SRichard Tran Mills { 532df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 533df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 534df555b71SRichard Tran Mills const PetscScalar *x; 535df555b71SRichard Tran Mills PetscScalar *y,*z; 536df555b71SRichard Tran Mills PetscErrorCode ierr; 537df555b71SRichard Tran Mills PetscInt m=A->rmap->n; 538df555b71SRichard Tran Mills PetscInt i; 539df555b71SRichard Tran Mills 540df555b71SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 541df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 542*551aa5c8SRichard Tran Mills PetscObjectState state; 543df555b71SRichard Tran Mills 544df555b71SRichard Tran Mills PetscFunctionBegin; 545df555b71SRichard Tran Mills 54638987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 54738987b35SRichard Tran Mills if(!a->nz) { 54838987b35SRichard Tran Mills PetscInt i; 54938987b35SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 55038987b35SRichard Tran Mills for (i=0; i<m; i++) { 55138987b35SRichard Tran Mills z[i] = y[i]; 55238987b35SRichard Tran Mills } 55338987b35SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 55438987b35SRichard Tran Mills PetscFunctionReturn(0); 55538987b35SRichard Tran Mills } 556df555b71SRichard Tran Mills 557df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 558df555b71SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 559df555b71SRichard Tran Mills 5603fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 5613fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 5623fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 563*551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 564*551aa5c8SRichard Tran Mills if (!aijmkl->sparse_optimized || aijmkl->state != state) { 5653fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 5663fa15762SRichard Tran Mills } 5673fa15762SRichard Tran Mills 568df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 569df555b71SRichard Tran Mills if (zz == yy) { 570df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 571df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 572db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 573df555b71SRichard Tran Mills } else { 574df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 575df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 576db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 577df555b71SRichard Tran Mills for (i=0; i<m; i++) { 578df555b71SRichard Tran Mills z[i] += y[i]; 579df555b71SRichard Tran Mills } 580df555b71SRichard Tran Mills } 581df555b71SRichard Tran Mills 582df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 583df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 584df555b71SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 585df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 586df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 587df555b71SRichard Tran Mills } 588df555b71SRichard Tran Mills PetscFunctionReturn(0); 589df555b71SRichard Tran Mills } 590d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 591df555b71SRichard Tran Mills 592ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 593ff03dc53SRichard Tran Mills { 594ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 595ff03dc53SRichard Tran Mills const PetscScalar *x; 596ff03dc53SRichard Tran Mills PetscScalar *y,*z; 597ff03dc53SRichard Tran Mills const MatScalar *aa; 598ff03dc53SRichard Tran Mills PetscErrorCode ierr; 599ff03dc53SRichard Tran Mills PetscInt m=A->rmap->n; 600db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 601ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 602ff03dc53SRichard Tran Mills PetscInt i; 603ff03dc53SRichard Tran Mills 604ff03dc53SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 605ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 606a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 607db63039fSRichard Tran Mills PetscScalar beta; 608a84739b8SRichard Tran Mills char matdescra[6]; 6094a2a386eSRichard Tran Mills 6104a2a386eSRichard Tran Mills PetscFunctionBegin; 611a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 612a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 613a84739b8SRichard Tran Mills 6144a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 6154a2a386eSRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 6164a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 6174a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 6184a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 6194a2a386eSRichard Tran Mills 6204a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 621a84739b8SRichard Tran Mills if (zz == yy) { 622a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 623db63039fSRichard Tran Mills beta = 1.0; 624969800c5SRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 625a84739b8SRichard Tran Mills } else { 626db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 627db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 628db63039fSRichard Tran Mills beta = 0.0; 629db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 630969800c5SRichard Tran Mills for (i=0; i<n; i++) { 6314a2a386eSRichard Tran Mills z[i] += y[i]; 6324a2a386eSRichard Tran Mills } 633a84739b8SRichard Tran Mills } 6344a2a386eSRichard Tran Mills 6354a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 6364a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 6374a2a386eSRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 6384a2a386eSRichard Tran Mills PetscFunctionReturn(0); 6394a2a386eSRichard Tran Mills } 6404a2a386eSRichard Tran Mills 641d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 642df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 643df555b71SRichard Tran Mills { 644df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 645df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 646df555b71SRichard Tran Mills const PetscScalar *x; 647df555b71SRichard Tran Mills PetscScalar *y,*z; 648df555b71SRichard Tran Mills PetscErrorCode ierr; 649969800c5SRichard Tran Mills PetscInt n=A->cmap->n; 650df555b71SRichard Tran Mills PetscInt i; 651*551aa5c8SRichard Tran Mills PetscObjectState state; 652df555b71SRichard Tran Mills 653df555b71SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 654df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 655df555b71SRichard Tran Mills 656df555b71SRichard Tran Mills PetscFunctionBegin; 657df555b71SRichard Tran Mills 65838987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 65938987b35SRichard Tran Mills if(!a->nz) { 66038987b35SRichard Tran Mills PetscInt i; 66138987b35SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 66238987b35SRichard Tran Mills for (i=0; i<n; i++) { 66338987b35SRichard Tran Mills z[i] = y[i]; 66438987b35SRichard Tran Mills } 66538987b35SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 66638987b35SRichard Tran Mills PetscFunctionReturn(0); 66738987b35SRichard Tran Mills } 668f36dfe3fSRichard Tran Mills 669df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 670df555b71SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 671df555b71SRichard Tran Mills 6723fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 6733fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 6743fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 675*551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 676*551aa5c8SRichard Tran Mills if (!aijmkl->sparse_optimized || aijmkl->state != state) { 6773fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 6783fa15762SRichard Tran Mills } 6793fa15762SRichard Tran Mills 680df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 681df555b71SRichard Tran Mills if (zz == yy) { 682df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 683df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 684db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 685df555b71SRichard Tran Mills } else { 686df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 687df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 688db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 689969800c5SRichard Tran Mills for (i=0; i<n; i++) { 690df555b71SRichard Tran Mills z[i] += y[i]; 691df555b71SRichard Tran Mills } 692df555b71SRichard Tran Mills } 693df555b71SRichard Tran Mills 694df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 695df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 696df555b71SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 697df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 698df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 699df555b71SRichard Tran Mills } 700df555b71SRichard Tran Mills PetscFunctionReturn(0); 701df555b71SRichard Tran Mills } 702d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 703df555b71SRichard Tran Mills 70445fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 705aab60f1bSRichard Tran Mills /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because 706aab60f1bSRichard Tran Mills * the MatMatMult() interface code calls MatMatMultNumeric() in this case. 707aab60f1bSRichard Tran Mills * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the 708aab60f1bSRichard Tran Mills * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more 709aab60f1bSRichard Tran Mills * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing 710aab60f1bSRichard Tran Mills * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */ 71145fbe478SRichard Tran Mills PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 71245fbe478SRichard Tran Mills { 71345fbe478SRichard Tran Mills Mat_SeqAIJMKL *a, *b; 71445fbe478SRichard Tran Mills sparse_matrix_t csrA, csrB, csrC; 71545fbe478SRichard Tran Mills PetscErrorCode ierr; 71645fbe478SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 717*551aa5c8SRichard Tran Mills PetscObjectState state; 71845fbe478SRichard Tran Mills 71945fbe478SRichard Tran Mills PetscFunctionBegin; 72045fbe478SRichard Tran Mills a = (Mat_SeqAIJMKL*)A->spptr; 72145fbe478SRichard Tran Mills b = (Mat_SeqAIJMKL*)B->spptr; 722*551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 723*551aa5c8SRichard Tran Mills if (!a->sparse_optimized || a->state != state) { 72445fbe478SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 72545fbe478SRichard Tran Mills } 726*551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 727*551aa5c8SRichard Tran Mills if (!b->sparse_optimized || b->state != state) { 72845fbe478SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(B); 72945fbe478SRichard Tran Mills } 73045fbe478SRichard Tran Mills csrA = a->csrA; 73145fbe478SRichard Tran Mills csrB = b->csrA; 73245fbe478SRichard Tran Mills 73345fbe478SRichard Tran Mills stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 73445fbe478SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 73545fbe478SRichard Tran Mills SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 73645fbe478SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 73745fbe478SRichard Tran Mills } 73845fbe478SRichard Tran Mills 7396c87cf42SRichard Tran Mills ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 74045fbe478SRichard Tran Mills 74145fbe478SRichard Tran Mills PetscFunctionReturn(0); 74245fbe478SRichard Tran Mills } 74345fbe478SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 74445fbe478SRichard Tran Mills 745372ec6bbSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 746372ec6bbSRichard Tran Mills PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 747372ec6bbSRichard Tran Mills { 748372ec6bbSRichard Tran Mills Mat_SeqAIJMKL *a, *b; 749372ec6bbSRichard Tran Mills sparse_matrix_t csrA, csrB, csrC; 750372ec6bbSRichard Tran Mills PetscErrorCode ierr; 751372ec6bbSRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 752*551aa5c8SRichard Tran Mills PetscObjectState state; 753372ec6bbSRichard Tran Mills 754372ec6bbSRichard Tran Mills PetscFunctionBegin; 755372ec6bbSRichard Tran Mills a = (Mat_SeqAIJMKL*)A->spptr; 756372ec6bbSRichard Tran Mills b = (Mat_SeqAIJMKL*)B->spptr; 757*551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 758*551aa5c8SRichard Tran Mills if (!a->sparse_optimized || a->state != state) { 759372ec6bbSRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 760372ec6bbSRichard Tran Mills } 761*551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 762*551aa5c8SRichard Tran Mills if (!b->sparse_optimized || b->state != state) { 763372ec6bbSRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(B); 764372ec6bbSRichard Tran Mills } 765372ec6bbSRichard Tran Mills csrA = a->csrA; 766372ec6bbSRichard Tran Mills csrB = b->csrA; 767372ec6bbSRichard Tran Mills 768372ec6bbSRichard Tran Mills stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC); 769372ec6bbSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 770372ec6bbSRichard Tran Mills SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 771372ec6bbSRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 772372ec6bbSRichard Tran Mills } 773372ec6bbSRichard Tran Mills 774372ec6bbSRichard Tran Mills ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 775372ec6bbSRichard Tran Mills 776372ec6bbSRichard Tran Mills PetscFunctionReturn(0); 777372ec6bbSRichard Tran Mills } 778372ec6bbSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 779372ec6bbSRichard Tran Mills 78087c2a1d7SRichard Tran Mills PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 781db63039fSRichard Tran Mills { 782db63039fSRichard Tran Mills PetscErrorCode ierr; 783db63039fSRichard Tran Mills 78487c2a1d7SRichard Tran Mills PetscFunctionBegin; 785db63039fSRichard Tran Mills ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 786db63039fSRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 787db63039fSRichard Tran Mills PetscFunctionReturn(0); 788db63039fSRichard Tran Mills } 789df555b71SRichard Tran Mills 79087c2a1d7SRichard Tran Mills PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 79187c2a1d7SRichard Tran Mills { 79287c2a1d7SRichard Tran Mills PetscErrorCode ierr; 79387c2a1d7SRichard Tran Mills 79487c2a1d7SRichard Tran Mills PetscFunctionBegin; 79587c2a1d7SRichard Tran Mills ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 79687c2a1d7SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 79787c2a1d7SRichard Tran Mills PetscFunctionReturn(0); 79887c2a1d7SRichard Tran Mills } 79987c2a1d7SRichard Tran Mills 80087c2a1d7SRichard Tran Mills PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 80187c2a1d7SRichard Tran Mills { 80287c2a1d7SRichard Tran Mills PetscErrorCode ierr; 80387c2a1d7SRichard Tran Mills 80487c2a1d7SRichard Tran Mills PetscFunctionBegin; 80587c2a1d7SRichard Tran Mills ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 80687c2a1d7SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 80787c2a1d7SRichard Tran Mills PetscFunctionReturn(0); 80887c2a1d7SRichard Tran Mills } 80987c2a1d7SRichard Tran Mills 81087c2a1d7SRichard Tran Mills PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 81187c2a1d7SRichard Tran Mills { 81287c2a1d7SRichard Tran Mills PetscErrorCode ierr; 81387c2a1d7SRichard Tran Mills 81487c2a1d7SRichard Tran Mills PetscFunctionBegin; 81587c2a1d7SRichard Tran Mills ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 81687c2a1d7SRichard Tran Mills if (str == SAME_NONZERO_PATTERN) { 81787c2a1d7SRichard Tran Mills /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 81887c2a1d7SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 81987c2a1d7SRichard Tran Mills } 82087c2a1d7SRichard Tran Mills PetscFunctionReturn(0); 82187c2a1d7SRichard Tran Mills } 82287c2a1d7SRichard Tran Mills 8234a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 8244a2a386eSRichard Tran Mills * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 8254a2a386eSRichard Tran Mills * routine, but can also be used to convert an assembled SeqAIJ matrix 8264a2a386eSRichard Tran Mills * into a SeqAIJMKL one. */ 8274a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 8284a2a386eSRichard Tran Mills { 8294a2a386eSRichard Tran Mills PetscErrorCode ierr; 8304a2a386eSRichard Tran Mills Mat B = *newmat; 8314a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 832c9d46305SRichard Tran Mills PetscBool set; 833e9c94282SRichard Tran Mills PetscBool sametype; 8344a2a386eSRichard Tran Mills 8354a2a386eSRichard Tran Mills PetscFunctionBegin; 8364a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 8374a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 8384a2a386eSRichard Tran Mills } 8394a2a386eSRichard Tran Mills 840e9c94282SRichard Tran Mills ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 841e9c94282SRichard Tran Mills if (sametype) PetscFunctionReturn(0); 842e9c94282SRichard Tran Mills 8434a2a386eSRichard Tran Mills ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 8444a2a386eSRichard Tran Mills B->spptr = (void*) aijmkl; 8454a2a386eSRichard Tran Mills 846df555b71SRichard Tran Mills /* Set function pointers for methods that we inherit from AIJ but override. 847969800c5SRichard Tran Mills * We also parse some command line options below, since those determine some of the methods we point to. */ 8484a2a386eSRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJMKL; 8494a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 8504a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJMKL; 851c9d46305SRichard Tran Mills 8524abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 853d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 854d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 855a8327b06SKarl Rupp #else 856d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 857d995685eSRichard Tran Mills #endif 8585b49642aSRichard Tran Mills aijmkl->eager_inspection = PETSC_FALSE; 8594abfa3b3SRichard Tran Mills 8604abfa3b3SRichard Tran Mills /* Parse command line options. */ 861c9d46305SRichard Tran Mills ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 862c9d46305SRichard Tran Mills ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 8635b49642aSRichard Tran Mills ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 864c9d46305SRichard Tran Mills ierr = PetscOptionsEnd();CHKERRQ(ierr); 865d995685eSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 866d995685eSRichard Tran Mills if(!aijmkl->no_SpMV2) { 867d995685eSRichard Tran Mills ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 868d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 869d995685eSRichard Tran Mills } 870d995685eSRichard Tran Mills #endif 871c9d46305SRichard Tran Mills 872c9d46305SRichard Tran Mills if(!aijmkl->no_SpMV2) { 873d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 874df555b71SRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 875969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 876df555b71SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 877969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 87845fbe478SRichard Tran Mills B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 879a557fde5SRichard Tran Mills B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 880d995685eSRichard Tran Mills #endif 881c9d46305SRichard Tran Mills } else { 8824a2a386eSRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL; 883969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 8844a2a386eSRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL; 885969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 886c9d46305SRichard Tran Mills } 8874a2a386eSRichard Tran Mills 888db63039fSRichard Tran Mills B->ops->scale = MatScale_SeqAIJMKL; 88987c2a1d7SRichard Tran Mills B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 89087c2a1d7SRichard Tran Mills B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 89187c2a1d7SRichard Tran Mills B->ops->axpy = MatAXPY_SeqAIJMKL; 892db63039fSRichard Tran Mills 893db63039fSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 8944a2a386eSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 895e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 896e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 897e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 89845fbe478SRichard Tran Mills if(!aijmkl->no_SpMV2) { 89945fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 90045fbe478SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 901372ec6bbSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 90245fbe478SRichard Tran Mills #endif 90345fbe478SRichard Tran Mills } 9044a2a386eSRichard Tran Mills 9054a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 9064a2a386eSRichard Tran Mills *newmat = B; 9074a2a386eSRichard Tran Mills PetscFunctionReturn(0); 9084a2a386eSRichard Tran Mills } 9094a2a386eSRichard Tran Mills 9104a2a386eSRichard Tran Mills /*@C 9114a2a386eSRichard Tran Mills MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 9124a2a386eSRichard Tran Mills This type inherits from AIJ and is largely identical, but uses sparse BLAS 9134a2a386eSRichard Tran Mills routines from Intel MKL whenever possible. 9143af10221SRichard Tran Mills MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, and MatTransposeMatMult 91590147e49SRichard Tran Mills operations are currently supported. 91690147e49SRichard Tran Mills If the installed version of MKL supports the "SpMV2" sparse 91790147e49SRichard Tran Mills inspector-executor routines, then those are used by default. 91890147e49SRichard Tran Mills 9194a2a386eSRichard Tran Mills Collective on MPI_Comm 9204a2a386eSRichard Tran Mills 9214a2a386eSRichard Tran Mills Input Parameters: 9224a2a386eSRichard Tran Mills + comm - MPI communicator, set to PETSC_COMM_SELF 9234a2a386eSRichard Tran Mills . m - number of rows 9244a2a386eSRichard Tran Mills . n - number of columns 9254a2a386eSRichard Tran Mills . nz - number of nonzeros per row (same for all rows) 9264a2a386eSRichard Tran Mills - nnz - array containing the number of nonzeros in the various rows 9274a2a386eSRichard Tran Mills (possibly different for each row) or NULL 9284a2a386eSRichard Tran Mills 9294a2a386eSRichard Tran Mills Output Parameter: 9304a2a386eSRichard Tran Mills . A - the matrix 9314a2a386eSRichard Tran Mills 93290147e49SRichard Tran Mills Options Database Keys: 93366b7eeb6SRichard Tran Mills + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 93466b7eeb6SRichard Tran Mills - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 93590147e49SRichard Tran Mills 9364a2a386eSRichard Tran Mills Notes: 9374a2a386eSRichard Tran Mills If nnz is given then nz is ignored 9384a2a386eSRichard Tran Mills 9394a2a386eSRichard Tran Mills Level: intermediate 9404a2a386eSRichard Tran Mills 94190147e49SRichard Tran Mills .keywords: matrix, MKL, sparse, parallel 9424a2a386eSRichard Tran Mills 9434a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 9444a2a386eSRichard Tran Mills @*/ 9454a2a386eSRichard Tran Mills PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 9464a2a386eSRichard Tran Mills { 9474a2a386eSRichard Tran Mills PetscErrorCode ierr; 9484a2a386eSRichard Tran Mills 9494a2a386eSRichard Tran Mills PetscFunctionBegin; 9504a2a386eSRichard Tran Mills ierr = MatCreate(comm,A);CHKERRQ(ierr); 9514a2a386eSRichard Tran Mills ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 9524a2a386eSRichard Tran Mills ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 9534a2a386eSRichard Tran Mills ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 9544a2a386eSRichard Tran Mills PetscFunctionReturn(0); 9554a2a386eSRichard Tran Mills } 9564a2a386eSRichard Tran Mills 9574a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 9584a2a386eSRichard Tran Mills { 9594a2a386eSRichard Tran Mills PetscErrorCode ierr; 9604a2a386eSRichard Tran Mills 9614a2a386eSRichard Tran Mills PetscFunctionBegin; 9624a2a386eSRichard Tran Mills ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 9634a2a386eSRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 9644a2a386eSRichard Tran Mills PetscFunctionReturn(0); 9654a2a386eSRichard Tran Mills } 966