14a2a386eSRichard Tran Mills /* 24a2a386eSRichard Tran Mills Defines basic operations for the MATSEQAIJMKL matrix class. 34a2a386eSRichard Tran Mills This class is derived from the MATSEQAIJ class and retains the 44a2a386eSRichard Tran Mills compressed row storage (aka Yale sparse matrix format) but uses 54a2a386eSRichard Tran Mills sparse BLAS operations from the Intel Math Kernel Library (MKL) 64a2a386eSRichard Tran Mills wherever possible. 74a2a386eSRichard Tran Mills */ 84a2a386eSRichard Tran Mills 94a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h> 104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 114a2a386eSRichard Tran Mills 124a2a386eSRichard Tran Mills /* MKL include files. */ 134a2a386eSRichard Tran Mills #include <mkl_spblas.h> /* Sparse BLAS */ 144a2a386eSRichard Tran Mills 154a2a386eSRichard Tran Mills typedef struct { 16c9d46305SRichard Tran Mills PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 175b49642aSRichard Tran Mills PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 184abfa3b3SRichard Tran Mills PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19b8cbc1fbSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 20df555b71SRichard Tran Mills sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21df555b71SRichard Tran Mills struct matrix_descr descr; 22b8cbc1fbSRichard Tran Mills #endif 234a2a386eSRichard Tran Mills } Mat_SeqAIJMKL; 244a2a386eSRichard Tran Mills 254a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 264a2a386eSRichard Tran Mills 274a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 284a2a386eSRichard Tran Mills { 294a2a386eSRichard Tran Mills /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 304a2a386eSRichard Tran Mills /* so we will ignore 'MatType type'. */ 314a2a386eSRichard Tran Mills PetscErrorCode ierr; 324a2a386eSRichard Tran Mills Mat B = *newmat; 33a8327b06SKarl Rupp #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 344a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35a8327b06SKarl Rupp #endif 364a2a386eSRichard Tran Mills 374a2a386eSRichard Tran Mills PetscFunctionBegin; 384a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 394a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 404a2a386eSRichard Tran Mills } 414a2a386eSRichard Tran Mills 424a2a386eSRichard Tran Mills /* Reset the original function pointers. */ 4354871a98SRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJ; 444a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 454a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJ; 4654871a98SRichard Tran Mills B->ops->mult = MatMult_SeqAIJ; 47ff03dc53SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJ; 4854871a98SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJ; 49ff03dc53SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 5045fbe478SRichard Tran Mills B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 51372ec6bbSRichard Tran Mills B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ; 5287c2a1d7SRichard Tran Mills B->ops->scale = MatScale_SeqAIJ; 5387c2a1d7SRichard Tran Mills B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 5487c2a1d7SRichard Tran Mills B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 5587c2a1d7SRichard Tran Mills B->ops->axpy = MatAXPY_SeqAIJ; 564a2a386eSRichard Tran Mills 57e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 58e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 59e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 60e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 6145fbe478SRichard Tran Mills if(!aijmkl->no_SpMV2) { 6245fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 6345fbe478SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 64372ec6bbSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 6545fbe478SRichard Tran Mills #endif 6645fbe478SRichard Tran Mills } 67e9c94282SRichard Tran Mills 684abfa3b3SRichard Tran Mills /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 69e9c94282SRichard Tran Mills * simply involves destroying the MKL sparse matrix handle and then freeing 70e9c94282SRichard Tran Mills * the spptr pointer. */ 714abfa3b3SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 72a8327b06SKarl Rupp if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 73a8327b06SKarl Rupp 744abfa3b3SRichard Tran Mills if (aijmkl->sparse_optimized) { 750632b357SRichard Tran Mills sparse_status_t stat; 764abfa3b3SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 774abfa3b3SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 784abfa3b3SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 794abfa3b3SRichard Tran Mills } 804abfa3b3SRichard Tran Mills } 814abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 82e9c94282SRichard Tran Mills ierr = PetscFree(B->spptr);CHKERRQ(ierr); 834a2a386eSRichard Tran Mills 844a2a386eSRichard Tran Mills /* Change the type of B to MATSEQAIJ. */ 854a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 864a2a386eSRichard Tran Mills 874a2a386eSRichard Tran Mills *newmat = B; 884a2a386eSRichard Tran Mills PetscFunctionReturn(0); 894a2a386eSRichard Tran Mills } 904a2a386eSRichard Tran Mills 914a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 924a2a386eSRichard Tran Mills { 934a2a386eSRichard Tran Mills PetscErrorCode ierr; 944a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 954a2a386eSRichard Tran Mills 964a2a386eSRichard Tran Mills PetscFunctionBegin; 97e9c94282SRichard Tran Mills 98e9c94282SRichard Tran Mills /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 99e9c94282SRichard Tran Mills * spptr pointer. */ 100e9c94282SRichard Tran Mills if (aijmkl) { 1014a2a386eSRichard Tran Mills /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 1024abfa3b3SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 1034abfa3b3SRichard Tran Mills if (aijmkl->sparse_optimized) { 1044abfa3b3SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 1054abfa3b3SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 1064abfa3b3SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 1074abfa3b3SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 1084abfa3b3SRichard Tran Mills } 1094abfa3b3SRichard Tran Mills } 1104abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1114a2a386eSRichard Tran Mills ierr = PetscFree(A->spptr);CHKERRQ(ierr); 112e9c94282SRichard Tran Mills } 1134a2a386eSRichard Tran Mills 1144a2a386eSRichard Tran Mills /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 1154a2a386eSRichard Tran Mills * to destroy everything that remains. */ 1164a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 1174a2a386eSRichard Tran Mills /* Note that I don't call MatSetType(). I believe this is because that 1184a2a386eSRichard Tran Mills * is only to be called when *building* a matrix. I could be wrong, but 1194a2a386eSRichard Tran Mills * that is how things work for the SuperLU matrix class. */ 1204a2a386eSRichard Tran Mills ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 1214a2a386eSRichard Tran Mills PetscFunctionReturn(0); 1224a2a386eSRichard Tran Mills } 1234a2a386eSRichard Tran Mills 1245b49642aSRichard Tran Mills /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 1255b49642aSRichard Tran Mills * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 1265b49642aSRichard Tran Mills * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 1275b49642aSRichard Tran Mills * handle, creates a new one, and then calls mkl_sparse_optimize(). 1285b49642aSRichard Tran Mills * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 1295b49642aSRichard Tran Mills * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 1305b49642aSRichard Tran Mills * an unoptimized matrix handle here. */ 1316e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 1324a2a386eSRichard Tran Mills { 1336e369cd5SRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 1346e369cd5SRichard Tran Mills /* If the MKL library does not have mkl_sparse_optimize(), then this routine 1356e369cd5SRichard Tran Mills * does nothing. We make it callable anyway in this case because it cuts 1366e369cd5SRichard Tran Mills * down on littering the code with #ifdefs. */ 13745fbe478SRichard Tran Mills PetscFunctionBegin; 1386e369cd5SRichard Tran Mills PetscFunctionReturn(0); 1396e369cd5SRichard Tran Mills #else 140a8327b06SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 141a8327b06SKarl Rupp Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 142a8327b06SKarl Rupp PetscInt m,n; 143a8327b06SKarl Rupp MatScalar *aa; 144a8327b06SKarl Rupp PetscInt *aj,*ai; 1456e369cd5SRichard Tran Mills sparse_status_t stat; 1464a2a386eSRichard Tran Mills 147a8327b06SKarl Rupp PetscFunctionBegin; 1486e369cd5SRichard Tran Mills if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 1496e369cd5SRichard Tran Mills 1500632b357SRichard Tran Mills if (aijmkl->sparse_optimized) { 1510632b357SRichard Tran Mills /* Matrix has been previously assembled and optimized. Must destroy old 1520632b357SRichard Tran Mills * matrix handle before running the optimization step again. */ 1530632b357SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 1540632b357SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 1550632b357SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 1560632b357SRichard Tran Mills } 1570632b357SRichard Tran Mills } 1588d3fe1b0SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 1596e369cd5SRichard Tran Mills 160c9d46305SRichard Tran Mills /* Now perform the SpMV2 setup and matrix optimization. */ 161df555b71SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 162df555b71SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 163df555b71SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 16458678438SRichard Tran Mills m = A->rmap->n; 16558678438SRichard Tran Mills n = A->cmap->n; 166df555b71SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 167df555b71SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 168df555b71SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 16980095d54SIrina Sokolova if ((a->nz!=0) & !(A->structure_only)) { 1708d3fe1b0SRichard Tran Mills /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 1718d3fe1b0SRichard Tran Mills * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 17258678438SRichard Tran Mills stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 173df555b71SRichard Tran Mills stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 174df555b71SRichard Tran Mills stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 175df555b71SRichard Tran Mills stat = mkl_sparse_optimize(aijmkl->csrA); 176df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 177f68ad4bdSRichard Tran Mills SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 178df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 179df555b71SRichard Tran Mills } 1804abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 181c9d46305SRichard Tran Mills } 1826e369cd5SRichard Tran Mills 1836e369cd5SRichard Tran Mills PetscFunctionReturn(0); 184d995685eSRichard Tran Mills #endif 1856e369cd5SRichard Tran Mills } 1866e369cd5SRichard Tran Mills 18719afcda9SRichard Tran Mills /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 18819afcda9SRichard Tran Mills * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 1896c87cf42SRichard Tran Mills * matrix handle. 1906c87cf42SRichard Tran Mills * Note: This routine supports replacing the numerical values in an existing matrix that has the same sparsity 1916c87cf42SRichard Tran Mills * structure as in the MKL handle. However, this code currently doesn't actually get used when MatMatMult() 1926c87cf42SRichard Tran Mills * is called with MAT_REUSE_MATRIX, because the MatMatMult() interface code calls MatMatMultNumeric() in this case. 1936c87cf42SRichard Tran Mills * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the 1946c87cf42SRichard Tran Mills * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more 1956c87cf42SRichard Tran Mills * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing 1966c87cf42SRichard Tran Mills * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */ 19719afcda9SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 1986c87cf42SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 19919afcda9SRichard Tran Mills { 20019afcda9SRichard Tran Mills PetscErrorCode ierr; 20119afcda9SRichard Tran Mills sparse_status_t stat; 20219afcda9SRichard Tran Mills sparse_index_base_t indexing; 20319afcda9SRichard Tran Mills PetscInt nrows, ncols; 20445fbe478SRichard Tran Mills PetscInt *aj,*ai,*dummy; 20519afcda9SRichard Tran Mills MatScalar *aa; 20619afcda9SRichard Tran Mills Mat A; 2076c87cf42SRichard Tran Mills Mat_SeqAIJ *a; 20819afcda9SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 20919afcda9SRichard Tran Mills 21045fbe478SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 21145fbe478SRichard Tran Mills stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 21219afcda9SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 21319afcda9SRichard Tran Mills SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 21419afcda9SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 21519afcda9SRichard Tran Mills } 2166c87cf42SRichard Tran Mills 2176c87cf42SRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 21819afcda9SRichard Tran Mills ierr = MatCreate(comm,&A);CHKERRQ(ierr); 21919afcda9SRichard Tran Mills ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 22045fbe478SRichard Tran Mills ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 22119afcda9SRichard Tran Mills ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 22219afcda9SRichard Tran Mills 22319afcda9SRichard Tran Mills /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 22419afcda9SRichard Tran Mills * Now turn it into a MATSEQAIJMKL. */ 22519afcda9SRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 2266c87cf42SRichard Tran Mills } else { 2276c87cf42SRichard Tran Mills A = *mat; 2286c87cf42SRichard Tran Mills } 2296c87cf42SRichard Tran Mills 2306c87cf42SRichard Tran Mills a = (Mat_SeqAIJ*)A->data; 23119afcda9SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 2326c87cf42SRichard Tran Mills 2336c87cf42SRichard Tran Mills if (reuse == MAT_REUSE_MATRIX) { 2346c87cf42SRichard Tran Mills /* Need to destroy old MKL handle. */ 2356c87cf42SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 2366c87cf42SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 2376c87cf42SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 2386c87cf42SRichard Tran Mills } 2396c87cf42SRichard Tran Mills 2406c87cf42SRichard Tran Mills /* The new matrix is supposed to have the same sparsity pattern, so copy only the array of numerical values. */ 2416c87cf42SRichard Tran Mills ierr = PetscMemcpy(a->a,aa,sizeof(MatScalar)*a->nz);CHKERRQ(ierr); 2426c87cf42SRichard Tran Mills } 24319afcda9SRichard Tran Mills aijmkl->csrA = csrA; 2446c87cf42SRichard Tran Mills 24519afcda9SRichard Tran Mills /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 24619afcda9SRichard Tran Mills * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 24719afcda9SRichard Tran Mills * and just need to be able to run the MKL optimization step. */ 248f3fd1758SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 249f3fd1758SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 250f3fd1758SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 25119afcda9SRichard Tran Mills stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 25219afcda9SRichard Tran Mills stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 25319afcda9SRichard Tran Mills stat = mkl_sparse_optimize(aijmkl->csrA); 25419afcda9SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 25519afcda9SRichard Tran Mills SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 25619afcda9SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 25719afcda9SRichard Tran Mills } 25819afcda9SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 25919afcda9SRichard Tran Mills 26019afcda9SRichard Tran Mills *mat = A; 26119afcda9SRichard Tran Mills PetscFunctionReturn(0); 26219afcda9SRichard Tran Mills } 26319afcda9SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 26419afcda9SRichard Tran Mills 2656e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 2666e369cd5SRichard Tran Mills { 2676e369cd5SRichard Tran Mills PetscErrorCode ierr; 2686e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 2696e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl_dest; 2706e369cd5SRichard Tran Mills 2716e369cd5SRichard Tran Mills PetscFunctionBegin; 2726e369cd5SRichard Tran Mills ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 2736e369cd5SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 2746e369cd5SRichard Tran Mills aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 2756e369cd5SRichard Tran Mills ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 2766e369cd5SRichard Tran Mills aijmkl_dest->sparse_optimized = PETSC_FALSE; 2775b49642aSRichard Tran Mills if (aijmkl->eager_inspection) { 2786e369cd5SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 2795b49642aSRichard Tran Mills } 2806e369cd5SRichard Tran Mills PetscFunctionReturn(0); 2816e369cd5SRichard Tran Mills } 2826e369cd5SRichard Tran Mills 2836e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 2846e369cd5SRichard Tran Mills { 2856e369cd5SRichard Tran Mills PetscErrorCode ierr; 2866e369cd5SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2875b49642aSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 2886e369cd5SRichard Tran Mills 2896e369cd5SRichard Tran Mills PetscFunctionBegin; 2906e369cd5SRichard Tran Mills if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2916e369cd5SRichard Tran Mills 2926e369cd5SRichard Tran Mills /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 2936e369cd5SRichard Tran Mills * extra information and some different methods, call the AssemblyEnd 2946e369cd5SRichard Tran Mills * routine for a MATSEQAIJ. 2956e369cd5SRichard Tran Mills * I'm not sure if this is the best way to do this, but it avoids 296d96e85feSRichard Tran Mills * a lot of code duplication. */ 2976e369cd5SRichard Tran Mills a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 2986e369cd5SRichard Tran Mills ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 2996e369cd5SRichard Tran Mills 3005b49642aSRichard Tran Mills /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 3015b49642aSRichard Tran Mills * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 3025b49642aSRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 3035b49642aSRichard Tran Mills if (aijmkl->eager_inspection) { 3046e369cd5SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 305886913bfSRichard Tran Mills } else if (aijmkl->sparse_optimized) { 306886913bfSRichard Tran Mills /* If doing lazy inspection and there is an optimized MKL handle, we need to destroy it, so that it will be 307886913bfSRichard Tran Mills * rebuilt later when needed. Otherwise, some SeqAIJ implementations that we depend on for some operations 308886913bfSRichard Tran Mills * (such as MatMatMultNumeric()) can modify the result matrix without the matrix handle being rebuilt. 309*7225e97aSRichard Tran Mills * (The SeqAIJ version MatMatMultNumeric() knows nothing about matrix handles, but it *does* call MatAssemblyEnd().) */ 310886913bfSRichard Tran Mills sparse_status_t stat = mkl_sparse_destroy(aijmkl->csrA); 311886913bfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 312886913bfSRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 313886913bfSRichard Tran Mills } 314886913bfSRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 3155b49642aSRichard Tran Mills } 316df555b71SRichard Tran Mills 3174a2a386eSRichard Tran Mills PetscFunctionReturn(0); 3184a2a386eSRichard Tran Mills } 3194a2a386eSRichard Tran Mills 3204a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 3214a2a386eSRichard Tran Mills { 3224a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3234a2a386eSRichard Tran Mills const PetscScalar *x; 3244a2a386eSRichard Tran Mills PetscScalar *y; 3254a2a386eSRichard Tran Mills const MatScalar *aa; 3264a2a386eSRichard Tran Mills PetscErrorCode ierr; 3274a2a386eSRichard Tran Mills PetscInt m=A->rmap->n; 328db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 329db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 330db63039fSRichard Tran Mills PetscScalar beta = 0.0; 3314a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 332db63039fSRichard Tran Mills char matdescra[6]; 333db63039fSRichard Tran Mills 3344a2a386eSRichard Tran Mills 3354a2a386eSRichard Tran Mills /* Variables not in MatMult_SeqAIJ. */ 336ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 337ff03dc53SRichard Tran Mills 338ff03dc53SRichard Tran Mills PetscFunctionBegin; 339db63039fSRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 340db63039fSRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 341ff03dc53SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 342ff03dc53SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 343ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 344ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 345ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 346ff03dc53SRichard Tran Mills 347ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 348db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 349ff03dc53SRichard Tran Mills 350ff03dc53SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 351ff03dc53SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 352ff03dc53SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 353ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 354ff03dc53SRichard Tran Mills } 355ff03dc53SRichard Tran Mills 356d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 357df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 358df555b71SRichard Tran Mills { 359df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 360df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 361df555b71SRichard Tran Mills const PetscScalar *x; 362df555b71SRichard Tran Mills PetscScalar *y; 363df555b71SRichard Tran Mills PetscErrorCode ierr; 364df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 365df555b71SRichard Tran Mills 366df555b71SRichard Tran Mills PetscFunctionBegin; 367df555b71SRichard Tran Mills 36838987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 36938987b35SRichard Tran Mills if(!a->nz) { 37038987b35SRichard Tran Mills PetscInt i; 37138987b35SRichard Tran Mills PetscInt m=A->rmap->n; 37238987b35SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 37338987b35SRichard Tran Mills for (i=0; i<m; i++) { 37438987b35SRichard Tran Mills y[i] = 0.0; 37538987b35SRichard Tran Mills } 37638987b35SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 37738987b35SRichard Tran Mills PetscFunctionReturn(0); 37838987b35SRichard Tran Mills } 379f36dfe3fSRichard Tran Mills 380df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 381df555b71SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 382df555b71SRichard Tran Mills 3833fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 3843fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 3853fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 3863fa15762SRichard Tran Mills if (!aijmkl->sparse_optimized) { 3873fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 3883fa15762SRichard Tran Mills } 3893fa15762SRichard Tran Mills 390df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMult. */ 391df555b71SRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 392df555b71SRichard Tran Mills 393df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 394df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 395df555b71SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 396df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 397df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 398df555b71SRichard Tran Mills } 399df555b71SRichard Tran Mills PetscFunctionReturn(0); 400df555b71SRichard Tran Mills } 401d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 402df555b71SRichard Tran Mills 403ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 404ff03dc53SRichard Tran Mills { 405ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 406ff03dc53SRichard Tran Mills const PetscScalar *x; 407ff03dc53SRichard Tran Mills PetscScalar *y; 408ff03dc53SRichard Tran Mills const MatScalar *aa; 409ff03dc53SRichard Tran Mills PetscErrorCode ierr; 410ff03dc53SRichard Tran Mills PetscInt m=A->rmap->n; 411db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 412db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 413db63039fSRichard Tran Mills PetscScalar beta = 0.0; 414ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 415db63039fSRichard Tran Mills char matdescra[6]; 416ff03dc53SRichard Tran Mills 417ff03dc53SRichard Tran Mills /* Variables not in MatMultTranspose_SeqAIJ. */ 418ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 4194a2a386eSRichard Tran Mills 4204a2a386eSRichard Tran Mills PetscFunctionBegin; 421969800c5SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 422969800c5SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 4234a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4244a2a386eSRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 4254a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 4264a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 4274a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 4284a2a386eSRichard Tran Mills 4294a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 430db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 4314a2a386eSRichard Tran Mills 4324a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 4334a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4344a2a386eSRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 4354a2a386eSRichard Tran Mills PetscFunctionReturn(0); 4364a2a386eSRichard Tran Mills } 4374a2a386eSRichard Tran Mills 438d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 439df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 440df555b71SRichard Tran Mills { 441df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 442df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 443df555b71SRichard Tran Mills const PetscScalar *x; 444df555b71SRichard Tran Mills PetscScalar *y; 445df555b71SRichard Tran Mills PetscErrorCode ierr; 4460632b357SRichard Tran Mills sparse_status_t stat; 447df555b71SRichard Tran Mills 448df555b71SRichard Tran Mills PetscFunctionBegin; 449df555b71SRichard Tran Mills 45038987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 45138987b35SRichard Tran Mills if(!a->nz) { 45238987b35SRichard Tran Mills PetscInt i; 45338987b35SRichard Tran Mills PetscInt n=A->cmap->n; 45438987b35SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 45538987b35SRichard Tran Mills for (i=0; i<n; i++) { 45638987b35SRichard Tran Mills y[i] = 0.0; 45738987b35SRichard Tran Mills } 45838987b35SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 45938987b35SRichard Tran Mills PetscFunctionReturn(0); 46038987b35SRichard Tran Mills } 461f36dfe3fSRichard Tran Mills 462df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 463df555b71SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 464df555b71SRichard Tran Mills 4653fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 4663fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 4673fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 4683fa15762SRichard Tran Mills if (!aijmkl->sparse_optimized) { 4693fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 4703fa15762SRichard Tran Mills } 4713fa15762SRichard Tran Mills 472df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 473df555b71SRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 474df555b71SRichard Tran Mills 475df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 476df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 477df555b71SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 478df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 479df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 480df555b71SRichard Tran Mills } 481df555b71SRichard Tran Mills PetscFunctionReturn(0); 482df555b71SRichard Tran Mills } 483d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 484df555b71SRichard Tran Mills 4854a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 4864a2a386eSRichard Tran Mills { 4874a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4884a2a386eSRichard Tran Mills const PetscScalar *x; 4894a2a386eSRichard Tran Mills PetscScalar *y,*z; 4904a2a386eSRichard Tran Mills const MatScalar *aa; 4914a2a386eSRichard Tran Mills PetscErrorCode ierr; 4924a2a386eSRichard Tran Mills PetscInt m=A->rmap->n; 493db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 4944a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 4954a2a386eSRichard Tran Mills PetscInt i; 4964a2a386eSRichard Tran Mills 497ff03dc53SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 498ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 499a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 500db63039fSRichard Tran Mills PetscScalar beta; 501a84739b8SRichard Tran Mills char matdescra[6]; 502ff03dc53SRichard Tran Mills 503ff03dc53SRichard Tran Mills PetscFunctionBegin; 504a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 505a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 506a84739b8SRichard Tran Mills 507ff03dc53SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 508ff03dc53SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 509ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 510ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 511ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 512ff03dc53SRichard Tran Mills 513ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 514a84739b8SRichard Tran Mills if (zz == yy) { 515a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 516db63039fSRichard Tran Mills beta = 1.0; 517db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 518a84739b8SRichard Tran Mills } else { 519db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 520db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 521db63039fSRichard Tran Mills beta = 0.0; 522db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 523ff03dc53SRichard Tran Mills for (i=0; i<m; i++) { 524ff03dc53SRichard Tran Mills z[i] += y[i]; 525ff03dc53SRichard Tran Mills } 526a84739b8SRichard Tran Mills } 527ff03dc53SRichard Tran Mills 528ff03dc53SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 529ff03dc53SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 530ff03dc53SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 531ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 532ff03dc53SRichard Tran Mills } 533ff03dc53SRichard Tran Mills 534d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 535df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 536df555b71SRichard Tran Mills { 537df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 538df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 539df555b71SRichard Tran Mills const PetscScalar *x; 540df555b71SRichard Tran Mills PetscScalar *y,*z; 541df555b71SRichard Tran Mills PetscErrorCode ierr; 542df555b71SRichard Tran Mills PetscInt m=A->rmap->n; 543df555b71SRichard Tran Mills PetscInt i; 544df555b71SRichard Tran Mills 545df555b71SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 546df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 547df555b71SRichard Tran Mills 548df555b71SRichard Tran Mills PetscFunctionBegin; 549df555b71SRichard Tran Mills 55038987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 55138987b35SRichard Tran Mills if(!a->nz) { 55238987b35SRichard Tran Mills PetscInt i; 55338987b35SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 55438987b35SRichard Tran Mills for (i=0; i<m; i++) { 55538987b35SRichard Tran Mills z[i] = y[i]; 55638987b35SRichard Tran Mills } 55738987b35SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 55838987b35SRichard Tran Mills PetscFunctionReturn(0); 55938987b35SRichard Tran Mills } 560df555b71SRichard Tran Mills 561df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 562df555b71SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 563df555b71SRichard Tran Mills 5643fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 5653fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 5663fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 5673fa15762SRichard Tran Mills if (!aijmkl->sparse_optimized) { 5683fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 5693fa15762SRichard Tran Mills } 5703fa15762SRichard Tran Mills 571df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 572df555b71SRichard Tran Mills if (zz == yy) { 573df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 574df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 575db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 576df555b71SRichard Tran Mills } else { 577df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 578df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 579db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 580df555b71SRichard Tran Mills for (i=0; i<m; i++) { 581df555b71SRichard Tran Mills z[i] += y[i]; 582df555b71SRichard Tran Mills } 583df555b71SRichard Tran Mills } 584df555b71SRichard Tran Mills 585df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 586df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 587df555b71SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 588df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 589df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 590df555b71SRichard Tran Mills } 591df555b71SRichard Tran Mills PetscFunctionReturn(0); 592df555b71SRichard Tran Mills } 593d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 594df555b71SRichard Tran Mills 595ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 596ff03dc53SRichard Tran Mills { 597ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 598ff03dc53SRichard Tran Mills const PetscScalar *x; 599ff03dc53SRichard Tran Mills PetscScalar *y,*z; 600ff03dc53SRichard Tran Mills const MatScalar *aa; 601ff03dc53SRichard Tran Mills PetscErrorCode ierr; 602ff03dc53SRichard Tran Mills PetscInt m=A->rmap->n; 603db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 604ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 605ff03dc53SRichard Tran Mills PetscInt i; 606ff03dc53SRichard Tran Mills 607ff03dc53SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 608ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 609a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 610db63039fSRichard Tran Mills PetscScalar beta; 611a84739b8SRichard Tran Mills char matdescra[6]; 6124a2a386eSRichard Tran Mills 6134a2a386eSRichard Tran Mills PetscFunctionBegin; 614a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 615a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 616a84739b8SRichard Tran Mills 6174a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 6184a2a386eSRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 6194a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 6204a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 6214a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 6224a2a386eSRichard Tran Mills 6234a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 624a84739b8SRichard Tran Mills if (zz == yy) { 625a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 626db63039fSRichard Tran Mills beta = 1.0; 627969800c5SRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 628a84739b8SRichard Tran Mills } else { 629db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 630db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 631db63039fSRichard Tran Mills beta = 0.0; 632db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 633969800c5SRichard Tran Mills for (i=0; i<n; i++) { 6344a2a386eSRichard Tran Mills z[i] += y[i]; 6354a2a386eSRichard Tran Mills } 636a84739b8SRichard Tran Mills } 6374a2a386eSRichard Tran Mills 6384a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 6394a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 6404a2a386eSRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 6414a2a386eSRichard Tran Mills PetscFunctionReturn(0); 6424a2a386eSRichard Tran Mills } 6434a2a386eSRichard Tran Mills 644d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 645df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 646df555b71SRichard Tran Mills { 647df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 648df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 649df555b71SRichard Tran Mills const PetscScalar *x; 650df555b71SRichard Tran Mills PetscScalar *y,*z; 651df555b71SRichard Tran Mills PetscErrorCode ierr; 652969800c5SRichard Tran Mills PetscInt n=A->cmap->n; 653df555b71SRichard Tran Mills PetscInt i; 654df555b71SRichard Tran Mills 655df555b71SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 656df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 657df555b71SRichard Tran Mills 658df555b71SRichard Tran Mills PetscFunctionBegin; 659df555b71SRichard Tran Mills 66038987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 66138987b35SRichard Tran Mills if(!a->nz) { 66238987b35SRichard Tran Mills PetscInt i; 66338987b35SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 66438987b35SRichard Tran Mills for (i=0; i<n; i++) { 66538987b35SRichard Tran Mills z[i] = y[i]; 66638987b35SRichard Tran Mills } 66738987b35SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 66838987b35SRichard Tran Mills PetscFunctionReturn(0); 66938987b35SRichard Tran Mills } 670f36dfe3fSRichard Tran Mills 671df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 672df555b71SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 673df555b71SRichard Tran Mills 6743fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 6753fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 6763fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 6773fa15762SRichard Tran Mills if (!aijmkl->sparse_optimized) { 6783fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 6793fa15762SRichard Tran Mills } 6803fa15762SRichard Tran Mills 681df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 682df555b71SRichard Tran Mills if (zz == yy) { 683df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 684df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 685db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 686df555b71SRichard Tran Mills } else { 687df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 688df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 689db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 690969800c5SRichard Tran Mills for (i=0; i<n; i++) { 691df555b71SRichard Tran Mills z[i] += y[i]; 692df555b71SRichard Tran Mills } 693df555b71SRichard Tran Mills } 694df555b71SRichard Tran Mills 695df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 696df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 697df555b71SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 698df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 699df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 700df555b71SRichard Tran Mills } 701df555b71SRichard Tran Mills PetscFunctionReturn(0); 702df555b71SRichard Tran Mills } 703d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 704df555b71SRichard Tran Mills 70545fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 70645fbe478SRichard Tran Mills PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 70745fbe478SRichard Tran Mills { 70845fbe478SRichard Tran Mills Mat_SeqAIJMKL *a, *b; 70945fbe478SRichard Tran Mills sparse_matrix_t csrA, csrB, csrC; 71045fbe478SRichard Tran Mills PetscErrorCode ierr; 71145fbe478SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 71245fbe478SRichard Tran Mills 71345fbe478SRichard Tran Mills PetscFunctionBegin; 71445fbe478SRichard Tran Mills a = (Mat_SeqAIJMKL*)A->spptr; 71545fbe478SRichard Tran Mills b = (Mat_SeqAIJMKL*)B->spptr; 71645fbe478SRichard Tran Mills if (!a->sparse_optimized) { 71745fbe478SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 71845fbe478SRichard Tran Mills } 71945fbe478SRichard Tran Mills if (!b->sparse_optimized) { 72045fbe478SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(B); 72145fbe478SRichard Tran Mills } 72245fbe478SRichard Tran Mills csrA = a->csrA; 72345fbe478SRichard Tran Mills csrB = b->csrA; 72445fbe478SRichard Tran Mills 72545fbe478SRichard Tran Mills stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 72645fbe478SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 72745fbe478SRichard Tran Mills SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 72845fbe478SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 72945fbe478SRichard Tran Mills } 73045fbe478SRichard Tran Mills 7316c87cf42SRichard Tran Mills ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 73245fbe478SRichard Tran Mills 73345fbe478SRichard Tran Mills PetscFunctionReturn(0); 73445fbe478SRichard Tran Mills } 73545fbe478SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 73645fbe478SRichard Tran Mills 737372ec6bbSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 738372ec6bbSRichard Tran Mills PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 739372ec6bbSRichard Tran Mills { 740372ec6bbSRichard Tran Mills Mat_SeqAIJMKL *a, *b; 741372ec6bbSRichard Tran Mills sparse_matrix_t csrA, csrB, csrC; 742372ec6bbSRichard Tran Mills PetscErrorCode ierr; 743372ec6bbSRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 744372ec6bbSRichard Tran Mills 745372ec6bbSRichard Tran Mills PetscFunctionBegin; 746372ec6bbSRichard Tran Mills a = (Mat_SeqAIJMKL*)A->spptr; 747372ec6bbSRichard Tran Mills b = (Mat_SeqAIJMKL*)B->spptr; 748372ec6bbSRichard Tran Mills if (!a->sparse_optimized) { 749372ec6bbSRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 750372ec6bbSRichard Tran Mills } 751372ec6bbSRichard Tran Mills if (!b->sparse_optimized) { 752372ec6bbSRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(B); 753372ec6bbSRichard Tran Mills } 754372ec6bbSRichard Tran Mills csrA = a->csrA; 755372ec6bbSRichard Tran Mills csrB = b->csrA; 756372ec6bbSRichard Tran Mills 757372ec6bbSRichard Tran Mills stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC); 758372ec6bbSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 759372ec6bbSRichard Tran Mills SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 760372ec6bbSRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 761372ec6bbSRichard Tran Mills } 762372ec6bbSRichard Tran Mills 763372ec6bbSRichard Tran Mills ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 764372ec6bbSRichard Tran Mills 765372ec6bbSRichard Tran Mills PetscFunctionReturn(0); 766372ec6bbSRichard Tran Mills } 767372ec6bbSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 768372ec6bbSRichard Tran Mills 76987c2a1d7SRichard Tran Mills PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 770db63039fSRichard Tran Mills { 771db63039fSRichard Tran Mills PetscErrorCode ierr; 772db63039fSRichard Tran Mills 77387c2a1d7SRichard Tran Mills PetscFunctionBegin; 774db63039fSRichard Tran Mills ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 775db63039fSRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 776db63039fSRichard Tran Mills PetscFunctionReturn(0); 777db63039fSRichard Tran Mills } 778df555b71SRichard Tran Mills 77987c2a1d7SRichard Tran Mills PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 78087c2a1d7SRichard Tran Mills { 78187c2a1d7SRichard Tran Mills PetscErrorCode ierr; 78287c2a1d7SRichard Tran Mills 78387c2a1d7SRichard Tran Mills PetscFunctionBegin; 78487c2a1d7SRichard Tran Mills ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 78587c2a1d7SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 78687c2a1d7SRichard Tran Mills PetscFunctionReturn(0); 78787c2a1d7SRichard Tran Mills } 78887c2a1d7SRichard Tran Mills 78987c2a1d7SRichard Tran Mills PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 79087c2a1d7SRichard Tran Mills { 79187c2a1d7SRichard Tran Mills PetscErrorCode ierr; 79287c2a1d7SRichard Tran Mills 79387c2a1d7SRichard Tran Mills PetscFunctionBegin; 79487c2a1d7SRichard Tran Mills ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 79587c2a1d7SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 79687c2a1d7SRichard Tran Mills PetscFunctionReturn(0); 79787c2a1d7SRichard Tran Mills } 79887c2a1d7SRichard Tran Mills 79987c2a1d7SRichard Tran Mills PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 80087c2a1d7SRichard Tran Mills { 80187c2a1d7SRichard Tran Mills PetscErrorCode ierr; 80287c2a1d7SRichard Tran Mills 80387c2a1d7SRichard Tran Mills PetscFunctionBegin; 80487c2a1d7SRichard Tran Mills ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 80587c2a1d7SRichard Tran Mills if (str == SAME_NONZERO_PATTERN) { 80687c2a1d7SRichard Tran Mills /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 80787c2a1d7SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 80887c2a1d7SRichard Tran Mills } 80987c2a1d7SRichard Tran Mills PetscFunctionReturn(0); 81087c2a1d7SRichard Tran Mills } 81187c2a1d7SRichard Tran Mills 8124a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 8134a2a386eSRichard Tran Mills * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 8144a2a386eSRichard Tran Mills * routine, but can also be used to convert an assembled SeqAIJ matrix 8154a2a386eSRichard Tran Mills * into a SeqAIJMKL one. */ 8164a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 8174a2a386eSRichard Tran Mills { 8184a2a386eSRichard Tran Mills PetscErrorCode ierr; 8194a2a386eSRichard Tran Mills Mat B = *newmat; 8204a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 821c9d46305SRichard Tran Mills PetscBool set; 822e9c94282SRichard Tran Mills PetscBool sametype; 8234a2a386eSRichard Tran Mills 8244a2a386eSRichard Tran Mills PetscFunctionBegin; 8254a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 8264a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 8274a2a386eSRichard Tran Mills } 8284a2a386eSRichard Tran Mills 829e9c94282SRichard Tran Mills ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 830e9c94282SRichard Tran Mills if (sametype) PetscFunctionReturn(0); 831e9c94282SRichard Tran Mills 8324a2a386eSRichard Tran Mills ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 8334a2a386eSRichard Tran Mills B->spptr = (void*) aijmkl; 8344a2a386eSRichard Tran Mills 835df555b71SRichard Tran Mills /* Set function pointers for methods that we inherit from AIJ but override. 836969800c5SRichard Tran Mills * We also parse some command line options below, since those determine some of the methods we point to. */ 8374a2a386eSRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJMKL; 8384a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 8394a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJMKL; 840c9d46305SRichard Tran Mills 8414abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 842d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 843d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 844a8327b06SKarl Rupp #else 845d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 846d995685eSRichard Tran Mills #endif 8475b49642aSRichard Tran Mills aijmkl->eager_inspection = PETSC_FALSE; 8484abfa3b3SRichard Tran Mills 8494abfa3b3SRichard Tran Mills /* Parse command line options. */ 850c9d46305SRichard Tran Mills ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 851c9d46305SRichard Tran Mills ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 8525b49642aSRichard Tran Mills ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 853c9d46305SRichard Tran Mills ierr = PetscOptionsEnd();CHKERRQ(ierr); 854d995685eSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 855d995685eSRichard Tran Mills if(!aijmkl->no_SpMV2) { 856d995685eSRichard Tran Mills ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 857d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 858d995685eSRichard Tran Mills } 859d995685eSRichard Tran Mills #endif 860c9d46305SRichard Tran Mills 861c9d46305SRichard Tran Mills if(!aijmkl->no_SpMV2) { 862d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 863df555b71SRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 864969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 865df555b71SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 866969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 86745fbe478SRichard Tran Mills B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 868372ec6bbSRichard Tran Mills B->ops->mattransposemult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 869d995685eSRichard Tran Mills #endif 870c9d46305SRichard Tran Mills } else { 8714a2a386eSRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL; 872969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 8734a2a386eSRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL; 874969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 875c9d46305SRichard Tran Mills } 8764a2a386eSRichard Tran Mills 877db63039fSRichard Tran Mills B->ops->scale = MatScale_SeqAIJMKL; 87887c2a1d7SRichard Tran Mills B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 87987c2a1d7SRichard Tran Mills B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 88087c2a1d7SRichard Tran Mills B->ops->axpy = MatAXPY_SeqAIJMKL; 881db63039fSRichard Tran Mills 882db63039fSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 8834a2a386eSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 884e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 885e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 886e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 88745fbe478SRichard Tran Mills if(!aijmkl->no_SpMV2) { 88845fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 88945fbe478SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 890372ec6bbSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 89145fbe478SRichard Tran Mills #endif 89245fbe478SRichard Tran Mills } 8934a2a386eSRichard Tran Mills 8944a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 8954a2a386eSRichard Tran Mills *newmat = B; 8964a2a386eSRichard Tran Mills PetscFunctionReturn(0); 8974a2a386eSRichard Tran Mills } 8984a2a386eSRichard Tran Mills 8994a2a386eSRichard Tran Mills /*@C 9004a2a386eSRichard Tran Mills MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 9014a2a386eSRichard Tran Mills This type inherits from AIJ and is largely identical, but uses sparse BLAS 9024a2a386eSRichard Tran Mills routines from Intel MKL whenever possible. 90390147e49SRichard Tran Mills MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 90490147e49SRichard Tran Mills operations are currently supported. 90590147e49SRichard Tran Mills If the installed version of MKL supports the "SpMV2" sparse 90690147e49SRichard Tran Mills inspector-executor routines, then those are used by default. 90790147e49SRichard Tran Mills 9084a2a386eSRichard Tran Mills Collective on MPI_Comm 9094a2a386eSRichard Tran Mills 9104a2a386eSRichard Tran Mills Input Parameters: 9114a2a386eSRichard Tran Mills + comm - MPI communicator, set to PETSC_COMM_SELF 9124a2a386eSRichard Tran Mills . m - number of rows 9134a2a386eSRichard Tran Mills . n - number of columns 9144a2a386eSRichard Tran Mills . nz - number of nonzeros per row (same for all rows) 9154a2a386eSRichard Tran Mills - nnz - array containing the number of nonzeros in the various rows 9164a2a386eSRichard Tran Mills (possibly different for each row) or NULL 9174a2a386eSRichard Tran Mills 9184a2a386eSRichard Tran Mills Output Parameter: 9194a2a386eSRichard Tran Mills . A - the matrix 9204a2a386eSRichard Tran Mills 92190147e49SRichard Tran Mills Options Database Keys: 92290147e49SRichard Tran Mills . -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines 92390147e49SRichard Tran Mills 9244a2a386eSRichard Tran Mills Notes: 9254a2a386eSRichard Tran Mills If nnz is given then nz is ignored 9264a2a386eSRichard Tran Mills 9274a2a386eSRichard Tran Mills Level: intermediate 9284a2a386eSRichard Tran Mills 92990147e49SRichard Tran Mills .keywords: matrix, MKL, sparse, parallel 9304a2a386eSRichard Tran Mills 9314a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 9324a2a386eSRichard Tran Mills @*/ 9334a2a386eSRichard Tran Mills PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 9344a2a386eSRichard Tran Mills { 9354a2a386eSRichard Tran Mills PetscErrorCode ierr; 9364a2a386eSRichard Tran Mills 9374a2a386eSRichard Tran Mills PetscFunctionBegin; 9384a2a386eSRichard Tran Mills ierr = MatCreate(comm,A);CHKERRQ(ierr); 9394a2a386eSRichard Tran Mills ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 9404a2a386eSRichard Tran Mills ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 9414a2a386eSRichard Tran Mills ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 9424a2a386eSRichard Tran Mills PetscFunctionReturn(0); 9434a2a386eSRichard Tran Mills } 9444a2a386eSRichard Tran Mills 9454a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 9464a2a386eSRichard Tran Mills { 9474a2a386eSRichard Tran Mills PetscErrorCode ierr; 9484a2a386eSRichard Tran Mills 9494a2a386eSRichard Tran Mills PetscFunctionBegin; 9504a2a386eSRichard Tran Mills ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 9514a2a386eSRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 9524a2a386eSRichard Tran Mills PetscFunctionReturn(0); 9534a2a386eSRichard Tran Mills } 954