1b9e7e5c1SBarry Smith 24a2a386eSRichard Tran Mills /* 34a2a386eSRichard Tran Mills Defines basic operations for the MATSEQAIJMKL matrix class. 44a2a386eSRichard Tran Mills This class is derived from the MATSEQAIJ class and retains the 54a2a386eSRichard Tran Mills compressed row storage (aka Yale sparse matrix format) but uses 64a2a386eSRichard Tran Mills sparse BLAS operations from the Intel Math Kernel Library (MKL) 74a2a386eSRichard Tran Mills wherever possible. 84a2a386eSRichard Tran Mills */ 94a2a386eSRichard Tran Mills 104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h> 114a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12b9e7e5c1SBarry Smith #include <mkl_spblas.h> 134a2a386eSRichard Tran Mills 144a2a386eSRichard Tran Mills typedef struct { 15c9d46305SRichard Tran Mills PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 165b49642aSRichard Tran Mills PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 174abfa3b3SRichard Tran Mills PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18551aa5c8SRichard Tran Mills PetscObjectState state; 19ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20df555b71SRichard Tran Mills sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21df555b71SRichard Tran Mills struct matrix_descr descr; 22b8cbc1fbSRichard Tran Mills #endif 234a2a386eSRichard Tran Mills } Mat_SeqAIJMKL; 244a2a386eSRichard Tran Mills 254a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 264a2a386eSRichard Tran Mills 274a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 284a2a386eSRichard Tran Mills { 294a2a386eSRichard Tran Mills /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 304a2a386eSRichard Tran Mills /* so we will ignore 'MatType type'. */ 314a2a386eSRichard Tran Mills PetscErrorCode ierr; 324a2a386eSRichard Tran Mills Mat B = *newmat; 33ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 344a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35c1d5218aSRichard Tran Mills #endif 364a2a386eSRichard Tran Mills 374a2a386eSRichard Tran Mills PetscFunctionBegin; 384a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 394a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 404a2a386eSRichard Tran Mills } 414a2a386eSRichard Tran Mills 424a2a386eSRichard Tran Mills /* Reset the original function pointers. */ 4354871a98SRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJ; 444a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 454a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJ; 4654871a98SRichard Tran Mills B->ops->mult = MatMult_SeqAIJ; 47ff03dc53SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJ; 4854871a98SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJ; 49ff03dc53SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50*431879ecSRichard Tran Mills B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 51e8be1fc7SRichard Tran Mills B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 524f53af40SRichard Tran Mills B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 534a2a386eSRichard Tran Mills 54e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 554222ddf1SHong Zhang 56ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 574abfa3b3SRichard Tran Mills /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 58e9c94282SRichard Tran Mills * simply involves destroying the MKL sparse matrix handle and then freeing 59e9c94282SRichard Tran Mills * the spptr pointer. */ 60a8327b06SKarl Rupp if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 61a8327b06SKarl Rupp 624abfa3b3SRichard Tran Mills if (aijmkl->sparse_optimized) { 630632b357SRichard Tran Mills sparse_status_t stat; 644abfa3b3SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 659c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 664abfa3b3SRichard Tran Mills } 676718818eSStefano Zampini #endif 68e9c94282SRichard Tran Mills ierr = PetscFree(B->spptr);CHKERRQ(ierr); 694a2a386eSRichard Tran Mills 704a2a386eSRichard Tran Mills /* Change the type of B to MATSEQAIJ. */ 714a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 724a2a386eSRichard Tran Mills 734a2a386eSRichard Tran Mills *newmat = B; 744a2a386eSRichard Tran Mills PetscFunctionReturn(0); 754a2a386eSRichard Tran Mills } 764a2a386eSRichard Tran Mills 774a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 784a2a386eSRichard Tran Mills { 794a2a386eSRichard Tran Mills PetscErrorCode ierr; 804a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 814a2a386eSRichard Tran Mills 824a2a386eSRichard Tran Mills PetscFunctionBegin; 83e9c94282SRichard Tran Mills 84e9c94282SRichard Tran Mills /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 85e9c94282SRichard Tran Mills * spptr pointer. */ 86e9c94282SRichard Tran Mills if (aijmkl) { 874a2a386eSRichard Tran Mills /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 88ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 894abfa3b3SRichard Tran Mills if (aijmkl->sparse_optimized) { 904abfa3b3SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 914abfa3b3SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 929c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 934abfa3b3SRichard Tran Mills } 944abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 954a2a386eSRichard Tran Mills ierr = PetscFree(A->spptr);CHKERRQ(ierr); 96e9c94282SRichard Tran Mills } 974a2a386eSRichard Tran Mills 984a2a386eSRichard Tran Mills /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 994a2a386eSRichard Tran Mills * to destroy everything that remains. */ 1004a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 1014a2a386eSRichard Tran Mills /* Note that I don't call MatSetType(). I believe this is because that 1024a2a386eSRichard Tran Mills * is only to be called when *building* a matrix. I could be wrong, but 1034a2a386eSRichard Tran Mills * that is how things work for the SuperLU matrix class. */ 1044a2a386eSRichard Tran Mills ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 1054a2a386eSRichard Tran Mills PetscFunctionReturn(0); 1064a2a386eSRichard Tran Mills } 1074a2a386eSRichard Tran Mills 1085b49642aSRichard Tran Mills /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 1095b49642aSRichard Tran Mills * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 1105b49642aSRichard Tran Mills * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 1115b49642aSRichard Tran Mills * handle, creates a new one, and then calls mkl_sparse_optimize(). 1125b49642aSRichard Tran Mills * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 1135b49642aSRichard Tran Mills * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 1145b49642aSRichard Tran Mills * an unoptimized matrix handle here. */ 1156e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 1164a2a386eSRichard Tran Mills { 117ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1186e369cd5SRichard Tran Mills /* If the MKL library does not have mkl_sparse_optimize(), then this routine 1196e369cd5SRichard Tran Mills * does nothing. We make it callable anyway in this case because it cuts 1206e369cd5SRichard Tran Mills * down on littering the code with #ifdefs. */ 12145fbe478SRichard Tran Mills PetscFunctionBegin; 1226e369cd5SRichard Tran Mills PetscFunctionReturn(0); 1236e369cd5SRichard Tran Mills #else 124a8327b06SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 125a8327b06SKarl Rupp Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 126a8327b06SKarl Rupp PetscInt m,n; 127a8327b06SKarl Rupp MatScalar *aa; 128a8327b06SKarl Rupp PetscInt *aj,*ai; 1296e369cd5SRichard Tran Mills sparse_status_t stat; 130551aa5c8SRichard Tran Mills PetscErrorCode ierr; 1314a2a386eSRichard Tran Mills 132a8327b06SKarl Rupp PetscFunctionBegin; 133e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 134e626a176SRichard Tran Mills /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 135e626a176SRichard Tran Mills * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 136e626a176SRichard Tran Mills * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 137e626a176SRichard Tran Mills * mkl_sparse_optimize() later. */ 1386e369cd5SRichard Tran Mills if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 1394d51fa23SRichard Tran Mills #endif 1406e369cd5SRichard Tran Mills 1410632b357SRichard Tran Mills if (aijmkl->sparse_optimized) { 1420632b357SRichard Tran Mills /* Matrix has been previously assembled and optimized. Must destroy old 1430632b357SRichard Tran Mills * matrix handle before running the optimization step again. */ 1440632b357SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 1459c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 1460632b357SRichard Tran Mills } 1478d3fe1b0SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 1486e369cd5SRichard Tran Mills 149c9d46305SRichard Tran Mills /* Now perform the SpMV2 setup and matrix optimization. */ 150df555b71SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 151df555b71SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 152df555b71SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 15358678438SRichard Tran Mills m = A->rmap->n; 15458678438SRichard Tran Mills n = A->cmap->n; 155df555b71SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 156df555b71SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 157df555b71SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 15846cdef40SRichard Tran Mills if ((a->nz!=0) && aa && !(A->structure_only)) { 1598d3fe1b0SRichard Tran Mills /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 1608d3fe1b0SRichard Tran Mills * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 16158678438SRichard Tran Mills stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 162e8be1fc7SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle"); 163df555b71SRichard Tran Mills stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 164e8be1fc7SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 165df555b71SRichard Tran Mills stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 166e8be1fc7SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 1671950a7e7SRichard Tran Mills if (!aijmkl->no_SpMV2) { 168df555b71SRichard Tran Mills stat = mkl_sparse_optimize(aijmkl->csrA); 169e8be1fc7SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 1701950a7e7SRichard Tran Mills } 1714abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 172e995cf24SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 173c9d46305SRichard Tran Mills } 1746e369cd5SRichard Tran Mills 1756e369cd5SRichard Tran Mills PetscFunctionReturn(0); 176d995685eSRichard Tran Mills #endif 1776e369cd5SRichard Tran Mills } 1786e369cd5SRichard Tran Mills 17919afcda9SRichard Tran Mills /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 18019afcda9SRichard Tran Mills * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 1816c87cf42SRichard Tran Mills * matrix handle. 182aab60f1bSRichard Tran Mills * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as 183aab60f1bSRichard Tran Mills * there is no good alternative. */ 184ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1856c87cf42SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 18619afcda9SRichard Tran Mills { 18719afcda9SRichard Tran Mills PetscErrorCode ierr; 18819afcda9SRichard Tran Mills sparse_status_t stat; 18919afcda9SRichard Tran Mills sparse_index_base_t indexing; 19019afcda9SRichard Tran Mills PetscInt nrows, ncols; 19145fbe478SRichard Tran Mills PetscInt *aj,*ai,*dummy; 19219afcda9SRichard Tran Mills MatScalar *aa; 19319afcda9SRichard Tran Mills Mat A; 19419afcda9SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 19519afcda9SRichard Tran Mills 19645fbe478SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 19745fbe478SRichard Tran Mills stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 1989c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 1996c87cf42SRichard Tran Mills 200aab60f1bSRichard Tran Mills if (reuse == MAT_REUSE_MATRIX) { 201aab60f1bSRichard Tran Mills ierr = MatDestroy(mat);CHKERRQ(ierr); 202aab60f1bSRichard Tran Mills } 20319afcda9SRichard Tran Mills ierr = MatCreate(comm,&A);CHKERRQ(ierr); 20419afcda9SRichard Tran Mills ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 20545fbe478SRichard Tran Mills ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 206aab60f1bSRichard Tran Mills /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 207aab60f1bSRichard Tran Mills * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 208aab60f1bSRichard Tran Mills * they will be destroyed when the MKL handle is destroyed. 209aab60f1bSRichard Tran Mills * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 21019afcda9SRichard Tran Mills ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 21119afcda9SRichard Tran Mills 21219afcda9SRichard Tran Mills /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 21319afcda9SRichard Tran Mills * Now turn it into a MATSEQAIJMKL. */ 21419afcda9SRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 2156c87cf42SRichard Tran Mills 21619afcda9SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 21719afcda9SRichard Tran Mills aijmkl->csrA = csrA; 2186c87cf42SRichard Tran Mills 21919afcda9SRichard Tran Mills /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 22019afcda9SRichard Tran Mills * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 22119afcda9SRichard Tran Mills * and just need to be able to run the MKL optimization step. */ 222f3fd1758SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 223f3fd1758SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 224f3fd1758SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 22519afcda9SRichard Tran Mills stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 22651539a68SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 22719afcda9SRichard Tran Mills stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 22851539a68SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 2291950a7e7SRichard Tran Mills if (!aijmkl->no_SpMV2) { 23019afcda9SRichard Tran Mills stat = mkl_sparse_optimize(aijmkl->csrA); 23151539a68SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 2321950a7e7SRichard Tran Mills } 23319afcda9SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 234e995cf24SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 23519afcda9SRichard Tran Mills 23619afcda9SRichard Tran Mills *mat = A; 23719afcda9SRichard Tran Mills PetscFunctionReturn(0); 23819afcda9SRichard Tran Mills } 23919afcda9SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 24019afcda9SRichard Tran Mills 241e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 242e8be1fc7SRichard Tran Mills * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 243e8be1fc7SRichard Tran Mills * MatMatMultNumeric(). */ 244ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 245e8be1fc7SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 246e8be1fc7SRichard Tran Mills { 247e8be1fc7SRichard Tran Mills PetscInt i; 248e8be1fc7SRichard Tran Mills PetscInt nrows,ncols; 249e8be1fc7SRichard Tran Mills PetscInt nz; 250e8be1fc7SRichard Tran Mills PetscInt *ai,*aj,*dummy; 251e8be1fc7SRichard Tran Mills PetscScalar *aa; 252e8be1fc7SRichard Tran Mills PetscErrorCode ierr; 253e8be1fc7SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 254e8be1fc7SRichard Tran Mills sparse_status_t stat; 255e8be1fc7SRichard Tran Mills sparse_index_base_t indexing; 256e8be1fc7SRichard Tran Mills 257e8be1fc7SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 258e8be1fc7SRichard Tran Mills 259e8be1fc7SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 260e8be1fc7SRichard Tran Mills stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 261e8be1fc7SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 262e8be1fc7SRichard Tran Mills 263e8be1fc7SRichard Tran Mills /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 264e8be1fc7SRichard Tran Mills * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 265e8be1fc7SRichard Tran Mills for (i=0; i<nrows; i++) { 266e8be1fc7SRichard Tran Mills nz = ai[i+1] - ai[i]; 267e8be1fc7SRichard Tran Mills ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 268e8be1fc7SRichard Tran Mills } 269e8be1fc7SRichard Tran Mills 270e8be1fc7SRichard Tran Mills ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 271e8be1fc7SRichard Tran Mills ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 272e8be1fc7SRichard Tran Mills 273e995cf24SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 274e995cf24SRichard Tran Mills /* We mark our matrix as having a valid, optimized MKL handle. 275e995cf24SRichard Tran Mills * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */ 276e995cf24SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 277e995cf24SRichard Tran Mills 278e8be1fc7SRichard Tran Mills PetscFunctionReturn(0); 279e8be1fc7SRichard Tran Mills } 280e8be1fc7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 281e8be1fc7SRichard Tran Mills 2826e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 2836e369cd5SRichard Tran Mills { 2846e369cd5SRichard Tran Mills PetscErrorCode ierr; 2856e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 2866e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl_dest; 2876e369cd5SRichard Tran Mills 2886e369cd5SRichard Tran Mills PetscFunctionBegin; 2896e369cd5SRichard Tran Mills ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 2906e369cd5SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 2916e369cd5SRichard Tran Mills aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 292580bdb30SBarry Smith ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr); 2936e369cd5SRichard Tran Mills aijmkl_dest->sparse_optimized = PETSC_FALSE; 2945b49642aSRichard Tran Mills if (aijmkl->eager_inspection) { 2956e369cd5SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 2965b49642aSRichard Tran Mills } 2976e369cd5SRichard Tran Mills PetscFunctionReturn(0); 2986e369cd5SRichard Tran Mills } 2996e369cd5SRichard Tran Mills 3006e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 3016e369cd5SRichard Tran Mills { 3026e369cd5SRichard Tran Mills PetscErrorCode ierr; 3036e369cd5SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3045b49642aSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 3056e369cd5SRichard Tran Mills 3066e369cd5SRichard Tran Mills PetscFunctionBegin; 3076e369cd5SRichard Tran Mills if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 3086e369cd5SRichard Tran Mills 3096e369cd5SRichard Tran Mills /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 3106e369cd5SRichard Tran Mills * extra information and some different methods, call the AssemblyEnd 3116e369cd5SRichard Tran Mills * routine for a MATSEQAIJ. 3126e369cd5SRichard Tran Mills * I'm not sure if this is the best way to do this, but it avoids 313d96e85feSRichard Tran Mills * a lot of code duplication. */ 3146e369cd5SRichard Tran Mills a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 3156e369cd5SRichard Tran Mills ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 3166e369cd5SRichard Tran Mills 3175b49642aSRichard Tran Mills /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 3185b49642aSRichard Tran Mills * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 3195b49642aSRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 3205b49642aSRichard Tran Mills if (aijmkl->eager_inspection) { 3216e369cd5SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 3225b49642aSRichard Tran Mills } 323df555b71SRichard Tran Mills 3244a2a386eSRichard Tran Mills PetscFunctionReturn(0); 3254a2a386eSRichard Tran Mills } 3264a2a386eSRichard Tran Mills 327e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 3284a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 3294a2a386eSRichard Tran Mills { 3304a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3314a2a386eSRichard Tran Mills const PetscScalar *x; 3324a2a386eSRichard Tran Mills PetscScalar *y; 3334a2a386eSRichard Tran Mills const MatScalar *aa; 3344a2a386eSRichard Tran Mills PetscErrorCode ierr; 3354a2a386eSRichard Tran Mills PetscInt m=A->rmap->n; 336db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 337db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 338db63039fSRichard Tran Mills PetscScalar beta = 0.0; 3394a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 340db63039fSRichard Tran Mills char matdescra[6]; 341db63039fSRichard Tran Mills 3424a2a386eSRichard Tran Mills 3434a2a386eSRichard Tran Mills /* Variables not in MatMult_SeqAIJ. */ 344ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 345ff03dc53SRichard Tran Mills 346ff03dc53SRichard Tran Mills PetscFunctionBegin; 347db63039fSRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 348db63039fSRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 349ff03dc53SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 350ff03dc53SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 351ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 352ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 353ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 354ff03dc53SRichard Tran Mills 355ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 356db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 357ff03dc53SRichard Tran Mills 358ff03dc53SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 359ff03dc53SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 360ff03dc53SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 361ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 362ff03dc53SRichard Tran Mills } 3631950a7e7SRichard Tran Mills #endif 364ff03dc53SRichard Tran Mills 365ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 366df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 367df555b71SRichard Tran Mills { 368df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 369df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 370df555b71SRichard Tran Mills const PetscScalar *x; 371df555b71SRichard Tran Mills PetscScalar *y; 372df555b71SRichard Tran Mills PetscErrorCode ierr; 373df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 374551aa5c8SRichard Tran Mills PetscObjectState state; 375df555b71SRichard Tran Mills 376df555b71SRichard Tran Mills PetscFunctionBegin; 377df555b71SRichard Tran Mills 37838987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 37938987b35SRichard Tran Mills if(!a->nz) { 38038987b35SRichard Tran Mills PetscInt i; 38138987b35SRichard Tran Mills PetscInt m=A->rmap->n; 38238987b35SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 38338987b35SRichard Tran Mills for (i=0; i<m; i++) { 38438987b35SRichard Tran Mills y[i] = 0.0; 38538987b35SRichard Tran Mills } 38638987b35SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 38738987b35SRichard Tran Mills PetscFunctionReturn(0); 38838987b35SRichard Tran Mills } 389f36dfe3fSRichard Tran Mills 390df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 391df555b71SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 392df555b71SRichard Tran Mills 3933fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 3943fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 3953fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 396551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 397551aa5c8SRichard Tran Mills if (!aijmkl->sparse_optimized || aijmkl->state != state) { 3983fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 3993fa15762SRichard Tran Mills } 4003fa15762SRichard Tran Mills 401df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMult. */ 402df555b71SRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 4039c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 404df555b71SRichard Tran Mills 405df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 406df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 407df555b71SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 408df555b71SRichard Tran Mills PetscFunctionReturn(0); 409df555b71SRichard Tran Mills } 410d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 411df555b71SRichard Tran Mills 412e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 413ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 414ff03dc53SRichard Tran Mills { 415ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 416ff03dc53SRichard Tran Mills const PetscScalar *x; 417ff03dc53SRichard Tran Mills PetscScalar *y; 418ff03dc53SRichard Tran Mills const MatScalar *aa; 419ff03dc53SRichard Tran Mills PetscErrorCode ierr; 420ff03dc53SRichard Tran Mills PetscInt m=A->rmap->n; 421db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 422db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 423db63039fSRichard Tran Mills PetscScalar beta = 0.0; 424ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 425db63039fSRichard Tran Mills char matdescra[6]; 426ff03dc53SRichard Tran Mills 427ff03dc53SRichard Tran Mills /* Variables not in MatMultTranspose_SeqAIJ. */ 428ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 4294a2a386eSRichard Tran Mills 4304a2a386eSRichard Tran Mills PetscFunctionBegin; 431969800c5SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 432969800c5SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 4334a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4344a2a386eSRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 4354a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 4364a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 4374a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 4384a2a386eSRichard Tran Mills 4394a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 440db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 4414a2a386eSRichard Tran Mills 4424a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 4434a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4444a2a386eSRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 4454a2a386eSRichard Tran Mills PetscFunctionReturn(0); 4464a2a386eSRichard Tran Mills } 4471950a7e7SRichard Tran Mills #endif 4484a2a386eSRichard Tran Mills 449ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 450df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 451df555b71SRichard Tran Mills { 452df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 453df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 454df555b71SRichard Tran Mills const PetscScalar *x; 455df555b71SRichard Tran Mills PetscScalar *y; 456df555b71SRichard Tran Mills PetscErrorCode ierr; 4570632b357SRichard Tran Mills sparse_status_t stat; 458551aa5c8SRichard Tran Mills PetscObjectState state; 459df555b71SRichard Tran Mills 460df555b71SRichard Tran Mills PetscFunctionBegin; 461df555b71SRichard Tran Mills 46238987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 46338987b35SRichard Tran Mills if(!a->nz) { 46438987b35SRichard Tran Mills PetscInt i; 46538987b35SRichard Tran Mills PetscInt n=A->cmap->n; 46638987b35SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 46738987b35SRichard Tran Mills for (i=0; i<n; i++) { 46838987b35SRichard Tran Mills y[i] = 0.0; 46938987b35SRichard Tran Mills } 47038987b35SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 47138987b35SRichard Tran Mills PetscFunctionReturn(0); 47238987b35SRichard Tran Mills } 473f36dfe3fSRichard Tran Mills 474df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 475df555b71SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 476df555b71SRichard Tran Mills 4773fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 4783fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 4793fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 480551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 481551aa5c8SRichard Tran Mills if (!aijmkl->sparse_optimized || aijmkl->state != state) { 4823fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 4833fa15762SRichard Tran Mills } 4843fa15762SRichard Tran Mills 485df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 486df555b71SRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 4879c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 488df555b71SRichard Tran Mills 489df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 490df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 491df555b71SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 492df555b71SRichard Tran Mills PetscFunctionReturn(0); 493df555b71SRichard Tran Mills } 494d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 495df555b71SRichard Tran Mills 496e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 4974a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 4984a2a386eSRichard Tran Mills { 4994a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5004a2a386eSRichard Tran Mills const PetscScalar *x; 5014a2a386eSRichard Tran Mills PetscScalar *y,*z; 5024a2a386eSRichard Tran Mills const MatScalar *aa; 5034a2a386eSRichard Tran Mills PetscErrorCode ierr; 5044a2a386eSRichard Tran Mills PetscInt m=A->rmap->n; 505db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 5064a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 5074a2a386eSRichard Tran Mills PetscInt i; 5084a2a386eSRichard Tran Mills 509ff03dc53SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 510ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 511a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 512db63039fSRichard Tran Mills PetscScalar beta; 513a84739b8SRichard Tran Mills char matdescra[6]; 514ff03dc53SRichard Tran Mills 515ff03dc53SRichard Tran Mills PetscFunctionBegin; 516a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 517a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 518a84739b8SRichard Tran Mills 519ff03dc53SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 520ff03dc53SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 521ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 522ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 523ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 524ff03dc53SRichard Tran Mills 525ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 526a84739b8SRichard Tran Mills if (zz == yy) { 527a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 528db63039fSRichard Tran Mills beta = 1.0; 529db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 530a84739b8SRichard Tran Mills } else { 531db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 532db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 533db63039fSRichard Tran Mills beta = 0.0; 534db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 535ff03dc53SRichard Tran Mills for (i=0; i<m; i++) { 536ff03dc53SRichard Tran Mills z[i] += y[i]; 537ff03dc53SRichard Tran Mills } 538a84739b8SRichard Tran Mills } 539ff03dc53SRichard Tran Mills 540ff03dc53SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 541ff03dc53SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 542ff03dc53SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 543ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 544ff03dc53SRichard Tran Mills } 5451950a7e7SRichard Tran Mills #endif 546ff03dc53SRichard Tran Mills 547ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 548df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 549df555b71SRichard Tran Mills { 550df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 551df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 552df555b71SRichard Tran Mills const PetscScalar *x; 553df555b71SRichard Tran Mills PetscScalar *y,*z; 554df555b71SRichard Tran Mills PetscErrorCode ierr; 555df555b71SRichard Tran Mills PetscInt m=A->rmap->n; 556df555b71SRichard Tran Mills PetscInt i; 557df555b71SRichard Tran Mills 558df555b71SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 559df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 560551aa5c8SRichard Tran Mills PetscObjectState state; 561df555b71SRichard Tran Mills 562df555b71SRichard Tran Mills PetscFunctionBegin; 563df555b71SRichard Tran Mills 56438987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 56538987b35SRichard Tran Mills if(!a->nz) { 56638987b35SRichard Tran Mills PetscInt i; 56738987b35SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 56838987b35SRichard Tran Mills for (i=0; i<m; i++) { 56938987b35SRichard Tran Mills z[i] = y[i]; 57038987b35SRichard Tran Mills } 57138987b35SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 57238987b35SRichard Tran Mills PetscFunctionReturn(0); 57338987b35SRichard Tran Mills } 574df555b71SRichard Tran Mills 575df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 576df555b71SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 577df555b71SRichard Tran Mills 5783fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 5793fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 5803fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 581551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 582551aa5c8SRichard Tran Mills if (!aijmkl->sparse_optimized || aijmkl->state != state) { 5833fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 5843fa15762SRichard Tran Mills } 5853fa15762SRichard Tran Mills 586df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 587df555b71SRichard Tran Mills if (zz == yy) { 588df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 589df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 590db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 5919c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 592df555b71SRichard Tran Mills } else { 593df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 594df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 595db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 5969c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 597df555b71SRichard Tran Mills for (i=0; i<m; i++) { 598df555b71SRichard Tran Mills z[i] += y[i]; 599df555b71SRichard Tran Mills } 600df555b71SRichard Tran Mills } 601df555b71SRichard Tran Mills 602df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 603df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 604df555b71SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 605df555b71SRichard Tran Mills PetscFunctionReturn(0); 606df555b71SRichard Tran Mills } 607d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 608df555b71SRichard Tran Mills 609e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 610ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 611ff03dc53SRichard Tran Mills { 612ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 613ff03dc53SRichard Tran Mills const PetscScalar *x; 614ff03dc53SRichard Tran Mills PetscScalar *y,*z; 615ff03dc53SRichard Tran Mills const MatScalar *aa; 616ff03dc53SRichard Tran Mills PetscErrorCode ierr; 617ff03dc53SRichard Tran Mills PetscInt m=A->rmap->n; 618db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 619ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 620ff03dc53SRichard Tran Mills PetscInt i; 621ff03dc53SRichard Tran Mills 622ff03dc53SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 623ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 624a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 625db63039fSRichard Tran Mills PetscScalar beta; 626a84739b8SRichard Tran Mills char matdescra[6]; 6274a2a386eSRichard Tran Mills 6284a2a386eSRichard Tran Mills PetscFunctionBegin; 629a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 630a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 631a84739b8SRichard Tran Mills 6324a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 6334a2a386eSRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 6344a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 6354a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 6364a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 6374a2a386eSRichard Tran Mills 6384a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 639a84739b8SRichard Tran Mills if (zz == yy) { 640a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 641db63039fSRichard Tran Mills beta = 1.0; 642969800c5SRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 643a84739b8SRichard Tran Mills } else { 644db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 645db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 646db63039fSRichard Tran Mills beta = 0.0; 647db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 648969800c5SRichard Tran Mills for (i=0; i<n; i++) { 6494a2a386eSRichard Tran Mills z[i] += y[i]; 6504a2a386eSRichard Tran Mills } 651a84739b8SRichard Tran Mills } 6524a2a386eSRichard Tran Mills 6534a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 6544a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 6554a2a386eSRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 6564a2a386eSRichard Tran Mills PetscFunctionReturn(0); 6574a2a386eSRichard Tran Mills } 6581950a7e7SRichard Tran Mills #endif 6594a2a386eSRichard Tran Mills 660ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 661df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 662df555b71SRichard Tran Mills { 663df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 664df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 665df555b71SRichard Tran Mills const PetscScalar *x; 666df555b71SRichard Tran Mills PetscScalar *y,*z; 667df555b71SRichard Tran Mills PetscErrorCode ierr; 668969800c5SRichard Tran Mills PetscInt n=A->cmap->n; 669df555b71SRichard Tran Mills PetscInt i; 670551aa5c8SRichard Tran Mills PetscObjectState state; 671df555b71SRichard Tran Mills 672df555b71SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 673df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 674df555b71SRichard Tran Mills 675df555b71SRichard Tran Mills PetscFunctionBegin; 676df555b71SRichard Tran Mills 67738987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 67838987b35SRichard Tran Mills if(!a->nz) { 67938987b35SRichard Tran Mills PetscInt i; 68038987b35SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 68138987b35SRichard Tran Mills for (i=0; i<n; i++) { 68238987b35SRichard Tran Mills z[i] = y[i]; 68338987b35SRichard Tran Mills } 68438987b35SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 68538987b35SRichard Tran Mills PetscFunctionReturn(0); 68638987b35SRichard Tran Mills } 687f36dfe3fSRichard Tran Mills 688df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 689df555b71SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 690df555b71SRichard Tran Mills 6913fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 6923fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 6933fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 694551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 695551aa5c8SRichard Tran Mills if (!aijmkl->sparse_optimized || aijmkl->state != state) { 6963fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 6973fa15762SRichard Tran Mills } 6983fa15762SRichard Tran Mills 699df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 700df555b71SRichard Tran Mills if (zz == yy) { 701df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 702df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 703db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 7049c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 705df555b71SRichard Tran Mills } else { 706df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 707df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 708db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 7099c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 710969800c5SRichard Tran Mills for (i=0; i<n; i++) { 711df555b71SRichard Tran Mills z[i] += y[i]; 712df555b71SRichard Tran Mills } 713df555b71SRichard Tran Mills } 714df555b71SRichard Tran Mills 715df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 716df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 717df555b71SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 718df555b71SRichard Tran Mills PetscFunctionReturn(0); 719df555b71SRichard Tran Mills } 720d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 721df555b71SRichard Tran Mills 7228a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 723*431879ecSRichard Tran Mills PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,PetscReal fill,Mat C) 724*431879ecSRichard Tran Mills { 725*431879ecSRichard Tran Mills Mat_SeqAIJMKL *a, *b, *c; 726*431879ecSRichard Tran Mills sparse_matrix_t csrA, csrB, csrC; 727*431879ecSRichard Tran Mills PetscErrorCode ierr; 728*431879ecSRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 729*431879ecSRichard Tran Mills struct matrix_descr descr_type_gen; 730*431879ecSRichard Tran Mills PetscObjectState state; 731*431879ecSRichard Tran Mills 732*431879ecSRichard Tran Mills PetscFunctionBegin; 733*431879ecSRichard Tran Mills a = (Mat_SeqAIJMKL*)A->spptr; 734*431879ecSRichard Tran Mills b = (Mat_SeqAIJMKL*)B->spptr; 735*431879ecSRichard Tran Mills c = (Mat_SeqAIJMKL*)C->spptr; 736*431879ecSRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 737*431879ecSRichard Tran Mills if (!a->sparse_optimized || a->state != state) { 738*431879ecSRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 739*431879ecSRichard Tran Mills } 740*431879ecSRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 741*431879ecSRichard Tran Mills if (!b->sparse_optimized || b->state != state) { 742*431879ecSRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(B); 743*431879ecSRichard Tran Mills } 744*431879ecSRichard Tran Mills csrA = a->csrA; 745*431879ecSRichard Tran Mills csrB = b->csrA; 746*431879ecSRichard Tran Mills csrC = c->csrA; 747*431879ecSRichard Tran Mills descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 748*431879ecSRichard Tran Mills 749*431879ecSRichard Tran Mills stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA, 750*431879ecSRichard Tran Mills SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB, 751*431879ecSRichard Tran Mills SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC); 752*431879ecSRichard Tran Mills 753*431879ecSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply"); 754*431879ecSRichard Tran Mills 755*431879ecSRichard Tran Mills PetscFunctionReturn(0); 756*431879ecSRichard Tran Mills } 757*431879ecSRichard Tran Mills 758e8be1fc7SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C) 759e8be1fc7SRichard Tran Mills { 760e8be1fc7SRichard Tran Mills Mat_SeqAIJMKL *a, *b, *c; 761e8be1fc7SRichard Tran Mills sparse_matrix_t csrA, csrB, csrC; 762e8be1fc7SRichard Tran Mills PetscErrorCode ierr; 763e8be1fc7SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 764e8be1fc7SRichard Tran Mills struct matrix_descr descr_type_gen; 765e8be1fc7SRichard Tran Mills PetscObjectState state; 766e8be1fc7SRichard Tran Mills 767e8be1fc7SRichard Tran Mills PetscFunctionBegin; 768e8be1fc7SRichard Tran Mills a = (Mat_SeqAIJMKL*)A->spptr; 769e8be1fc7SRichard Tran Mills b = (Mat_SeqAIJMKL*)B->spptr; 770e8be1fc7SRichard Tran Mills c = (Mat_SeqAIJMKL*)C->spptr; 771e8be1fc7SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 772e8be1fc7SRichard Tran Mills if (!a->sparse_optimized || a->state != state) { 773e8be1fc7SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 774e8be1fc7SRichard Tran Mills } 775e8be1fc7SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 776e8be1fc7SRichard Tran Mills if (!b->sparse_optimized || b->state != state) { 777e8be1fc7SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(B); 778e8be1fc7SRichard Tran Mills } 779e8be1fc7SRichard Tran Mills csrA = a->csrA; 780e8be1fc7SRichard Tran Mills csrB = b->csrA; 781e8be1fc7SRichard Tran Mills csrC = c->csrA; 782e8be1fc7SRichard Tran Mills descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 783e8be1fc7SRichard Tran Mills 784e8be1fc7SRichard Tran Mills stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA, 785e8be1fc7SRichard Tran Mills SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB, 786e8be1fc7SRichard Tran Mills SPARSE_STAGE_FINALIZE_MULT,&csrC); 787e8be1fc7SRichard Tran Mills 788e8be1fc7SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply"); 789e8be1fc7SRichard Tran Mills 790e8be1fc7SRichard Tran Mills /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 7914f53af40SRichard Tran Mills ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 792e8be1fc7SRichard Tran Mills 793e8be1fc7SRichard Tran Mills PetscFunctionReturn(0); 794e8be1fc7SRichard Tran Mills } 795e8be1fc7SRichard Tran Mills 7964f53af40SRichard Tran Mills PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C) 7974f53af40SRichard Tran Mills { 7984f53af40SRichard Tran Mills Mat_SeqAIJMKL *a, *p, *c; 7994f53af40SRichard Tran Mills sparse_matrix_t csrA, csrP, csrC; 8004f53af40SRichard Tran Mills PetscBool set, flag; 8014f53af40SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 802b9e1dd46SRichard Tran Mills struct matrix_descr descr_type_sym; 8034f53af40SRichard Tran Mills PetscObjectState state; 8044f53af40SRichard Tran Mills PetscErrorCode ierr; 8054f53af40SRichard Tran Mills 8064f53af40SRichard Tran Mills PetscFunctionBegin; 8074f53af40SRichard Tran Mills ierr = MatIsSymmetricKnown(A,&set,&flag); 8084f53af40SRichard Tran Mills if (!set || (set && !flag)) { 8094f53af40SRichard Tran Mills ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 8104f53af40SRichard Tran Mills PetscFunctionReturn(0); 8114f53af40SRichard Tran Mills } 8124f53af40SRichard Tran Mills 8134f53af40SRichard Tran Mills a = (Mat_SeqAIJMKL*)A->spptr; 8144f53af40SRichard Tran Mills p = (Mat_SeqAIJMKL*)P->spptr; 8154f53af40SRichard Tran Mills c = (Mat_SeqAIJMKL*)C->spptr; 8164f53af40SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 8174f53af40SRichard Tran Mills if (!a->sparse_optimized || a->state != state) { 8184f53af40SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 8194f53af40SRichard Tran Mills } 8204f53af40SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 8214f53af40SRichard Tran Mills if (!p->sparse_optimized || p->state != state) { 8224f53af40SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(P); 8234f53af40SRichard Tran Mills } 8244f53af40SRichard Tran Mills csrA = a->csrA; 8254f53af40SRichard Tran Mills csrP = p->csrA; 8264f53af40SRichard Tran Mills csrC = c->csrA; 827b9e1dd46SRichard Tran Mills descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 828b9e1dd46SRichard Tran Mills descr_type_sym.mode = SPARSE_FILL_MODE_LOWER; 829b9e1dd46SRichard Tran Mills descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 8304f53af40SRichard Tran Mills 831f8990b4aSRichard Tran Mills /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 832b9e1dd46SRichard Tran Mills stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 8334f53af40SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr"); 8344f53af40SRichard Tran Mills 8354f53af40SRichard Tran Mills /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 8364f53af40SRichard Tran Mills ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 8374f53af40SRichard Tran Mills 8384f53af40SRichard Tran Mills PetscFunctionReturn(0); 8394f53af40SRichard Tran Mills } 840*431879ecSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 8414f53af40SRichard Tran Mills 8424a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 843510b72f4SRichard Tran Mills * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 8444a2a386eSRichard Tran Mills * routine, but can also be used to convert an assembled SeqAIJ matrix 8454a2a386eSRichard Tran Mills * into a SeqAIJMKL one. */ 8464a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 8474a2a386eSRichard Tran Mills { 8484a2a386eSRichard Tran Mills PetscErrorCode ierr; 8494a2a386eSRichard Tran Mills Mat B = *newmat; 8504a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 851c9d46305SRichard Tran Mills PetscBool set; 852e9c94282SRichard Tran Mills PetscBool sametype; 8534a2a386eSRichard Tran Mills 8544a2a386eSRichard Tran Mills PetscFunctionBegin; 8554a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 8564a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 8574a2a386eSRichard Tran Mills } 8584a2a386eSRichard Tran Mills 859e9c94282SRichard Tran Mills ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 860e9c94282SRichard Tran Mills if (sametype) PetscFunctionReturn(0); 861e9c94282SRichard Tran Mills 8624a2a386eSRichard Tran Mills ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 8634a2a386eSRichard Tran Mills B->spptr = (void*) aijmkl; 8644a2a386eSRichard Tran Mills 865df555b71SRichard Tran Mills /* Set function pointers for methods that we inherit from AIJ but override. 866969800c5SRichard Tran Mills * We also parse some command line options below, since those determine some of the methods we point to. */ 8674a2a386eSRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJMKL; 8684a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 8694a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJMKL; 870c9d46305SRichard Tran Mills 8714abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 872ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 873d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 874a8327b06SKarl Rupp #else 875d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 876d995685eSRichard Tran Mills #endif 8775b49642aSRichard Tran Mills aijmkl->eager_inspection = PETSC_FALSE; 8784abfa3b3SRichard Tran Mills 8794abfa3b3SRichard Tran Mills /* Parse command line options. */ 880c9d46305SRichard Tran Mills ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 88148292275SRichard Tran Mills ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 88248292275SRichard Tran Mills ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 883c9d46305SRichard Tran Mills ierr = PetscOptionsEnd();CHKERRQ(ierr); 884ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 885d995685eSRichard Tran Mills if(!aijmkl->no_SpMV2) { 886d995685eSRichard Tran Mills ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 887d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 888d995685eSRichard Tran Mills } 889d995685eSRichard Tran Mills #endif 890c9d46305SRichard Tran Mills 891ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 892df555b71SRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 893969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 894df555b71SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 895969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 8968a369200SRichard Tran Mills # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 897*431879ecSRichard Tran Mills B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_SpMV2; 898e8be1fc7SRichard Tran Mills B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 899ffcab697SRichard Tran Mills # if !defined(PETSC_USE_COMPLEX) 9004f53af40SRichard Tran Mills B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 9014f53af40SRichard Tran Mills # endif 902e8be1fc7SRichard Tran Mills # endif 9031950a7e7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 9041950a7e7SRichard Tran Mills 905213898a2SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 906213898a2SRichard Tran Mills /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 907213898a2SRichard Tran Mills * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 908213898a2SRichard Tran Mills * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 909213898a2SRichard Tran Mills * versions in which the older interface has not been deprecated, we use the old interface. */ 9101950a7e7SRichard Tran Mills if (aijmkl->no_SpMV2) { 9114a2a386eSRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL; 912969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 9134a2a386eSRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL; 914969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 915c9d46305SRichard Tran Mills } 9161950a7e7SRichard Tran Mills #endif 9174a2a386eSRichard Tran Mills 9184a2a386eSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 9194a2a386eSRichard Tran Mills 9204a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 9214a2a386eSRichard Tran Mills *newmat = B; 9224a2a386eSRichard Tran Mills PetscFunctionReturn(0); 9234a2a386eSRichard Tran Mills } 9244a2a386eSRichard Tran Mills 9254a2a386eSRichard Tran Mills /*@C 9264a2a386eSRichard Tran Mills MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 9274a2a386eSRichard Tran Mills This type inherits from AIJ and is largely identical, but uses sparse BLAS 9284a2a386eSRichard Tran Mills routines from Intel MKL whenever possible. 92990147e49SRichard Tran Mills If the installed version of MKL supports the "SpMV2" sparse 93090147e49SRichard Tran Mills inspector-executor routines, then those are used by default. 931597ee276SRichard Tran Mills MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 932597ee276SRichard Tran Mills symmetric A) operations are currently supported. 933597ee276SRichard Tran Mills Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 93490147e49SRichard Tran Mills 935d083f849SBarry Smith Collective 9364a2a386eSRichard Tran Mills 9374a2a386eSRichard Tran Mills Input Parameters: 9384a2a386eSRichard Tran Mills + comm - MPI communicator, set to PETSC_COMM_SELF 9394a2a386eSRichard Tran Mills . m - number of rows 9404a2a386eSRichard Tran Mills . n - number of columns 9414a2a386eSRichard Tran Mills . nz - number of nonzeros per row (same for all rows) 9424a2a386eSRichard Tran Mills - nnz - array containing the number of nonzeros in the various rows 9434a2a386eSRichard Tran Mills (possibly different for each row) or NULL 9444a2a386eSRichard Tran Mills 9454a2a386eSRichard Tran Mills Output Parameter: 9464a2a386eSRichard Tran Mills . A - the matrix 9474a2a386eSRichard Tran Mills 94890147e49SRichard Tran Mills Options Database Keys: 94966b7eeb6SRichard Tran Mills + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 95066b7eeb6SRichard Tran Mills - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 95190147e49SRichard Tran Mills 9524a2a386eSRichard Tran Mills Notes: 9534a2a386eSRichard Tran Mills If nnz is given then nz is ignored 9544a2a386eSRichard Tran Mills 9554a2a386eSRichard Tran Mills Level: intermediate 9564a2a386eSRichard Tran Mills 9574a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 9584a2a386eSRichard Tran Mills @*/ 9594a2a386eSRichard Tran Mills PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 9604a2a386eSRichard Tran Mills { 9614a2a386eSRichard Tran Mills PetscErrorCode ierr; 9624a2a386eSRichard Tran Mills 9634a2a386eSRichard Tran Mills PetscFunctionBegin; 9644a2a386eSRichard Tran Mills ierr = MatCreate(comm,A);CHKERRQ(ierr); 9654a2a386eSRichard Tran Mills ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 9664a2a386eSRichard Tran Mills ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 9674a2a386eSRichard Tran Mills ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 9684a2a386eSRichard Tran Mills PetscFunctionReturn(0); 9694a2a386eSRichard Tran Mills } 9704a2a386eSRichard Tran Mills 9714a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 9724a2a386eSRichard Tran Mills { 9734a2a386eSRichard Tran Mills PetscErrorCode ierr; 9744a2a386eSRichard Tran Mills 9754a2a386eSRichard Tran Mills PetscFunctionBegin; 9764a2a386eSRichard Tran Mills ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 9774a2a386eSRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 9784a2a386eSRichard Tran Mills PetscFunctionReturn(0); 9794a2a386eSRichard Tran Mills } 980