1b9e7e5c1SBarry Smith 24a2a386eSRichard Tran Mills /* 34a2a386eSRichard Tran Mills Defines basic operations for the MATSEQAIJMKL matrix class. 44a2a386eSRichard Tran Mills This class is derived from the MATSEQAIJ class and retains the 54a2a386eSRichard Tran Mills compressed row storage (aka Yale sparse matrix format) but uses 64a2a386eSRichard Tran Mills sparse BLAS operations from the Intel Math Kernel Library (MKL) 74a2a386eSRichard Tran Mills wherever possible. 84a2a386eSRichard Tran Mills */ 94a2a386eSRichard Tran Mills 104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h> 114a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12b9e7e5c1SBarry Smith #include <mkl_spblas.h> 134a2a386eSRichard Tran Mills 144a2a386eSRichard Tran Mills typedef struct { 15c9d46305SRichard Tran Mills PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 165b49642aSRichard Tran Mills PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 174abfa3b3SRichard Tran Mills PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18551aa5c8SRichard Tran Mills PetscObjectState state; 19ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20df555b71SRichard Tran Mills sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21df555b71SRichard Tran Mills struct matrix_descr descr; 22b8cbc1fbSRichard Tran Mills #endif 234a2a386eSRichard Tran Mills } Mat_SeqAIJMKL; 244a2a386eSRichard Tran Mills 254a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 264a2a386eSRichard Tran Mills 274a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 284a2a386eSRichard Tran Mills { 294a2a386eSRichard Tran Mills /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 304a2a386eSRichard Tran Mills /* so we will ignore 'MatType type'. */ 314a2a386eSRichard Tran Mills PetscErrorCode ierr; 324a2a386eSRichard Tran Mills Mat B = *newmat; 33ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 344a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 35c1d5218aSRichard Tran Mills #endif 364a2a386eSRichard Tran Mills 374a2a386eSRichard Tran Mills PetscFunctionBegin; 384a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 394a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 404a2a386eSRichard Tran Mills } 414a2a386eSRichard Tran Mills 424a2a386eSRichard Tran Mills /* Reset the original function pointers. */ 4354871a98SRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJ; 444a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 454a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJ; 4654871a98SRichard Tran Mills B->ops->mult = MatMult_SeqAIJ; 47ff03dc53SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJ; 4854871a98SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJ; 49ff03dc53SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50190ae7a4SRichard Tran Mills B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 51431879ecSRichard Tran Mills B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 52e8be1fc7SRichard Tran Mills B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 53190ae7a4SRichard Tran Mills B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 54190ae7a4SRichard Tran Mills B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 554f53af40SRichard Tran Mills B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 564a2a386eSRichard Tran Mills 57e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 584222ddf1SHong Zhang 59ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 604abfa3b3SRichard Tran Mills /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 61e9c94282SRichard Tran Mills * simply involves destroying the MKL sparse matrix handle and then freeing 62e9c94282SRichard Tran Mills * the spptr pointer. */ 63a8327b06SKarl Rupp if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 64a8327b06SKarl Rupp 654abfa3b3SRichard Tran Mills if (aijmkl->sparse_optimized) { 660632b357SRichard Tran Mills sparse_status_t stat; 674abfa3b3SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 68db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize()"); 694abfa3b3SRichard Tran Mills } 70ddf6f99aSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 71e9c94282SRichard Tran Mills ierr = PetscFree(B->spptr);CHKERRQ(ierr); 724a2a386eSRichard Tran Mills 734a2a386eSRichard Tran Mills /* Change the type of B to MATSEQAIJ. */ 744a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 754a2a386eSRichard Tran Mills 764a2a386eSRichard Tran Mills *newmat = B; 774a2a386eSRichard Tran Mills PetscFunctionReturn(0); 784a2a386eSRichard Tran Mills } 794a2a386eSRichard Tran Mills 804a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 814a2a386eSRichard Tran Mills { 824a2a386eSRichard Tran Mills PetscErrorCode ierr; 834a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 844a2a386eSRichard Tran Mills 854a2a386eSRichard Tran Mills PetscFunctionBegin; 86e9c94282SRichard Tran Mills 87edc89de7SRichard Tran Mills /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */ 88e9c94282SRichard Tran Mills if (aijmkl) { 894a2a386eSRichard Tran Mills /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 90ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 914abfa3b3SRichard Tran Mills if (aijmkl->sparse_optimized) { 924abfa3b3SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 934abfa3b3SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 94db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()"); 954abfa3b3SRichard Tran Mills } 964abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 974a2a386eSRichard Tran Mills ierr = PetscFree(A->spptr);CHKERRQ(ierr); 98e9c94282SRichard Tran Mills } 994a2a386eSRichard Tran Mills 1004a2a386eSRichard Tran Mills /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 1014a2a386eSRichard Tran Mills * to destroy everything that remains. */ 1024a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 1034a2a386eSRichard Tran Mills /* Note that I don't call MatSetType(). I believe this is because that 1044a2a386eSRichard Tran Mills * is only to be called when *building* a matrix. I could be wrong, but 1054a2a386eSRichard Tran Mills * that is how things work for the SuperLU matrix class. */ 1064a2a386eSRichard Tran Mills ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 1074a2a386eSRichard Tran Mills PetscFunctionReturn(0); 1084a2a386eSRichard Tran Mills } 1094a2a386eSRichard Tran Mills 110190ae7a4SRichard Tran Mills /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 1115b49642aSRichard Tran Mills * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 1125b49642aSRichard Tran Mills * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 1135b49642aSRichard Tran Mills * handle, creates a new one, and then calls mkl_sparse_optimize(). 1145b49642aSRichard Tran Mills * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 1155b49642aSRichard Tran Mills * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 1165b49642aSRichard Tran Mills * an unoptimized matrix handle here. */ 1176e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 1184a2a386eSRichard Tran Mills { 119ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1206e369cd5SRichard Tran Mills /* If the MKL library does not have mkl_sparse_optimize(), then this routine 1216e369cd5SRichard Tran Mills * does nothing. We make it callable anyway in this case because it cuts 1226e369cd5SRichard Tran Mills * down on littering the code with #ifdefs. */ 12345fbe478SRichard Tran Mills PetscFunctionBegin; 1246e369cd5SRichard Tran Mills PetscFunctionReturn(0); 1256e369cd5SRichard Tran Mills #else 126a8327b06SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 127a8327b06SKarl Rupp Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 128a8327b06SKarl Rupp PetscInt m,n; 129a8327b06SKarl Rupp MatScalar *aa; 130a8327b06SKarl Rupp PetscInt *aj,*ai; 1316e369cd5SRichard Tran Mills sparse_status_t stat; 132551aa5c8SRichard Tran Mills PetscErrorCode ierr; 1334a2a386eSRichard Tran Mills 134a8327b06SKarl Rupp PetscFunctionBegin; 135e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 136e626a176SRichard Tran Mills /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 137e626a176SRichard Tran Mills * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 138e626a176SRichard Tran Mills * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 139e626a176SRichard Tran Mills * mkl_sparse_optimize() later. */ 1406e369cd5SRichard Tran Mills if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 1414d51fa23SRichard Tran Mills #endif 1426e369cd5SRichard Tran Mills 1430632b357SRichard Tran Mills if (aijmkl->sparse_optimized) { 1440632b357SRichard Tran Mills /* Matrix has been previously assembled and optimized. Must destroy old 1450632b357SRichard Tran Mills * matrix handle before running the optimization step again. */ 1460632b357SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 147db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()"); 1480632b357SRichard Tran Mills } 1498d3fe1b0SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 1506e369cd5SRichard Tran Mills 151c9d46305SRichard Tran Mills /* Now perform the SpMV2 setup and matrix optimization. */ 152df555b71SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 153df555b71SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 154df555b71SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 15558678438SRichard Tran Mills m = A->rmap->n; 15658678438SRichard Tran Mills n = A->cmap->n; 157df555b71SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 158df555b71SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 159df555b71SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 1601495fedeSRichard Tran Mills if (a->nz && aa && !A->structure_only) { 1618d3fe1b0SRichard Tran Mills /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 1628d3fe1b0SRichard Tran Mills * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 16358678438SRichard Tran Mills stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 164db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()"); 165df555b71SRichard Tran Mills stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 166db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()"); 167df555b71SRichard Tran Mills stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 168db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()"); 1691950a7e7SRichard Tran Mills if (!aijmkl->no_SpMV2) { 170df555b71SRichard Tran Mills stat = mkl_sparse_optimize(aijmkl->csrA); 171db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()"); 1721950a7e7SRichard Tran Mills } 1734abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 174e995cf24SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 175190ae7a4SRichard Tran Mills } else { 176190ae7a4SRichard Tran Mills aijmkl->csrA = PETSC_NULL; 177c9d46305SRichard Tran Mills } 1786e369cd5SRichard Tran Mills 1796e369cd5SRichard Tran Mills PetscFunctionReturn(0); 180d995685eSRichard Tran Mills #endif 1816e369cd5SRichard Tran Mills } 1826e369cd5SRichard Tran Mills 183b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 184190ae7a4SRichard Tran Mills /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 185190ae7a4SRichard Tran Mills static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A) 18619afcda9SRichard Tran Mills { 18719afcda9SRichard Tran Mills PetscErrorCode ierr; 18819afcda9SRichard Tran Mills sparse_status_t stat; 18919afcda9SRichard Tran Mills sparse_index_base_t indexing; 190190ae7a4SRichard Tran Mills PetscInt m,n; 19145fbe478SRichard Tran Mills PetscInt *aj,*ai,*dummy; 19219afcda9SRichard Tran Mills MatScalar *aa; 19319afcda9SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 19419afcda9SRichard Tran Mills 195190ae7a4SRichard Tran Mills if (csrA) { 19645fbe478SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 197190ae7a4SRichard Tran Mills stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa); 1989c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 199190ae7a4SRichard Tran Mills if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 200190ae7a4SRichard Tran Mills } else { 201190ae7a4SRichard Tran Mills aj = ai = PETSC_NULL; 202190ae7a4SRichard Tran Mills aa = PETSC_NULL; 203aab60f1bSRichard Tran Mills } 204190ae7a4SRichard Tran Mills 20519afcda9SRichard Tran Mills ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 20645fbe478SRichard Tran Mills ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 207aab60f1bSRichard Tran Mills /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 208aab60f1bSRichard Tran Mills * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 209aab60f1bSRichard Tran Mills * they will be destroyed when the MKL handle is destroyed. 210aab60f1bSRichard Tran Mills * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 211190ae7a4SRichard Tran Mills if (csrA) { 212190ae7a4SRichard Tran Mills ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr); 213190ae7a4SRichard Tran Mills } else { 214190ae7a4SRichard Tran Mills /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 215190ae7a4SRichard Tran Mills ierr = MatSetUp(A);CHKERRQ(ierr); 216190ae7a4SRichard Tran Mills ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 217190ae7a4SRichard Tran Mills ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 218190ae7a4SRichard Tran Mills } 21919afcda9SRichard Tran Mills 22019afcda9SRichard Tran Mills /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 22119afcda9SRichard Tran Mills * Now turn it into a MATSEQAIJMKL. */ 22219afcda9SRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 2236c87cf42SRichard Tran Mills 22419afcda9SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 22519afcda9SRichard Tran Mills aijmkl->csrA = csrA; 2266c87cf42SRichard Tran Mills 22719afcda9SRichard Tran Mills /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 22819afcda9SRichard Tran Mills * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 22919afcda9SRichard Tran Mills * and just need to be able to run the MKL optimization step. */ 230f3fd1758SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 231f3fd1758SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 232f3fd1758SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 233190ae7a4SRichard Tran Mills if (csrA) { 23419afcda9SRichard Tran Mills stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 235db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()"); 23619afcda9SRichard Tran Mills stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 237db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()"); 2381950a7e7SRichard Tran Mills } 239e995cf24SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 24019afcda9SRichard Tran Mills PetscFunctionReturn(0); 24119afcda9SRichard Tran Mills } 242b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 243190ae7a4SRichard Tran Mills 244e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 245e8be1fc7SRichard Tran Mills * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 246e8be1fc7SRichard Tran Mills * MatMatMultNumeric(). */ 247b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 248190ae7a4SRichard Tran Mills static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 249e8be1fc7SRichard Tran Mills { 250e8be1fc7SRichard Tran Mills PetscInt i; 251e8be1fc7SRichard Tran Mills PetscInt nrows,ncols; 252e8be1fc7SRichard Tran Mills PetscInt nz; 253e8be1fc7SRichard Tran Mills PetscInt *ai,*aj,*dummy; 254e8be1fc7SRichard Tran Mills PetscScalar *aa; 255e8be1fc7SRichard Tran Mills PetscErrorCode ierr; 2561495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 257e8be1fc7SRichard Tran Mills sparse_status_t stat; 258e8be1fc7SRichard Tran Mills sparse_index_base_t indexing; 259e8be1fc7SRichard Tran Mills 260190ae7a4SRichard Tran Mills /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 261190ae7a4SRichard Tran Mills if (!aijmkl->csrA) PetscFunctionReturn(0); 262190ae7a4SRichard Tran Mills 263e8be1fc7SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 264e8be1fc7SRichard Tran Mills stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 265e8be1fc7SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 266e8be1fc7SRichard Tran Mills 267e8be1fc7SRichard Tran Mills /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 268e8be1fc7SRichard Tran Mills * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 269e8be1fc7SRichard Tran Mills for (i=0; i<nrows; i++) { 270e8be1fc7SRichard Tran Mills nz = ai[i+1] - ai[i]; 271e8be1fc7SRichard Tran Mills ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 272e8be1fc7SRichard Tran Mills } 273e8be1fc7SRichard Tran Mills 274e8be1fc7SRichard Tran Mills ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 275e8be1fc7SRichard Tran Mills ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 276e8be1fc7SRichard Tran Mills 277e995cf24SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 278a7180b50SRichard Tran Mills /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation. 279a7180b50SRichard Tran Mills * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed 280a7180b50SRichard Tran Mills * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */ 281a7180b50SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 282e8be1fc7SRichard Tran Mills PetscFunctionReturn(0); 283e8be1fc7SRichard Tran Mills } 284b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 285e8be1fc7SRichard Tran Mills 2863849ddb2SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 2873849ddb2SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer) 2883849ddb2SRichard Tran Mills { 2893849ddb2SRichard Tran Mills PetscInt i,j,k; 2903849ddb2SRichard Tran Mills PetscInt nrows,ncols; 2913849ddb2SRichard Tran Mills PetscInt nz; 2923849ddb2SRichard Tran Mills PetscInt *ai,*aj,*dummy; 2933849ddb2SRichard Tran Mills PetscScalar *aa; 2943849ddb2SRichard Tran Mills PetscErrorCode ierr; 2951495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 2963849ddb2SRichard Tran Mills sparse_status_t stat; 2973849ddb2SRichard Tran Mills sparse_index_base_t indexing; 2983849ddb2SRichard Tran Mills 2993849ddb2SRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");CHKERRQ(ierr); 3003849ddb2SRichard Tran Mills 3013849ddb2SRichard Tran Mills /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 3023849ddb2SRichard Tran Mills if (!aijmkl->csrA) { 3033849ddb2SRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");CHKERRQ(ierr); 3043849ddb2SRichard Tran Mills PetscFunctionReturn(0); 3053849ddb2SRichard Tran Mills } 3063849ddb2SRichard Tran Mills 3073849ddb2SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 3083849ddb2SRichard Tran Mills stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 3093849ddb2SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 3103849ddb2SRichard Tran Mills 3113849ddb2SRichard Tran Mills k = 0; 3123849ddb2SRichard Tran Mills for (i=0; i<nrows; i++) { 3133849ddb2SRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"row %D: ",i);CHKERRQ(ierr); 3143849ddb2SRichard Tran Mills nz = ai[i+1] - ai[i]; 3153849ddb2SRichard Tran Mills for (j=0; j<nz; j++) { 3163849ddb2SRichard Tran Mills if (aa) { 3173f597eacSJose E. Roman ierr = PetscViewerASCIIPrintf(viewer,"(%D, %g) ",aj[k],PetscRealPart(aa[k]));CHKERRQ(ierr); 3183849ddb2SRichard Tran Mills } else { 3193849ddb2SRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);CHKERRQ(ierr); 3203849ddb2SRichard Tran Mills } 3213849ddb2SRichard Tran Mills k++; 3223849ddb2SRichard Tran Mills } 3233849ddb2SRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 3243849ddb2SRichard Tran Mills } 3253849ddb2SRichard Tran Mills PetscFunctionReturn(0); 3263849ddb2SRichard Tran Mills } 3273849ddb2SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 3283849ddb2SRichard Tran Mills 3296e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 3306e369cd5SRichard Tran Mills { 3316e369cd5SRichard Tran Mills PetscErrorCode ierr; 3321495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 3336e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl_dest; 3346e369cd5SRichard Tran Mills 3356e369cd5SRichard Tran Mills PetscFunctionBegin; 3366e369cd5SRichard Tran Mills ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 3376e369cd5SRichard Tran Mills aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr; 338580bdb30SBarry Smith ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr); 3396e369cd5SRichard Tran Mills aijmkl_dest->sparse_optimized = PETSC_FALSE; 3405b49642aSRichard Tran Mills if (aijmkl->eager_inspection) { 3416e369cd5SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 3425b49642aSRichard Tran Mills } 3436e369cd5SRichard Tran Mills PetscFunctionReturn(0); 3446e369cd5SRichard Tran Mills } 3456e369cd5SRichard Tran Mills 3466e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 3476e369cd5SRichard Tran Mills { 3486e369cd5SRichard Tran Mills PetscErrorCode ierr; 3496e369cd5SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3505b49642aSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 3516e369cd5SRichard Tran Mills 3526e369cd5SRichard Tran Mills PetscFunctionBegin; 3536e369cd5SRichard Tran Mills if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 3546e369cd5SRichard Tran Mills 3556e369cd5SRichard Tran Mills /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 3566e369cd5SRichard Tran Mills * extra information and some different methods, call the AssemblyEnd 3576e369cd5SRichard Tran Mills * routine for a MATSEQAIJ. 3586e369cd5SRichard Tran Mills * I'm not sure if this is the best way to do this, but it avoids 359d96e85feSRichard Tran Mills * a lot of code duplication. */ 3606e369cd5SRichard Tran Mills a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 3616e369cd5SRichard Tran Mills ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 3626e369cd5SRichard Tran Mills 3635b49642aSRichard Tran Mills /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 3645b49642aSRichard Tran Mills * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 3655b49642aSRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*)A->spptr; 3665b49642aSRichard Tran Mills if (aijmkl->eager_inspection) { 3676e369cd5SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 3685b49642aSRichard Tran Mills } 369df555b71SRichard Tran Mills 3704a2a386eSRichard Tran Mills PetscFunctionReturn(0); 3714a2a386eSRichard Tran Mills } 3724a2a386eSRichard Tran Mills 373e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 3744a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 3754a2a386eSRichard Tran Mills { 3764a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3774a2a386eSRichard Tran Mills const PetscScalar *x; 3784a2a386eSRichard Tran Mills PetscScalar *y; 3794a2a386eSRichard Tran Mills const MatScalar *aa; 3804a2a386eSRichard Tran Mills PetscErrorCode ierr; 3814a2a386eSRichard Tran Mills PetscInt m = A->rmap->n; 382db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 383db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 384db63039fSRichard Tran Mills PetscScalar beta = 0.0; 3854a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 386db63039fSRichard Tran Mills char matdescra[6]; 387db63039fSRichard Tran Mills 3884a2a386eSRichard Tran Mills 3894a2a386eSRichard Tran Mills /* Variables not in MatMult_SeqAIJ. */ 390ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 391ff03dc53SRichard Tran Mills 392ff03dc53SRichard Tran Mills PetscFunctionBegin; 393db63039fSRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 394db63039fSRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 395ff03dc53SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 396ff03dc53SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 397ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 398ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 399ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 400ff03dc53SRichard Tran Mills 401ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 402db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 403ff03dc53SRichard Tran Mills 404ff03dc53SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 405ff03dc53SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 406ff03dc53SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 407ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 408ff03dc53SRichard Tran Mills } 4091950a7e7SRichard Tran Mills #endif 410ff03dc53SRichard Tran Mills 411ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 412df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 413df555b71SRichard Tran Mills { 414df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 415df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 416df555b71SRichard Tran Mills const PetscScalar *x; 417df555b71SRichard Tran Mills PetscScalar *y; 418df555b71SRichard Tran Mills PetscErrorCode ierr; 419df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 420551aa5c8SRichard Tran Mills PetscObjectState state; 421df555b71SRichard Tran Mills 422df555b71SRichard Tran Mills PetscFunctionBegin; 423df555b71SRichard Tran Mills 42438987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 42538987b35SRichard Tran Mills if (!a->nz) { 42638987b35SRichard Tran Mills PetscInt i; 42738987b35SRichard Tran Mills PetscInt m=A->rmap->n; 42838987b35SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 42938987b35SRichard Tran Mills for (i=0; i<m; i++) { 43038987b35SRichard Tran Mills y[i] = 0.0; 43138987b35SRichard Tran Mills } 43238987b35SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 43338987b35SRichard Tran Mills PetscFunctionReturn(0); 43438987b35SRichard Tran Mills } 435f36dfe3fSRichard Tran Mills 436df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 437df555b71SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 438df555b71SRichard Tran Mills 4393fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 4403fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 4413fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 442551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 443551aa5c8SRichard Tran Mills if (!aijmkl->sparse_optimized || aijmkl->state != state) { 4443fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 4453fa15762SRichard Tran Mills } 4463fa15762SRichard Tran Mills 447df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMult. */ 448df555b71SRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 449db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 450df555b71SRichard Tran Mills 451df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 452df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 453df555b71SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 454df555b71SRichard Tran Mills PetscFunctionReturn(0); 455df555b71SRichard Tran Mills } 456d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 457df555b71SRichard Tran Mills 458e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 459ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 460ff03dc53SRichard Tran Mills { 461ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 462ff03dc53SRichard Tran Mills const PetscScalar *x; 463ff03dc53SRichard Tran Mills PetscScalar *y; 464ff03dc53SRichard Tran Mills const MatScalar *aa; 465ff03dc53SRichard Tran Mills PetscErrorCode ierr; 466ff03dc53SRichard Tran Mills PetscInt m = A->rmap->n; 467db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 468db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 469db63039fSRichard Tran Mills PetscScalar beta = 0.0; 470ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 471db63039fSRichard Tran Mills char matdescra[6]; 472ff03dc53SRichard Tran Mills 473ff03dc53SRichard Tran Mills /* Variables not in MatMultTranspose_SeqAIJ. */ 474ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 4754a2a386eSRichard Tran Mills 4764a2a386eSRichard Tran Mills PetscFunctionBegin; 477969800c5SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 478969800c5SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 4794a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4804a2a386eSRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 4814a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 4824a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 4834a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 4844a2a386eSRichard Tran Mills 4854a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 486db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 4874a2a386eSRichard Tran Mills 4884a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 4894a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4904a2a386eSRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 4914a2a386eSRichard Tran Mills PetscFunctionReturn(0); 4924a2a386eSRichard Tran Mills } 4931950a7e7SRichard Tran Mills #endif 4944a2a386eSRichard Tran Mills 495ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 496df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 497df555b71SRichard Tran Mills { 498df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 499df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 500df555b71SRichard Tran Mills const PetscScalar *x; 501df555b71SRichard Tran Mills PetscScalar *y; 502df555b71SRichard Tran Mills PetscErrorCode ierr; 5030632b357SRichard Tran Mills sparse_status_t stat; 504551aa5c8SRichard Tran Mills PetscObjectState state; 505df555b71SRichard Tran Mills 506df555b71SRichard Tran Mills PetscFunctionBegin; 507df555b71SRichard Tran Mills 50838987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 50938987b35SRichard Tran Mills if (!a->nz) { 51038987b35SRichard Tran Mills PetscInt i; 51138987b35SRichard Tran Mills PetscInt n=A->cmap->n; 51238987b35SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 51338987b35SRichard Tran Mills for (i=0; i<n; i++) { 51438987b35SRichard Tran Mills y[i] = 0.0; 51538987b35SRichard Tran Mills } 51638987b35SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 51738987b35SRichard Tran Mills PetscFunctionReturn(0); 51838987b35SRichard Tran Mills } 519f36dfe3fSRichard Tran Mills 520df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 521df555b71SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 522df555b71SRichard Tran Mills 5233fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 5243fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 5253fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 526551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 527551aa5c8SRichard Tran Mills if (!aijmkl->sparse_optimized || aijmkl->state != state) { 5283fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 5293fa15762SRichard Tran Mills } 5303fa15762SRichard Tran Mills 531df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 532df555b71SRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 533db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 534df555b71SRichard Tran Mills 535df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 536df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 537df555b71SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 538df555b71SRichard Tran Mills PetscFunctionReturn(0); 539df555b71SRichard Tran Mills } 540d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 541df555b71SRichard Tran Mills 542e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 5434a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 5444a2a386eSRichard Tran Mills { 5454a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5464a2a386eSRichard Tran Mills const PetscScalar *x; 5474a2a386eSRichard Tran Mills PetscScalar *y,*z; 5484a2a386eSRichard Tran Mills const MatScalar *aa; 5494a2a386eSRichard Tran Mills PetscErrorCode ierr; 5504a2a386eSRichard Tran Mills PetscInt m = A->rmap->n; 551db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 5524a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 5534a2a386eSRichard Tran Mills PetscInt i; 5544a2a386eSRichard Tran Mills 555ff03dc53SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 556ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 557a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 558db63039fSRichard Tran Mills PetscScalar beta; 559a84739b8SRichard Tran Mills char matdescra[6]; 560ff03dc53SRichard Tran Mills 561ff03dc53SRichard Tran Mills PetscFunctionBegin; 562a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 563a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 564a84739b8SRichard Tran Mills 565ff03dc53SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 566ff03dc53SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 567ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 568ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 569ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 570ff03dc53SRichard Tran Mills 571ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 572a84739b8SRichard Tran Mills if (zz == yy) { 573a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 574db63039fSRichard Tran Mills beta = 1.0; 575db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 576a84739b8SRichard Tran Mills } else { 577db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 578db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 579db63039fSRichard Tran Mills beta = 0.0; 580db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 581ff03dc53SRichard Tran Mills for (i=0; i<m; i++) { 582ff03dc53SRichard Tran Mills z[i] += y[i]; 583ff03dc53SRichard Tran Mills } 584a84739b8SRichard Tran Mills } 585ff03dc53SRichard Tran Mills 586ff03dc53SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 587ff03dc53SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 588ff03dc53SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 589ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 590ff03dc53SRichard Tran Mills } 5911950a7e7SRichard Tran Mills #endif 592ff03dc53SRichard Tran Mills 593ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 594df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 595df555b71SRichard Tran Mills { 596df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 597df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 598df555b71SRichard Tran Mills const PetscScalar *x; 599df555b71SRichard Tran Mills PetscScalar *y,*z; 600df555b71SRichard Tran Mills PetscErrorCode ierr; 601df555b71SRichard Tran Mills PetscInt m = A->rmap->n; 602df555b71SRichard Tran Mills PetscInt i; 603df555b71SRichard Tran Mills 604df555b71SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 605df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 606551aa5c8SRichard Tran Mills PetscObjectState state; 607df555b71SRichard Tran Mills 608df555b71SRichard Tran Mills PetscFunctionBegin; 609df555b71SRichard Tran Mills 61038987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 61138987b35SRichard Tran Mills if (!a->nz) { 61238987b35SRichard Tran Mills PetscInt i; 61338987b35SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 61438987b35SRichard Tran Mills for (i=0; i<m; i++) { 61538987b35SRichard Tran Mills z[i] = y[i]; 61638987b35SRichard Tran Mills } 61738987b35SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 61838987b35SRichard Tran Mills PetscFunctionReturn(0); 61938987b35SRichard Tran Mills } 620df555b71SRichard Tran Mills 621df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 622df555b71SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 623df555b71SRichard Tran Mills 6243fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 6253fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 6263fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 627551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 628551aa5c8SRichard Tran Mills if (!aijmkl->sparse_optimized || aijmkl->state != state) { 6293fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 6303fa15762SRichard Tran Mills } 6313fa15762SRichard Tran Mills 632df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 633df555b71SRichard Tran Mills if (zz == yy) { 634df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 635df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 636db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 637db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 638df555b71SRichard Tran Mills } else { 639df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 640df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 641db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 642db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 643df555b71SRichard Tran Mills for (i=0; i<m; i++) { 644df555b71SRichard Tran Mills z[i] += y[i]; 645df555b71SRichard Tran Mills } 646df555b71SRichard Tran Mills } 647df555b71SRichard Tran Mills 648df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 649df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 650df555b71SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 651df555b71SRichard Tran Mills PetscFunctionReturn(0); 652df555b71SRichard Tran Mills } 653d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 654df555b71SRichard Tran Mills 655e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 656ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 657ff03dc53SRichard Tran Mills { 658ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 659ff03dc53SRichard Tran Mills const PetscScalar *x; 660ff03dc53SRichard Tran Mills PetscScalar *y,*z; 661ff03dc53SRichard Tran Mills const MatScalar *aa; 662ff03dc53SRichard Tran Mills PetscErrorCode ierr; 663ff03dc53SRichard Tran Mills PetscInt m = A->rmap->n; 664db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 665ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 666ff03dc53SRichard Tran Mills PetscInt i; 667ff03dc53SRichard Tran Mills 668ff03dc53SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 669ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 670a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 671db63039fSRichard Tran Mills PetscScalar beta; 672a84739b8SRichard Tran Mills char matdescra[6]; 6734a2a386eSRichard Tran Mills 6744a2a386eSRichard Tran Mills PetscFunctionBegin; 675a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 676a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 677a84739b8SRichard Tran Mills 6784a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 6794a2a386eSRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 6804a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 6814a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 6824a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 6834a2a386eSRichard Tran Mills 6844a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 685a84739b8SRichard Tran Mills if (zz == yy) { 686a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 687db63039fSRichard Tran Mills beta = 1.0; 688969800c5SRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 689a84739b8SRichard Tran Mills } else { 690db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 691db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 692db63039fSRichard Tran Mills beta = 0.0; 693db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 694969800c5SRichard Tran Mills for (i=0; i<n; i++) { 6954a2a386eSRichard Tran Mills z[i] += y[i]; 6964a2a386eSRichard Tran Mills } 697a84739b8SRichard Tran Mills } 6984a2a386eSRichard Tran Mills 6994a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 7004a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 7014a2a386eSRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 7024a2a386eSRichard Tran Mills PetscFunctionReturn(0); 7034a2a386eSRichard Tran Mills } 7041950a7e7SRichard Tran Mills #endif 7054a2a386eSRichard Tran Mills 706ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 707df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 708df555b71SRichard Tran Mills { 709df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 710df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 711df555b71SRichard Tran Mills const PetscScalar *x; 712df555b71SRichard Tran Mills PetscScalar *y,*z; 713df555b71SRichard Tran Mills PetscErrorCode ierr; 714969800c5SRichard Tran Mills PetscInt n = A->cmap->n; 715df555b71SRichard Tran Mills PetscInt i; 716551aa5c8SRichard Tran Mills PetscObjectState state; 717df555b71SRichard Tran Mills 718df555b71SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 719df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 720df555b71SRichard Tran Mills 721df555b71SRichard Tran Mills PetscFunctionBegin; 722df555b71SRichard Tran Mills 72338987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 72438987b35SRichard Tran Mills if (!a->nz) { 72538987b35SRichard Tran Mills PetscInt i; 72638987b35SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 72738987b35SRichard Tran Mills for (i=0; i<n; i++) { 72838987b35SRichard Tran Mills z[i] = y[i]; 72938987b35SRichard Tran Mills } 73038987b35SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 73138987b35SRichard Tran Mills PetscFunctionReturn(0); 73238987b35SRichard Tran Mills } 733f36dfe3fSRichard Tran Mills 734df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 735df555b71SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 736df555b71SRichard Tran Mills 7373fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 7383fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 7393fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 740551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 741551aa5c8SRichard Tran Mills if (!aijmkl->sparse_optimized || aijmkl->state != state) { 7423fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 7433fa15762SRichard Tran Mills } 7443fa15762SRichard Tran Mills 745df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 746df555b71SRichard Tran Mills if (zz == yy) { 747df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 748df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 749db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 750db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 751df555b71SRichard Tran Mills } else { 752df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 753df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 754db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 755db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 756969800c5SRichard Tran Mills for (i=0; i<n; i++) { 757df555b71SRichard Tran Mills z[i] += y[i]; 758df555b71SRichard Tran Mills } 759df555b71SRichard Tran Mills } 760df555b71SRichard Tran Mills 761df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 762df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 763df555b71SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 764df555b71SRichard Tran Mills PetscFunctionReturn(0); 765df555b71SRichard Tran Mills } 766d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 767df555b71SRichard Tran Mills 768190ae7a4SRichard Tran Mills /* -------------------------- MatProduct code -------------------------- */ 7698a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 770190ae7a4SRichard Tran Mills static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 771431879ecSRichard Tran Mills { 7721495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr; 773431879ecSRichard Tran Mills sparse_matrix_t csrA,csrB,csrC; 774190ae7a4SRichard Tran Mills PetscInt nrows,ncols; 775431879ecSRichard Tran Mills PetscErrorCode ierr; 776431879ecSRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 777431879ecSRichard Tran Mills struct matrix_descr descr_type_gen; 778431879ecSRichard Tran Mills PetscObjectState state; 779431879ecSRichard Tran Mills 780431879ecSRichard Tran Mills PetscFunctionBegin; 781190ae7a4SRichard Tran Mills /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 782190ae7a4SRichard Tran Mills * not handle sparse matrices with zero rows or columns. */ 783190ae7a4SRichard Tran Mills if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 784190ae7a4SRichard Tran Mills else nrows = A->cmap->N; 785190ae7a4SRichard Tran Mills if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 786190ae7a4SRichard Tran Mills else ncols = B->rmap->N; 787190ae7a4SRichard Tran Mills 788431879ecSRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 789431879ecSRichard Tran Mills if (!a->sparse_optimized || a->state != state) { 790431879ecSRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 791431879ecSRichard Tran Mills } 792431879ecSRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 793431879ecSRichard Tran Mills if (!b->sparse_optimized || b->state != state) { 794431879ecSRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(B); 795431879ecSRichard Tran Mills } 796431879ecSRichard Tran Mills csrA = a->csrA; 797431879ecSRichard Tran Mills csrB = b->csrA; 798431879ecSRichard Tran Mills descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 799431879ecSRichard Tran Mills 800190ae7a4SRichard Tran Mills if (csrA && csrB) { 801190ae7a4SRichard Tran Mills stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC); 802db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()"); 803190ae7a4SRichard Tran Mills } else { 804190ae7a4SRichard Tran Mills csrC = PETSC_NULL; 805190ae7a4SRichard Tran Mills } 806190ae7a4SRichard Tran Mills 807190ae7a4SRichard Tran Mills ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr); 808431879ecSRichard Tran Mills 809431879ecSRichard Tran Mills PetscFunctionReturn(0); 810431879ecSRichard Tran Mills } 811431879ecSRichard Tran Mills 812190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 813e8be1fc7SRichard Tran Mills { 8141495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 815e8be1fc7SRichard Tran Mills sparse_matrix_t csrA, csrB, csrC; 816e8be1fc7SRichard Tran Mills PetscErrorCode ierr; 817e8be1fc7SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 818e8be1fc7SRichard Tran Mills struct matrix_descr descr_type_gen; 819e8be1fc7SRichard Tran Mills PetscObjectState state; 820e8be1fc7SRichard Tran Mills 821e8be1fc7SRichard Tran Mills PetscFunctionBegin; 822e8be1fc7SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 823e8be1fc7SRichard Tran Mills if (!a->sparse_optimized || a->state != state) { 824e8be1fc7SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 825e8be1fc7SRichard Tran Mills } 826e8be1fc7SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 827e8be1fc7SRichard Tran Mills if (!b->sparse_optimized || b->state != state) { 828e8be1fc7SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(B); 829e8be1fc7SRichard Tran Mills } 830e8be1fc7SRichard Tran Mills csrA = a->csrA; 831e8be1fc7SRichard Tran Mills csrB = b->csrA; 832e8be1fc7SRichard Tran Mills csrC = c->csrA; 833e8be1fc7SRichard Tran Mills descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 834e8be1fc7SRichard Tran Mills 835190ae7a4SRichard Tran Mills if (csrA && csrB) { 836190ae7a4SRichard Tran Mills stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC); 837db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()"); 838190ae7a4SRichard Tran Mills } else { 839190ae7a4SRichard Tran Mills csrC = PETSC_NULL; 840190ae7a4SRichard Tran Mills } 841e8be1fc7SRichard Tran Mills 842e8be1fc7SRichard Tran Mills /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 8434f53af40SRichard Tran Mills ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 844e8be1fc7SRichard Tran Mills 845e8be1fc7SRichard Tran Mills PetscFunctionReturn(0); 846e8be1fc7SRichard Tran Mills } 847e8be1fc7SRichard Tran Mills 848190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 8494f53af40SRichard Tran Mills { 850190ae7a4SRichard Tran Mills PetscErrorCode ierr; 851190ae7a4SRichard Tran Mills 852190ae7a4SRichard Tran Mills PetscFunctionBegin; 853190ae7a4SRichard Tran Mills ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 854190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 855190ae7a4SRichard Tran Mills } 856190ae7a4SRichard Tran Mills 857190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 858190ae7a4SRichard Tran Mills { 859190ae7a4SRichard Tran Mills PetscErrorCode ierr; 860190ae7a4SRichard Tran Mills 861190ae7a4SRichard Tran Mills PetscFunctionBegin; 862190ae7a4SRichard Tran Mills ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 863190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 864190ae7a4SRichard Tran Mills } 865190ae7a4SRichard Tran Mills 866190ae7a4SRichard Tran Mills PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 867190ae7a4SRichard Tran Mills { 868190ae7a4SRichard Tran Mills PetscErrorCode ierr; 869190ae7a4SRichard Tran Mills 870190ae7a4SRichard Tran Mills PetscFunctionBegin; 871190ae7a4SRichard Tran Mills ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 872190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 873190ae7a4SRichard Tran Mills } 874190ae7a4SRichard Tran Mills 875190ae7a4SRichard Tran Mills PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 876190ae7a4SRichard Tran Mills { 877190ae7a4SRichard Tran Mills PetscErrorCode ierr; 878190ae7a4SRichard Tran Mills 879190ae7a4SRichard Tran Mills PetscFunctionBegin; 880190ae7a4SRichard Tran Mills ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 881190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 882190ae7a4SRichard Tran Mills } 883190ae7a4SRichard Tran Mills 884190ae7a4SRichard Tran Mills PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 885190ae7a4SRichard Tran Mills { 886190ae7a4SRichard Tran Mills PetscErrorCode ierr; 887190ae7a4SRichard Tran Mills 888190ae7a4SRichard Tran Mills PetscFunctionBegin; 889190ae7a4SRichard Tran Mills ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr); 890190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 891190ae7a4SRichard Tran Mills } 892190ae7a4SRichard Tran Mills 893190ae7a4SRichard Tran Mills PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 894190ae7a4SRichard Tran Mills { 895190ae7a4SRichard Tran Mills PetscErrorCode ierr; 896190ae7a4SRichard Tran Mills 897190ae7a4SRichard Tran Mills PetscFunctionBegin; 898190ae7a4SRichard Tran Mills ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr); 899190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 900190ae7a4SRichard Tran Mills } 901190ae7a4SRichard Tran Mills 902190ae7a4SRichard Tran Mills static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 903190ae7a4SRichard Tran Mills { 904190ae7a4SRichard Tran Mills PetscErrorCode ierr; 905190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 906190ae7a4SRichard Tran Mills Mat A = product->A,B = product->B; 907190ae7a4SRichard Tran Mills 908190ae7a4SRichard Tran Mills PetscFunctionBegin; 909190ae7a4SRichard Tran Mills ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr); 910190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 911190ae7a4SRichard Tran Mills } 912190ae7a4SRichard Tran Mills 913190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 914190ae7a4SRichard Tran Mills { 915190ae7a4SRichard Tran Mills PetscErrorCode ierr; 916190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 917190ae7a4SRichard Tran Mills Mat A = product->A,B = product->B; 918190ae7a4SRichard Tran Mills PetscReal fill = product->fill; 919190ae7a4SRichard Tran Mills 920190ae7a4SRichard Tran Mills PetscFunctionBegin; 921190ae7a4SRichard Tran Mills ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr); 922190ae7a4SRichard Tran Mills C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 923190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 924190ae7a4SRichard Tran Mills } 925190ae7a4SRichard Tran Mills 92649ba5396SRichard Tran Mills PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C) 927190ae7a4SRichard Tran Mills { 928190ae7a4SRichard Tran Mills Mat Ct; 929190ae7a4SRichard Tran Mills Vec zeros; 9301495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 9314f53af40SRichard Tran Mills sparse_matrix_t csrA, csrP, csrC; 9324f53af40SRichard Tran Mills PetscBool set, flag; 9334f53af40SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 934b9e1dd46SRichard Tran Mills struct matrix_descr descr_type_sym; 9354f53af40SRichard Tran Mills PetscObjectState state; 9364f53af40SRichard Tran Mills PetscErrorCode ierr; 9374f53af40SRichard Tran Mills 9384f53af40SRichard Tran Mills PetscFunctionBegin; 9394f53af40SRichard Tran Mills ierr = MatIsSymmetricKnown(A,&set,&flag); 94037f0d54fSRichard Tran Mills if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 9414f53af40SRichard Tran Mills 9424f53af40SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 9434f53af40SRichard Tran Mills if (!a->sparse_optimized || a->state != state) { 9444f53af40SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 9454f53af40SRichard Tran Mills } 9464f53af40SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 9474f53af40SRichard Tran Mills if (!p->sparse_optimized || p->state != state) { 9484f53af40SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(P); 9494f53af40SRichard Tran Mills } 9504f53af40SRichard Tran Mills csrA = a->csrA; 9514f53af40SRichard Tran Mills csrP = p->csrA; 9524f53af40SRichard Tran Mills csrC = c->csrA; 953b9e1dd46SRichard Tran Mills descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 954190ae7a4SRichard Tran Mills descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 955b9e1dd46SRichard Tran Mills descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 9564f53af40SRichard Tran Mills 957f8990b4aSRichard Tran Mills /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 958b9e1dd46SRichard Tran Mills stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 959db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()"); 9604f53af40SRichard Tran Mills 961190ae7a4SRichard Tran Mills /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 962190ae7a4SRichard Tran Mills * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 963190ae7a4SRichard Tran Mills * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 964190ae7a4SRichard Tran Mills * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY 965190ae7a4SRichard Tran Mills * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 966190ae7a4SRichard Tran Mills * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 967190ae7a4SRichard Tran Mills * the full matrix. */ 9684f53af40SRichard Tran Mills ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 969190ae7a4SRichard Tran Mills ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr); 970190ae7a4SRichard Tran Mills ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr); 971190ae7a4SRichard Tran Mills ierr = VecSetFromOptions(zeros);CHKERRQ(ierr); 972190ae7a4SRichard Tran Mills ierr = VecZeroEntries(zeros);CHKERRQ(ierr); 973190ae7a4SRichard Tran Mills ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr); 974190ae7a4SRichard Tran Mills ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 975190ae7a4SRichard Tran Mills /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 976190ae7a4SRichard Tran Mills ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr); 9771495fedeSRichard Tran Mills ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr); 978190ae7a4SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr); 979190ae7a4SRichard Tran Mills ierr = VecDestroy(&zeros);CHKERRQ(ierr); 980190ae7a4SRichard Tran Mills ierr = MatDestroy(&Ct);CHKERRQ(ierr); 9814f53af40SRichard Tran Mills PetscFunctionReturn(0); 9824f53af40SRichard Tran Mills } 983190ae7a4SRichard Tran Mills 984190ae7a4SRichard Tran Mills PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 985190ae7a4SRichard Tran Mills { 986190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 987190ae7a4SRichard Tran Mills Mat A = product->A,P = product->B; 9881495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr; 989190ae7a4SRichard Tran Mills sparse_matrix_t csrA,csrP,csrC; 990190ae7a4SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 991190ae7a4SRichard Tran Mills struct matrix_descr descr_type_sym; 992190ae7a4SRichard Tran Mills PetscObjectState state; 993190ae7a4SRichard Tran Mills PetscErrorCode ierr; 994190ae7a4SRichard Tran Mills 995190ae7a4SRichard Tran Mills PetscFunctionBegin; 996190ae7a4SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 997190ae7a4SRichard Tran Mills if (!a->sparse_optimized || a->state != state) { 998190ae7a4SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 999190ae7a4SRichard Tran Mills } 1000190ae7a4SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 1001190ae7a4SRichard Tran Mills if (!p->sparse_optimized || p->state != state) { 1002190ae7a4SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(P); 1003190ae7a4SRichard Tran Mills } 1004190ae7a4SRichard Tran Mills csrA = a->csrA; 1005190ae7a4SRichard Tran Mills csrP = p->csrA; 1006190ae7a4SRichard Tran Mills descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 1007190ae7a4SRichard Tran Mills descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 1008190ae7a4SRichard Tran Mills descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 1009190ae7a4SRichard Tran Mills 1010190ae7a4SRichard Tran Mills /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 1011190ae7a4SRichard Tran Mills if (csrP && csrA) { 1012190ae7a4SRichard Tran Mills stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL); 1013db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()"); 1014190ae7a4SRichard Tran Mills } else { 1015190ae7a4SRichard Tran Mills csrC = PETSC_NULL; 1016190ae7a4SRichard Tran Mills } 1017190ae7a4SRichard Tran Mills 1018190ae7a4SRichard Tran Mills /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 1019190ae7a4SRichard Tran Mills * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 102049ba5396SRichard Tran Mills * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 102149ba5396SRichard Tran Mills * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 102249ba5396SRichard Tran Mills * is guaranteed. */ 1023190ae7a4SRichard Tran Mills ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr); 1024190ae7a4SRichard Tran Mills 1025190ae7a4SRichard Tran Mills C->ops->productnumeric = MatProductNumeric_PtAP; 1026190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 1027190ae7a4SRichard Tran Mills } 1028190ae7a4SRichard Tran Mills 1029190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 1030190ae7a4SRichard Tran Mills { 1031190ae7a4SRichard Tran Mills PetscFunctionBegin; 1032190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_AB; 1033190ae7a4SRichard Tran Mills C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1034190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 1035190ae7a4SRichard Tran Mills } 1036190ae7a4SRichard Tran Mills 1037190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 1038190ae7a4SRichard Tran Mills { 1039190ae7a4SRichard Tran Mills PetscFunctionBegin; 1040190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 1041190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 1042190ae7a4SRichard Tran Mills } 1043190ae7a4SRichard Tran Mills 1044190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 1045190ae7a4SRichard Tran Mills { 1046190ae7a4SRichard Tran Mills PetscFunctionBegin; 1047190ae7a4SRichard Tran Mills C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 1048190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_ABt; 1049190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 1050190ae7a4SRichard Tran Mills } 1051190ae7a4SRichard Tran Mills 1052190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 1053190ae7a4SRichard Tran Mills { 1054190ae7a4SRichard Tran Mills PetscErrorCode ierr; 1055190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 1056190ae7a4SRichard Tran Mills Mat A = product->A; 1057190ae7a4SRichard Tran Mills PetscBool set, flag; 1058190ae7a4SRichard Tran Mills 1059190ae7a4SRichard Tran Mills PetscFunctionBegin; 1060*a3d67537SPierre Jolivet if (PetscDefined(USE_COMPLEX)) { 10612ab6f6a8SStefano Zampini /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used. 10622ab6f6a8SStefano Zampini * We do this in several other locations in this file. This works for the time being, but these 1063190ae7a4SRichard Tran Mills * routines are considered unsafe and may be removed from the MatProduct code in the future. 10642ab6f6a8SStefano Zampini * TODO: Add proper MATSEQAIJMKL implementations */ 1065190ae7a4SRichard Tran Mills C->ops->productsymbolic = NULL; 1066*a3d67537SPierre Jolivet } else { 1067190ae7a4SRichard Tran Mills /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 1068190ae7a4SRichard Tran Mills ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr); 1069*a3d67537SPierre Jolivet if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1070*a3d67537SPierre Jolivet else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 10711495fedeSRichard Tran Mills /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 1072190ae7a4SRichard Tran Mills * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 1073*a3d67537SPierre Jolivet } 1074190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 1075190ae7a4SRichard Tran Mills } 1076190ae7a4SRichard Tran Mills 1077190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 1078190ae7a4SRichard Tran Mills { 1079190ae7a4SRichard Tran Mills PetscFunctionBegin; 10802ab6f6a8SStefano Zampini C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 1081190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 1082190ae7a4SRichard Tran Mills } 1083190ae7a4SRichard Tran Mills 1084190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 1085190ae7a4SRichard Tran Mills { 1086190ae7a4SRichard Tran Mills PetscFunctionBegin; 10872ab6f6a8SStefano Zampini C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 1088190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 1089190ae7a4SRichard Tran Mills } 1090190ae7a4SRichard Tran Mills 1091190ae7a4SRichard Tran Mills PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 1092190ae7a4SRichard Tran Mills { 1093190ae7a4SRichard Tran Mills PetscErrorCode ierr; 1094190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 1095190ae7a4SRichard Tran Mills 1096190ae7a4SRichard Tran Mills PetscFunctionBegin; 1097190ae7a4SRichard Tran Mills switch (product->type) { 1098190ae7a4SRichard Tran Mills case MATPRODUCT_AB: 1099190ae7a4SRichard Tran Mills ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr); 1100190ae7a4SRichard Tran Mills break; 1101190ae7a4SRichard Tran Mills case MATPRODUCT_AtB: 1102190ae7a4SRichard Tran Mills ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr); 1103190ae7a4SRichard Tran Mills break; 1104190ae7a4SRichard Tran Mills case MATPRODUCT_ABt: 1105190ae7a4SRichard Tran Mills ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr); 1106190ae7a4SRichard Tran Mills break; 1107190ae7a4SRichard Tran Mills case MATPRODUCT_PtAP: 1108190ae7a4SRichard Tran Mills ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr); 1109190ae7a4SRichard Tran Mills break; 1110190ae7a4SRichard Tran Mills case MATPRODUCT_RARt: 1111190ae7a4SRichard Tran Mills ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr); 1112190ae7a4SRichard Tran Mills break; 1113190ae7a4SRichard Tran Mills case MATPRODUCT_ABC: 1114190ae7a4SRichard Tran Mills ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr); 1115190ae7a4SRichard Tran Mills break; 1116190ae7a4SRichard Tran Mills default: 1117190ae7a4SRichard Tran Mills break; 1118190ae7a4SRichard Tran Mills } 1119190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 1120190ae7a4SRichard Tran Mills } 1121431879ecSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 1122190ae7a4SRichard Tran Mills /* ------------------------ End MatProduct code ------------------------ */ 11234f53af40SRichard Tran Mills 11244a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 1125510b72f4SRichard Tran Mills * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 11264a2a386eSRichard Tran Mills * routine, but can also be used to convert an assembled SeqAIJ matrix 11274a2a386eSRichard Tran Mills * into a SeqAIJMKL one. */ 11284a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 11294a2a386eSRichard Tran Mills { 11304a2a386eSRichard Tran Mills PetscErrorCode ierr; 11314a2a386eSRichard Tran Mills Mat B = *newmat; 11324a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 1133c9d46305SRichard Tran Mills PetscBool set; 1134e9c94282SRichard Tran Mills PetscBool sametype; 11354a2a386eSRichard Tran Mills 11364a2a386eSRichard Tran Mills PetscFunctionBegin; 11374a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 11384a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 11394a2a386eSRichard Tran Mills } 11404a2a386eSRichard Tran Mills 1141e9c94282SRichard Tran Mills ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 1142e9c94282SRichard Tran Mills if (sametype) PetscFunctionReturn(0); 1143e9c94282SRichard Tran Mills 11444a2a386eSRichard Tran Mills ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 11454a2a386eSRichard Tran Mills B->spptr = (void*) aijmkl; 11464a2a386eSRichard Tran Mills 1147df555b71SRichard Tran Mills /* Set function pointers for methods that we inherit from AIJ but override. 1148969800c5SRichard Tran Mills * We also parse some command line options below, since those determine some of the methods we point to. */ 11494a2a386eSRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJMKL; 11504a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 11514a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJMKL; 1152c9d46305SRichard Tran Mills 11534abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 1154ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1155d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1156a8327b06SKarl Rupp #else 1157d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 1158d995685eSRichard Tran Mills #endif 11595b49642aSRichard Tran Mills aijmkl->eager_inspection = PETSC_FALSE; 11604abfa3b3SRichard Tran Mills 11614abfa3b3SRichard Tran Mills /* Parse command line options. */ 1162c9d46305SRichard Tran Mills ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 116348292275SRichard Tran Mills ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 116448292275SRichard Tran Mills ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 1165c9d46305SRichard Tran Mills ierr = PetscOptionsEnd();CHKERRQ(ierr); 1166ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1167d995685eSRichard Tran Mills if (!aijmkl->no_SpMV2) { 1168d995685eSRichard Tran Mills ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 1169d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 1170d995685eSRichard Tran Mills } 1171d995685eSRichard Tran Mills #endif 1172c9d46305SRichard Tran Mills 1173ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1174df555b71SRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1175969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1176df555b71SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1177969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 11788a369200SRichard Tran Mills # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1179190ae7a4SRichard Tran Mills B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1180190ae7a4SRichard Tran Mills B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1181190ae7a4SRichard Tran Mills B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1182190ae7a4SRichard Tran Mills B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1183190ae7a4SRichard Tran Mills B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1184ffcab697SRichard Tran Mills # if !defined(PETSC_USE_COMPLEX) 118549ba5396SRichard Tran Mills B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1186190ae7a4SRichard Tran Mills # else 1187190ae7a4SRichard Tran Mills B->ops->ptapnumeric = NULL; 11884f53af40SRichard Tran Mills # endif 1189e8be1fc7SRichard Tran Mills # endif 11901950a7e7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 11911950a7e7SRichard Tran Mills 1192213898a2SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1193213898a2SRichard Tran Mills /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1194213898a2SRichard Tran Mills * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1195213898a2SRichard Tran Mills * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1196213898a2SRichard Tran Mills * versions in which the older interface has not been deprecated, we use the old interface. */ 11971950a7e7SRichard Tran Mills if (aijmkl->no_SpMV2) { 11984a2a386eSRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL; 1199969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 12004a2a386eSRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL; 1201969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1202c9d46305SRichard Tran Mills } 12031950a7e7SRichard Tran Mills #endif 12044a2a386eSRichard Tran Mills 12054a2a386eSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 12064a2a386eSRichard Tran Mills 12074a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 12084a2a386eSRichard Tran Mills *newmat = B; 12094a2a386eSRichard Tran Mills PetscFunctionReturn(0); 12104a2a386eSRichard Tran Mills } 12114a2a386eSRichard Tran Mills 12124a2a386eSRichard Tran Mills /*@C 12134a2a386eSRichard Tran Mills MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 12144a2a386eSRichard Tran Mills This type inherits from AIJ and is largely identical, but uses sparse BLAS 12154a2a386eSRichard Tran Mills routines from Intel MKL whenever possible. 121690147e49SRichard Tran Mills If the installed version of MKL supports the "SpMV2" sparse 121790147e49SRichard Tran Mills inspector-executor routines, then those are used by default. 1218597ee276SRichard Tran Mills MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1219597ee276SRichard Tran Mills symmetric A) operations are currently supported. 1220597ee276SRichard Tran Mills Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 122190147e49SRichard Tran Mills 1222d083f849SBarry Smith Collective 12234a2a386eSRichard Tran Mills 12244a2a386eSRichard Tran Mills Input Parameters: 12254a2a386eSRichard Tran Mills + comm - MPI communicator, set to PETSC_COMM_SELF 12264a2a386eSRichard Tran Mills . m - number of rows 12274a2a386eSRichard Tran Mills . n - number of columns 12284a2a386eSRichard Tran Mills . nz - number of nonzeros per row (same for all rows) 12294a2a386eSRichard Tran Mills - nnz - array containing the number of nonzeros in the various rows 12304a2a386eSRichard Tran Mills (possibly different for each row) or NULL 12314a2a386eSRichard Tran Mills 12324a2a386eSRichard Tran Mills Output Parameter: 12334a2a386eSRichard Tran Mills . A - the matrix 12344a2a386eSRichard Tran Mills 123590147e49SRichard Tran Mills Options Database Keys: 123666b7eeb6SRichard Tran Mills + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 123766b7eeb6SRichard Tran Mills - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 123890147e49SRichard Tran Mills 12394a2a386eSRichard Tran Mills Notes: 12404a2a386eSRichard Tran Mills If nnz is given then nz is ignored 12414a2a386eSRichard Tran Mills 12424a2a386eSRichard Tran Mills Level: intermediate 12434a2a386eSRichard Tran Mills 12444a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 12454a2a386eSRichard Tran Mills @*/ 12464a2a386eSRichard Tran Mills PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 12474a2a386eSRichard Tran Mills { 12484a2a386eSRichard Tran Mills PetscErrorCode ierr; 12494a2a386eSRichard Tran Mills 12504a2a386eSRichard Tran Mills PetscFunctionBegin; 12514a2a386eSRichard Tran Mills ierr = MatCreate(comm,A);CHKERRQ(ierr); 12524a2a386eSRichard Tran Mills ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 12534a2a386eSRichard Tran Mills ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 12544a2a386eSRichard Tran Mills ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 12554a2a386eSRichard Tran Mills PetscFunctionReturn(0); 12564a2a386eSRichard Tran Mills } 12574a2a386eSRichard Tran Mills 12584a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 12594a2a386eSRichard Tran Mills { 12604a2a386eSRichard Tran Mills PetscErrorCode ierr; 12614a2a386eSRichard Tran Mills 12624a2a386eSRichard Tran Mills PetscFunctionBegin; 12634a2a386eSRichard Tran Mills ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 12644a2a386eSRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 12654a2a386eSRichard Tran Mills PetscFunctionReturn(0); 12664a2a386eSRichard Tran Mills } 1267