1b9e7e5c1SBarry Smith 24a2a386eSRichard Tran Mills /* 34a2a386eSRichard Tran Mills Defines basic operations for the MATSEQAIJMKL matrix class. 44a2a386eSRichard Tran Mills This class is derived from the MATSEQAIJ class and retains the 54a2a386eSRichard Tran Mills compressed row storage (aka Yale sparse matrix format) but uses 64a2a386eSRichard Tran Mills sparse BLAS operations from the Intel Math Kernel Library (MKL) 74a2a386eSRichard Tran Mills wherever possible. 84a2a386eSRichard Tran Mills */ 94a2a386eSRichard Tran Mills 104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h> 114a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12b9e7e5c1SBarry Smith #include <mkl_spblas.h> 134a2a386eSRichard Tran Mills 144a2a386eSRichard Tran Mills typedef struct { 15c9d46305SRichard Tran Mills PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 165b49642aSRichard Tran Mills PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 174abfa3b3SRichard Tran Mills PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18551aa5c8SRichard Tran Mills PetscObjectState state; 19ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20df555b71SRichard Tran Mills sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21df555b71SRichard Tran Mills struct matrix_descr descr; 22b8cbc1fbSRichard Tran Mills #endif 234a2a386eSRichard Tran Mills } Mat_SeqAIJMKL; 244a2a386eSRichard Tran Mills 254a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 264a2a386eSRichard Tran Mills 274a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 284a2a386eSRichard Tran Mills { 294a2a386eSRichard Tran Mills /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 304a2a386eSRichard Tran Mills /* so we will ignore 'MatType type'. */ 314a2a386eSRichard Tran Mills Mat B = *newmat; 32ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 334a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 34c1d5218aSRichard Tran Mills #endif 354a2a386eSRichard Tran Mills 364a2a386eSRichard Tran Mills PetscFunctionBegin; 37*5f80ce2aSJacob Faibussowitsch if (reuse == MAT_INITIAL_MATRIX) CHKERRQ(MatDuplicate(A,MAT_COPY_VALUES,&B)); 384a2a386eSRichard Tran Mills 394a2a386eSRichard Tran Mills /* Reset the original function pointers. */ 4054871a98SRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJ; 414a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 424a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJ; 4354871a98SRichard Tran Mills B->ops->mult = MatMult_SeqAIJ; 44ff03dc53SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJ; 4554871a98SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJ; 46ff03dc53SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 47190ae7a4SRichard Tran Mills B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 48431879ecSRichard Tran Mills B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 49e8be1fc7SRichard Tran Mills B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 50190ae7a4SRichard Tran Mills B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 51190ae7a4SRichard Tran Mills B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 524f53af40SRichard Tran Mills B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 534a2a386eSRichard Tran Mills 54*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL)); 554222ddf1SHong Zhang 56ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 574abfa3b3SRichard Tran Mills /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 58e9c94282SRichard Tran Mills * simply involves destroying the MKL sparse matrix handle and then freeing 59e9c94282SRichard Tran Mills * the spptr pointer. */ 60a8327b06SKarl Rupp if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 61a8327b06SKarl Rupp 62*5f80ce2aSJacob Faibussowitsch if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA); 63ddf6f99aSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 64*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(B->spptr)); 654a2a386eSRichard Tran Mills 664a2a386eSRichard Tran Mills /* Change the type of B to MATSEQAIJ. */ 67*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ)); 684a2a386eSRichard Tran Mills 694a2a386eSRichard Tran Mills *newmat = B; 704a2a386eSRichard Tran Mills PetscFunctionReturn(0); 714a2a386eSRichard Tran Mills } 724a2a386eSRichard Tran Mills 734a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 744a2a386eSRichard Tran Mills { 754a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 764a2a386eSRichard Tran Mills 774a2a386eSRichard Tran Mills PetscFunctionBegin; 78e9c94282SRichard Tran Mills 79edc89de7SRichard Tran Mills /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */ 80e9c94282SRichard Tran Mills if (aijmkl) { 814a2a386eSRichard Tran Mills /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 82ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 83*5f80ce2aSJacob Faibussowitsch if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA); 844abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 85*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(A->spptr)); 86e9c94282SRichard Tran Mills } 874a2a386eSRichard Tran Mills 884a2a386eSRichard Tran Mills /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 894a2a386eSRichard Tran Mills * to destroy everything that remains. */ 90*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ)); 914a2a386eSRichard Tran Mills /* Note that I don't call MatSetType(). I believe this is because that 924a2a386eSRichard Tran Mills * is only to be called when *building* a matrix. I could be wrong, but 934a2a386eSRichard Tran Mills * that is how things work for the SuperLU matrix class. */ 94*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy_SeqAIJ(A)); 954a2a386eSRichard Tran Mills PetscFunctionReturn(0); 964a2a386eSRichard Tran Mills } 974a2a386eSRichard Tran Mills 98190ae7a4SRichard Tran Mills /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 995b49642aSRichard Tran Mills * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 1005b49642aSRichard Tran Mills * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 1015b49642aSRichard Tran Mills * handle, creates a new one, and then calls mkl_sparse_optimize(). 1025b49642aSRichard Tran Mills * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 1035b49642aSRichard Tran Mills * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 1045b49642aSRichard Tran Mills * an unoptimized matrix handle here. */ 1056e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 1064a2a386eSRichard Tran Mills { 107ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1086e369cd5SRichard Tran Mills /* If the MKL library does not have mkl_sparse_optimize(), then this routine 1096e369cd5SRichard Tran Mills * does nothing. We make it callable anyway in this case because it cuts 1106e369cd5SRichard Tran Mills * down on littering the code with #ifdefs. */ 11145fbe478SRichard Tran Mills PetscFunctionBegin; 1126e369cd5SRichard Tran Mills PetscFunctionReturn(0); 1136e369cd5SRichard Tran Mills #else 114a8327b06SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 115a8327b06SKarl Rupp Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 116a8327b06SKarl Rupp PetscInt m,n; 117a8327b06SKarl Rupp MatScalar *aa; 118a8327b06SKarl Rupp PetscInt *aj,*ai; 1194a2a386eSRichard Tran Mills 120a8327b06SKarl Rupp PetscFunctionBegin; 121e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 122e626a176SRichard Tran Mills /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 123e626a176SRichard Tran Mills * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 124e626a176SRichard Tran Mills * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 125e626a176SRichard Tran Mills * mkl_sparse_optimize() later. */ 1266e369cd5SRichard Tran Mills if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 1274d51fa23SRichard Tran Mills #endif 1286e369cd5SRichard Tran Mills 1290632b357SRichard Tran Mills if (aijmkl->sparse_optimized) { 1300632b357SRichard Tran Mills /* Matrix has been previously assembled and optimized. Must destroy old 1310632b357SRichard Tran Mills * matrix handle before running the optimization step again. */ 132*5f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA); 1330632b357SRichard Tran Mills } 1348d3fe1b0SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 1356e369cd5SRichard Tran Mills 136c9d46305SRichard Tran Mills /* Now perform the SpMV2 setup and matrix optimization. */ 137df555b71SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 138df555b71SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 139df555b71SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 14058678438SRichard Tran Mills m = A->rmap->n; 14158678438SRichard Tran Mills n = A->cmap->n; 142df555b71SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 143df555b71SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 144df555b71SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 1451495fedeSRichard Tran Mills if (a->nz && aa && !A->structure_only) { 1468d3fe1b0SRichard Tran Mills /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 1478d3fe1b0SRichard Tran Mills * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 148*5f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_x_create_csr,&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 149*5f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 150*5f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 1511950a7e7SRichard Tran Mills if (!aijmkl->no_SpMV2) { 152*5f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_optimize,aijmkl->csrA); 1531950a7e7SRichard Tran Mills } 1544abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 155*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectStateGet((PetscObject)A,&(aijmkl->state))); 156190ae7a4SRichard Tran Mills } else { 157190ae7a4SRichard Tran Mills aijmkl->csrA = PETSC_NULL; 158c9d46305SRichard Tran Mills } 1596e369cd5SRichard Tran Mills 1606e369cd5SRichard Tran Mills PetscFunctionReturn(0); 161d995685eSRichard Tran Mills #endif 1626e369cd5SRichard Tran Mills } 1636e369cd5SRichard Tran Mills 164b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 165190ae7a4SRichard Tran Mills /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 166190ae7a4SRichard Tran Mills static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A) 16719afcda9SRichard Tran Mills { 16819afcda9SRichard Tran Mills sparse_index_base_t indexing; 169190ae7a4SRichard Tran Mills PetscInt m,n; 17045fbe478SRichard Tran Mills PetscInt *aj,*ai,*dummy; 17119afcda9SRichard Tran Mills MatScalar *aa; 17219afcda9SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 17319afcda9SRichard Tran Mills 174362febeeSStefano Zampini PetscFunctionBegin; 175190ae7a4SRichard Tran Mills if (csrA) { 17645fbe478SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 177*5f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_x_export_csr,csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa); 178*5f80ce2aSJacob Faibussowitsch PetscCheck((m == nrows) && (n == ncols),PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 179190ae7a4SRichard Tran Mills } else { 180190ae7a4SRichard Tran Mills aj = ai = PETSC_NULL; 181190ae7a4SRichard Tran Mills aa = PETSC_NULL; 182aab60f1bSRichard Tran Mills } 183190ae7a4SRichard Tran Mills 184*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetType(A,MATSEQAIJ)); 185*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols)); 186aab60f1bSRichard Tran Mills /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 187aab60f1bSRichard Tran Mills * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 188aab60f1bSRichard Tran Mills * they will be destroyed when the MKL handle is destroyed. 189aab60f1bSRichard Tran Mills * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 190190ae7a4SRichard Tran Mills if (csrA) { 191*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL)); 192190ae7a4SRichard Tran Mills } else { 193190ae7a4SRichard Tran Mills /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 194*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetUp(A)); 195*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 196*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 197190ae7a4SRichard Tran Mills } 19819afcda9SRichard Tran Mills 19919afcda9SRichard Tran Mills /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 20019afcda9SRichard Tran Mills * Now turn it into a MATSEQAIJMKL. */ 201*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A)); 2026c87cf42SRichard Tran Mills 20319afcda9SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 20419afcda9SRichard Tran Mills aijmkl->csrA = csrA; 2056c87cf42SRichard Tran Mills 20619afcda9SRichard Tran Mills /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 20719afcda9SRichard Tran Mills * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 20819afcda9SRichard Tran Mills * and just need to be able to run the MKL optimization step. */ 209f3fd1758SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 210f3fd1758SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 211f3fd1758SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 212190ae7a4SRichard Tran Mills if (csrA) { 213*5f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 214*5f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 2151950a7e7SRichard Tran Mills } 216*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectStateGet((PetscObject)A,&(aijmkl->state))); 21719afcda9SRichard Tran Mills PetscFunctionReturn(0); 21819afcda9SRichard Tran Mills } 219b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 220190ae7a4SRichard Tran Mills 221e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 222e8be1fc7SRichard Tran Mills * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 223e8be1fc7SRichard Tran Mills * MatMatMultNumeric(). */ 224b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 225190ae7a4SRichard Tran Mills static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 226e8be1fc7SRichard Tran Mills { 227e8be1fc7SRichard Tran Mills PetscInt i; 228e8be1fc7SRichard Tran Mills PetscInt nrows,ncols; 229e8be1fc7SRichard Tran Mills PetscInt nz; 230e8be1fc7SRichard Tran Mills PetscInt *ai,*aj,*dummy; 231e8be1fc7SRichard Tran Mills PetscScalar *aa; 2321495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 233e8be1fc7SRichard Tran Mills sparse_index_base_t indexing; 234e8be1fc7SRichard Tran Mills 235362febeeSStefano Zampini PetscFunctionBegin; 236190ae7a4SRichard Tran Mills /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 237190ae7a4SRichard Tran Mills if (!aijmkl->csrA) PetscFunctionReturn(0); 238190ae7a4SRichard Tran Mills 239e8be1fc7SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 240*5f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 241e8be1fc7SRichard Tran Mills 242e8be1fc7SRichard Tran Mills /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 243e8be1fc7SRichard Tran Mills * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 244e8be1fc7SRichard Tran Mills for (i=0; i<nrows; i++) { 245e8be1fc7SRichard Tran Mills nz = ai[i+1] - ai[i]; 246*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES)); 247e8be1fc7SRichard Tran Mills } 248e8be1fc7SRichard Tran Mills 249*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 250*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 251e8be1fc7SRichard Tran Mills 252*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectStateGet((PetscObject)A,&(aijmkl->state))); 253a7180b50SRichard Tran Mills /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation. 254a7180b50SRichard Tran Mills * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed 255a7180b50SRichard Tran Mills * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */ 256a7180b50SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 257e8be1fc7SRichard Tran Mills PetscFunctionReturn(0); 258e8be1fc7SRichard Tran Mills } 259b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 260e8be1fc7SRichard Tran Mills 2613849ddb2SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 2623849ddb2SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer) 2633849ddb2SRichard Tran Mills { 2643849ddb2SRichard Tran Mills PetscInt i,j,k; 2653849ddb2SRichard Tran Mills PetscInt nrows,ncols; 2663849ddb2SRichard Tran Mills PetscInt nz; 2673849ddb2SRichard Tran Mills PetscInt *ai,*aj,*dummy; 2683849ddb2SRichard Tran Mills PetscScalar *aa; 2691495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 2703849ddb2SRichard Tran Mills sparse_index_base_t indexing; 2713849ddb2SRichard Tran Mills 272362febeeSStefano Zampini PetscFunctionBegin; 273*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n")); 2743849ddb2SRichard Tran Mills 2753849ddb2SRichard Tran Mills /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 2763849ddb2SRichard Tran Mills if (!aijmkl->csrA) { 277*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n")); 2783849ddb2SRichard Tran Mills PetscFunctionReturn(0); 2793849ddb2SRichard Tran Mills } 2803849ddb2SRichard Tran Mills 2813849ddb2SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 282*5f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 2833849ddb2SRichard Tran Mills 2843849ddb2SRichard Tran Mills k = 0; 2853849ddb2SRichard Tran Mills for (i=0; i<nrows; i++) { 286*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ": ",i)); 2873849ddb2SRichard Tran Mills nz = ai[i+1] - ai[i]; 2883849ddb2SRichard Tran Mills for (j=0; j<nz; j++) { 2893849ddb2SRichard Tran Mills if (aa) { 290*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", %g) ",aj[k],PetscRealPart(aa[k]))); 2913849ddb2SRichard Tran Mills } else { 292*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", NULL)",aj[k])); 2933849ddb2SRichard Tran Mills } 2943849ddb2SRichard Tran Mills k++; 2953849ddb2SRichard Tran Mills } 296*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerASCIIPrintf(viewer,"\n")); 2973849ddb2SRichard Tran Mills } 2983849ddb2SRichard Tran Mills PetscFunctionReturn(0); 2993849ddb2SRichard Tran Mills } 3003849ddb2SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 3013849ddb2SRichard Tran Mills 3026e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 3036e369cd5SRichard Tran Mills { 3041495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 3056e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl_dest; 3066e369cd5SRichard Tran Mills 3076e369cd5SRichard Tran Mills PetscFunctionBegin; 308*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDuplicate_SeqAIJ(A,op,M)); 3096e369cd5SRichard Tran Mills aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr; 310*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(aijmkl_dest,aijmkl,1)); 3116e369cd5SRichard Tran Mills aijmkl_dest->sparse_optimized = PETSC_FALSE; 3125b49642aSRichard Tran Mills if (aijmkl->eager_inspection) { 313*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJMKL_create_mkl_handle(A)); 3145b49642aSRichard Tran Mills } 3156e369cd5SRichard Tran Mills PetscFunctionReturn(0); 3166e369cd5SRichard Tran Mills } 3176e369cd5SRichard Tran Mills 3186e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 3196e369cd5SRichard Tran Mills { 3206e369cd5SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3215b49642aSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 3226e369cd5SRichard Tran Mills 3236e369cd5SRichard Tran Mills PetscFunctionBegin; 3246e369cd5SRichard Tran Mills if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 3256e369cd5SRichard Tran Mills 3266e369cd5SRichard Tran Mills /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 3276e369cd5SRichard Tran Mills * extra information and some different methods, call the AssemblyEnd 3286e369cd5SRichard Tran Mills * routine for a MATSEQAIJ. 3296e369cd5SRichard Tran Mills * I'm not sure if this is the best way to do this, but it avoids 330d96e85feSRichard Tran Mills * a lot of code duplication. */ 3316e369cd5SRichard Tran Mills a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 332*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd_SeqAIJ(A, mode)); 3336e369cd5SRichard Tran Mills 3345b49642aSRichard Tran Mills /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 3355b49642aSRichard Tran Mills * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 3365b49642aSRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*)A->spptr; 3375b49642aSRichard Tran Mills if (aijmkl->eager_inspection) { 338*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJMKL_create_mkl_handle(A)); 3395b49642aSRichard Tran Mills } 340df555b71SRichard Tran Mills 3414a2a386eSRichard Tran Mills PetscFunctionReturn(0); 3424a2a386eSRichard Tran Mills } 3434a2a386eSRichard Tran Mills 344e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 3454a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 3464a2a386eSRichard Tran Mills { 3474a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3484a2a386eSRichard Tran Mills const PetscScalar *x; 3494a2a386eSRichard Tran Mills PetscScalar *y; 3504a2a386eSRichard Tran Mills const MatScalar *aa; 3514a2a386eSRichard Tran Mills PetscInt m = A->rmap->n; 352db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 353db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 354db63039fSRichard Tran Mills PetscScalar beta = 0.0; 3554a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 356db63039fSRichard Tran Mills char matdescra[6]; 357db63039fSRichard Tran Mills 3584a2a386eSRichard Tran Mills /* Variables not in MatMult_SeqAIJ. */ 359ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 360ff03dc53SRichard Tran Mills 361ff03dc53SRichard Tran Mills PetscFunctionBegin; 362db63039fSRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 363db63039fSRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 364*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(xx,&x)); 365*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(yy,&y)); 366ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 367ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 368ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 369ff03dc53SRichard Tran Mills 370ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 371db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 372ff03dc53SRichard Tran Mills 373*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt)); 374*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(xx,&x)); 375*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(yy,&y)); 376ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 377ff03dc53SRichard Tran Mills } 3781950a7e7SRichard Tran Mills #endif 379ff03dc53SRichard Tran Mills 380ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 381df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 382df555b71SRichard Tran Mills { 383df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 384df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 385df555b71SRichard Tran Mills const PetscScalar *x; 386df555b71SRichard Tran Mills PetscScalar *y; 387551aa5c8SRichard Tran Mills PetscObjectState state; 388df555b71SRichard Tran Mills 389df555b71SRichard Tran Mills PetscFunctionBegin; 390df555b71SRichard Tran Mills 39138987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 39238987b35SRichard Tran Mills if (!a->nz) { 393*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(yy,&y)); 394*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArrayzero(y,A->rmap->n)); 395*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(yy,&y)); 39638987b35SRichard Tran Mills PetscFunctionReturn(0); 39738987b35SRichard Tran Mills } 398f36dfe3fSRichard Tran Mills 399*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(xx,&x)); 400*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(yy,&y)); 401df555b71SRichard Tran Mills 4023fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 4033fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 4043fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 405*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectStateGet((PetscObject)A,&state)); 406*5f80ce2aSJacob Faibussowitsch if (!aijmkl->sparse_optimized || aijmkl->state != state) CHKERRQ(MatSeqAIJMKL_create_mkl_handle(A)); 4073fa15762SRichard Tran Mills 408df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMult. */ 409*5f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 410df555b71SRichard Tran Mills 411*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt)); 412*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(xx,&x)); 413*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(yy,&y)); 414df555b71SRichard Tran Mills PetscFunctionReturn(0); 415df555b71SRichard Tran Mills } 416d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 417df555b71SRichard Tran Mills 418e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 419ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 420ff03dc53SRichard Tran Mills { 421ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 422ff03dc53SRichard Tran Mills const PetscScalar *x; 423ff03dc53SRichard Tran Mills PetscScalar *y; 424ff03dc53SRichard Tran Mills const MatScalar *aa; 425ff03dc53SRichard Tran Mills PetscInt m = A->rmap->n; 426db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 427db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 428db63039fSRichard Tran Mills PetscScalar beta = 0.0; 429ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 430db63039fSRichard Tran Mills char matdescra[6]; 431ff03dc53SRichard Tran Mills 432ff03dc53SRichard Tran Mills /* Variables not in MatMultTranspose_SeqAIJ. */ 433ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 4344a2a386eSRichard Tran Mills 4354a2a386eSRichard Tran Mills PetscFunctionBegin; 436969800c5SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 437969800c5SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 438*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(xx,&x)); 439*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(yy,&y)); 4404a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 4414a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 4424a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 4434a2a386eSRichard Tran Mills 4444a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 445db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 4464a2a386eSRichard Tran Mills 447*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt)); 448*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(xx,&x)); 449*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(yy,&y)); 4504a2a386eSRichard Tran Mills PetscFunctionReturn(0); 4514a2a386eSRichard Tran Mills } 4521950a7e7SRichard Tran Mills #endif 4534a2a386eSRichard Tran Mills 454ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 455df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 456df555b71SRichard Tran Mills { 457df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 458df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 459df555b71SRichard Tran Mills const PetscScalar *x; 460df555b71SRichard Tran Mills PetscScalar *y; 461551aa5c8SRichard Tran Mills PetscObjectState state; 462df555b71SRichard Tran Mills 463df555b71SRichard Tran Mills PetscFunctionBegin; 464df555b71SRichard Tran Mills 46538987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 46638987b35SRichard Tran Mills if (!a->nz) { 467*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(yy,&y)); 468*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArrayzero(y,A->cmap->n)); 469*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(yy,&y)); 47038987b35SRichard Tran Mills PetscFunctionReturn(0); 47138987b35SRichard Tran Mills } 472f36dfe3fSRichard Tran Mills 473*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(xx,&x)); 474*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(yy,&y)); 475df555b71SRichard Tran Mills 4763fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 4773fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 4783fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 479*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectStateGet((PetscObject)A,&state)); 480*5f80ce2aSJacob Faibussowitsch if (!aijmkl->sparse_optimized || aijmkl->state != state) CHKERRQ(MatSeqAIJMKL_create_mkl_handle(A)); 4813fa15762SRichard Tran Mills 482df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 483*5f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 484df555b71SRichard Tran Mills 485*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt)); 486*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(xx,&x)); 487*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(yy,&y)); 488df555b71SRichard Tran Mills PetscFunctionReturn(0); 489df555b71SRichard Tran Mills } 490d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 491df555b71SRichard Tran Mills 492e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 4934a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 4944a2a386eSRichard Tran Mills { 4954a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4964a2a386eSRichard Tran Mills const PetscScalar *x; 4974a2a386eSRichard Tran Mills PetscScalar *y,*z; 4984a2a386eSRichard Tran Mills const MatScalar *aa; 4994a2a386eSRichard Tran Mills PetscInt m = A->rmap->n; 500db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 5014a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 5024a2a386eSRichard Tran Mills PetscInt i; 5034a2a386eSRichard Tran Mills 504ff03dc53SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 505ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 506a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 507db63039fSRichard Tran Mills PetscScalar beta; 508a84739b8SRichard Tran Mills char matdescra[6]; 509ff03dc53SRichard Tran Mills 510ff03dc53SRichard Tran Mills PetscFunctionBegin; 511a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 512a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 513a84739b8SRichard Tran Mills 514*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(xx,&x)); 515*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayPair(yy,zz,&y,&z)); 516ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 517ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 518ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 519ff03dc53SRichard Tran Mills 520ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 521a84739b8SRichard Tran Mills if (zz == yy) { 522a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 523db63039fSRichard Tran Mills beta = 1.0; 524db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 525a84739b8SRichard Tran Mills } else { 526db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 527db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 528db63039fSRichard Tran Mills beta = 0.0; 529db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 530ff03dc53SRichard Tran Mills for (i=0; i<m; i++) { 531ff03dc53SRichard Tran Mills z[i] += y[i]; 532ff03dc53SRichard Tran Mills } 533a84739b8SRichard Tran Mills } 534ff03dc53SRichard Tran Mills 535*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogFlops(2.0*a->nz)); 536*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(xx,&x)); 537*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayPair(yy,zz,&y,&z)); 538ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 539ff03dc53SRichard Tran Mills } 5401950a7e7SRichard Tran Mills #endif 541ff03dc53SRichard Tran Mills 542ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 543df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 544df555b71SRichard Tran Mills { 545df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 546df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 547df555b71SRichard Tran Mills const PetscScalar *x; 548df555b71SRichard Tran Mills PetscScalar *y,*z; 549df555b71SRichard Tran Mills PetscInt m = A->rmap->n; 550df555b71SRichard Tran Mills PetscInt i; 551df555b71SRichard Tran Mills 552df555b71SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 553551aa5c8SRichard Tran Mills PetscObjectState state; 554df555b71SRichard Tran Mills 555df555b71SRichard Tran Mills PetscFunctionBegin; 556df555b71SRichard Tran Mills 55738987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 55838987b35SRichard Tran Mills if (!a->nz) { 559*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayPair(yy,zz,&y,&z)); 560*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(z,y,m)); 561*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayPair(yy,zz,&y,&z)); 56238987b35SRichard Tran Mills PetscFunctionReturn(0); 56338987b35SRichard Tran Mills } 564df555b71SRichard Tran Mills 565*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(xx,&x)); 566*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayPair(yy,zz,&y,&z)); 567df555b71SRichard Tran Mills 5683fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 5693fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 5703fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 571*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectStateGet((PetscObject)A,&state)); 572*5f80ce2aSJacob Faibussowitsch if (!aijmkl->sparse_optimized || aijmkl->state != state) CHKERRQ(MatSeqAIJMKL_create_mkl_handle(A)); 5733fa15762SRichard Tran Mills 574df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 575df555b71SRichard Tran Mills if (zz == yy) { 576df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 577df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 578*5f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 579df555b71SRichard Tran Mills } else { 580df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 581df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 582*5f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 583*5f80ce2aSJacob Faibussowitsch for (i=0; i<m; i++) z[i] += y[i]; 584df555b71SRichard Tran Mills } 585df555b71SRichard Tran Mills 586*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogFlops(2.0*a->nz)); 587*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(xx,&x)); 588*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayPair(yy,zz,&y,&z)); 589df555b71SRichard Tran Mills PetscFunctionReturn(0); 590df555b71SRichard Tran Mills } 591d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 592df555b71SRichard Tran Mills 593e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 594ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 595ff03dc53SRichard Tran Mills { 596ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 597ff03dc53SRichard Tran Mills const PetscScalar *x; 598ff03dc53SRichard Tran Mills PetscScalar *y,*z; 599ff03dc53SRichard Tran Mills const MatScalar *aa; 600ff03dc53SRichard Tran Mills PetscInt m = A->rmap->n; 601db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 602ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 603ff03dc53SRichard Tran Mills PetscInt i; 604ff03dc53SRichard Tran Mills 605ff03dc53SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 606ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 607a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 608db63039fSRichard Tran Mills PetscScalar beta; 609a84739b8SRichard Tran Mills char matdescra[6]; 6104a2a386eSRichard Tran Mills 6114a2a386eSRichard Tran Mills PetscFunctionBegin; 612a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 613a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 614a84739b8SRichard Tran Mills 615*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(xx,&x)); 616*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayPair(yy,zz,&y,&z)); 6174a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 6184a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 6194a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 6204a2a386eSRichard Tran Mills 6214a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 622a84739b8SRichard Tran Mills if (zz == yy) { 623a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 624db63039fSRichard Tran Mills beta = 1.0; 625969800c5SRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 626a84739b8SRichard Tran Mills } else { 627db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 628db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 629db63039fSRichard Tran Mills beta = 0.0; 630db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 631969800c5SRichard Tran Mills for (i=0; i<n; i++) { 6324a2a386eSRichard Tran Mills z[i] += y[i]; 6334a2a386eSRichard Tran Mills } 634a84739b8SRichard Tran Mills } 6354a2a386eSRichard Tran Mills 636*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogFlops(2.0*a->nz)); 637*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(xx,&x)); 638*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayPair(yy,zz,&y,&z)); 6394a2a386eSRichard Tran Mills PetscFunctionReturn(0); 6404a2a386eSRichard Tran Mills } 6411950a7e7SRichard Tran Mills #endif 6424a2a386eSRichard Tran Mills 643ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 644df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 645df555b71SRichard Tran Mills { 646df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 647df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 648df555b71SRichard Tran Mills const PetscScalar *x; 649df555b71SRichard Tran Mills PetscScalar *y,*z; 650969800c5SRichard Tran Mills PetscInt n = A->cmap->n; 651df555b71SRichard Tran Mills PetscInt i; 652551aa5c8SRichard Tran Mills PetscObjectState state; 653df555b71SRichard Tran Mills 654df555b71SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 655df555b71SRichard Tran Mills 656df555b71SRichard Tran Mills PetscFunctionBegin; 657df555b71SRichard Tran Mills 65838987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 65938987b35SRichard Tran Mills if (!a->nz) { 660*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayPair(yy,zz,&y,&z)); 661*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(z,y,n)); 662*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayPair(yy,zz,&y,&z)); 66338987b35SRichard Tran Mills PetscFunctionReturn(0); 66438987b35SRichard Tran Mills } 665f36dfe3fSRichard Tran Mills 666*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(xx,&x)); 667*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayPair(yy,zz,&y,&z)); 668df555b71SRichard Tran Mills 6693fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 6703fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 6713fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 672*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectStateGet((PetscObject)A,&state)); 673*5f80ce2aSJacob Faibussowitsch if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A); 6743fa15762SRichard Tran Mills 675df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 676df555b71SRichard Tran Mills if (zz == yy) { 677df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 678df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 679*5f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 680df555b71SRichard Tran Mills } else { 681df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 682df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 683*5f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 684*5f80ce2aSJacob Faibussowitsch for (i=0; i<n; i++) z[i] += y[i]; 685df555b71SRichard Tran Mills } 686df555b71SRichard Tran Mills 687*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogFlops(2.0*a->nz)); 688*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(xx,&x)); 689*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayPair(yy,zz,&y,&z)); 690df555b71SRichard Tran Mills PetscFunctionReturn(0); 691df555b71SRichard Tran Mills } 692d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 693df555b71SRichard Tran Mills 694190ae7a4SRichard Tran Mills /* -------------------------- MatProduct code -------------------------- */ 6958a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 696190ae7a4SRichard Tran Mills static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 697431879ecSRichard Tran Mills { 6981495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr; 699431879ecSRichard Tran Mills sparse_matrix_t csrA,csrB,csrC; 700190ae7a4SRichard Tran Mills PetscInt nrows,ncols; 701431879ecSRichard Tran Mills struct matrix_descr descr_type_gen; 702431879ecSRichard Tran Mills PetscObjectState state; 703431879ecSRichard Tran Mills 704431879ecSRichard Tran Mills PetscFunctionBegin; 705190ae7a4SRichard Tran Mills /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 706190ae7a4SRichard Tran Mills * not handle sparse matrices with zero rows or columns. */ 707190ae7a4SRichard Tran Mills if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 708190ae7a4SRichard Tran Mills else nrows = A->cmap->N; 709190ae7a4SRichard Tran Mills if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 710190ae7a4SRichard Tran Mills else ncols = B->rmap->N; 711190ae7a4SRichard Tran Mills 712*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectStateGet((PetscObject)A,&state)); 713*5f80ce2aSJacob Faibussowitsch if (!a->sparse_optimized || a->state != state) CHKERRQ(MatSeqAIJMKL_create_mkl_handle(A)); 714*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectStateGet((PetscObject)B,&state)); 715*5f80ce2aSJacob Faibussowitsch if (!b->sparse_optimized || b->state != state) CHKERRQ(MatSeqAIJMKL_create_mkl_handle(B)); 716431879ecSRichard Tran Mills csrA = a->csrA; 717431879ecSRichard Tran Mills csrB = b->csrA; 718431879ecSRichard Tran Mills descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 719431879ecSRichard Tran Mills 720190ae7a4SRichard Tran Mills if (csrA && csrB) { 721*5f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC); 722190ae7a4SRichard Tran Mills } else { 723190ae7a4SRichard Tran Mills csrC = PETSC_NULL; 724190ae7a4SRichard Tran Mills } 725190ae7a4SRichard Tran Mills 726*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C)); 727431879ecSRichard Tran Mills 728431879ecSRichard Tran Mills PetscFunctionReturn(0); 729431879ecSRichard Tran Mills } 730431879ecSRichard Tran Mills 731190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 732e8be1fc7SRichard Tran Mills { 7331495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 734e8be1fc7SRichard Tran Mills sparse_matrix_t csrA, csrB, csrC; 735e8be1fc7SRichard Tran Mills struct matrix_descr descr_type_gen; 736e8be1fc7SRichard Tran Mills PetscObjectState state; 737e8be1fc7SRichard Tran Mills 738e8be1fc7SRichard Tran Mills PetscFunctionBegin; 739*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectStateGet((PetscObject)A,&state)); 740*5f80ce2aSJacob Faibussowitsch if (!a->sparse_optimized || a->state != state) CHKERRQ(MatSeqAIJMKL_create_mkl_handle(A)); 741*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectStateGet((PetscObject)B,&state)); 742*5f80ce2aSJacob Faibussowitsch if (!b->sparse_optimized || b->state != state) CHKERRQ(MatSeqAIJMKL_create_mkl_handle(B)); 743e8be1fc7SRichard Tran Mills csrA = a->csrA; 744e8be1fc7SRichard Tran Mills csrB = b->csrA; 745e8be1fc7SRichard Tran Mills csrC = c->csrA; 746e8be1fc7SRichard Tran Mills descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 747e8be1fc7SRichard Tran Mills 748190ae7a4SRichard Tran Mills if (csrA && csrB) { 749*5f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC); 750190ae7a4SRichard Tran Mills } else { 751190ae7a4SRichard Tran Mills csrC = PETSC_NULL; 752190ae7a4SRichard Tran Mills } 753e8be1fc7SRichard Tran Mills 754e8be1fc7SRichard Tran Mills /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 755*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJMKL_update_from_mkl_handle(C)); 756e8be1fc7SRichard Tran Mills 757e8be1fc7SRichard Tran Mills PetscFunctionReturn(0); 758e8be1fc7SRichard Tran Mills } 759e8be1fc7SRichard Tran Mills 760190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 7614f53af40SRichard Tran Mills { 762190ae7a4SRichard Tran Mills PetscFunctionBegin; 763*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C)); 764190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 765190ae7a4SRichard Tran Mills } 766190ae7a4SRichard Tran Mills 767190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 768190ae7a4SRichard Tran Mills { 769190ae7a4SRichard Tran Mills PetscFunctionBegin; 770*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C)); 771190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 772190ae7a4SRichard Tran Mills } 773190ae7a4SRichard Tran Mills 774190ae7a4SRichard Tran Mills PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 775190ae7a4SRichard Tran Mills { 776190ae7a4SRichard Tran Mills PetscFunctionBegin; 777*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C)); 778190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 779190ae7a4SRichard Tran Mills } 780190ae7a4SRichard Tran Mills 781190ae7a4SRichard Tran Mills PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 782190ae7a4SRichard Tran Mills { 783190ae7a4SRichard Tran Mills PetscFunctionBegin; 784*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C)); 785190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 786190ae7a4SRichard Tran Mills } 787190ae7a4SRichard Tran Mills 788190ae7a4SRichard Tran Mills PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 789190ae7a4SRichard Tran Mills { 790190ae7a4SRichard Tran Mills PetscFunctionBegin; 791*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C)); 792190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 793190ae7a4SRichard Tran Mills } 794190ae7a4SRichard Tran Mills 795190ae7a4SRichard Tran Mills PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 796190ae7a4SRichard Tran Mills { 797190ae7a4SRichard Tran Mills PetscFunctionBegin; 798*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C)); 799190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 800190ae7a4SRichard Tran Mills } 801190ae7a4SRichard Tran Mills 802190ae7a4SRichard Tran Mills static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 803190ae7a4SRichard Tran Mills { 804190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 805190ae7a4SRichard Tran Mills Mat A = product->A,B = product->B; 806190ae7a4SRichard Tran Mills 807190ae7a4SRichard Tran Mills PetscFunctionBegin; 808*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C)); 809190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 810190ae7a4SRichard Tran Mills } 811190ae7a4SRichard Tran Mills 812190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 813190ae7a4SRichard Tran Mills { 814190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 815190ae7a4SRichard Tran Mills Mat A = product->A,B = product->B; 816190ae7a4SRichard Tran Mills PetscReal fill = product->fill; 817190ae7a4SRichard Tran Mills 818190ae7a4SRichard Tran Mills PetscFunctionBegin; 819*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C)); 820190ae7a4SRichard Tran Mills C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 821190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 822190ae7a4SRichard Tran Mills } 823190ae7a4SRichard Tran Mills 82449ba5396SRichard Tran Mills PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C) 825190ae7a4SRichard Tran Mills { 826190ae7a4SRichard Tran Mills Mat Ct; 827190ae7a4SRichard Tran Mills Vec zeros; 8281495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 8294f53af40SRichard Tran Mills sparse_matrix_t csrA, csrP, csrC; 8304f53af40SRichard Tran Mills PetscBool set, flag; 831b9e1dd46SRichard Tran Mills struct matrix_descr descr_type_sym; 8324f53af40SRichard Tran Mills PetscObjectState state; 8334f53af40SRichard Tran Mills 8344f53af40SRichard Tran Mills PetscFunctionBegin; 835*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatIsSymmetricKnown(A,&set,&flag)); 8362c71b3e2SJacob Faibussowitsch PetscCheckFalse(!set || (set && !flag),PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 8374f53af40SRichard Tran Mills 838*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectStateGet((PetscObject)A,&state)); 839*5f80ce2aSJacob Faibussowitsch if (!a->sparse_optimized || a->state != state) CHKERRQ(MatSeqAIJMKL_create_mkl_handle(A)); 840*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectStateGet((PetscObject)P,&state)); 841*5f80ce2aSJacob Faibussowitsch if (!p->sparse_optimized || p->state != state) CHKERRQ(MatSeqAIJMKL_create_mkl_handle(P)); 8424f53af40SRichard Tran Mills csrA = a->csrA; 8434f53af40SRichard Tran Mills csrP = p->csrA; 8444f53af40SRichard Tran Mills csrC = c->csrA; 845b9e1dd46SRichard Tran Mills descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 846190ae7a4SRichard Tran Mills descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 847b9e1dd46SRichard Tran Mills descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 8484f53af40SRichard Tran Mills 849f8990b4aSRichard Tran Mills /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 850*5f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 8514f53af40SRichard Tran Mills 852190ae7a4SRichard Tran Mills /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 853190ae7a4SRichard Tran Mills * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 854190ae7a4SRichard Tran Mills * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 8557301b172SPierre Jolivet * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY 856190ae7a4SRichard Tran Mills * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 857190ae7a4SRichard Tran Mills * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 858190ae7a4SRichard Tran Mills * the full matrix. */ 859*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJMKL_update_from_mkl_handle(C)); 860*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatTranspose(C,MAT_INITIAL_MATRIX,&Ct)); 861*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateVecs(C,&zeros,NULL)); 862*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecSetFromOptions(zeros)); 863*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecZeroEntries(zeros)); 864*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDiagonalSet(Ct,zeros,INSERT_VALUES)); 865*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN)); 866190ae7a4SRichard Tran Mills /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 867*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatProductCreateWithMat(A,P,PETSC_NULL,C)); 868*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatProductSetType(C,MATPRODUCT_PtAP)); 869*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJMKL_create_mkl_handle(C)); 870*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&zeros)); 871*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&Ct)); 8724f53af40SRichard Tran Mills PetscFunctionReturn(0); 8734f53af40SRichard Tran Mills } 874190ae7a4SRichard Tran Mills 875190ae7a4SRichard Tran Mills PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 876190ae7a4SRichard Tran Mills { 877190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 878190ae7a4SRichard Tran Mills Mat A = product->A,P = product->B; 8791495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr; 880190ae7a4SRichard Tran Mills sparse_matrix_t csrA,csrP,csrC; 881190ae7a4SRichard Tran Mills struct matrix_descr descr_type_sym; 882190ae7a4SRichard Tran Mills PetscObjectState state; 883190ae7a4SRichard Tran Mills 884190ae7a4SRichard Tran Mills PetscFunctionBegin; 885*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectStateGet((PetscObject)A,&state)); 886*5f80ce2aSJacob Faibussowitsch if (!a->sparse_optimized || a->state != state) CHKERRQ(MatSeqAIJMKL_create_mkl_handle(A)); 887*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectStateGet((PetscObject)P,&state)); 888*5f80ce2aSJacob Faibussowitsch if (!p->sparse_optimized || p->state != state) CHKERRQ(MatSeqAIJMKL_create_mkl_handle(P)); 889190ae7a4SRichard Tran Mills csrA = a->csrA; 890190ae7a4SRichard Tran Mills csrP = p->csrA; 891190ae7a4SRichard Tran Mills descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 892190ae7a4SRichard Tran Mills descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 893190ae7a4SRichard Tran Mills descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 894190ae7a4SRichard Tran Mills 895190ae7a4SRichard Tran Mills /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 896190ae7a4SRichard Tran Mills if (csrP && csrA) { 897*5f80ce2aSJacob Faibussowitsch PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL); 898190ae7a4SRichard Tran Mills } else { 899190ae7a4SRichard Tran Mills csrC = PETSC_NULL; 900190ae7a4SRichard Tran Mills } 901190ae7a4SRichard Tran Mills 902190ae7a4SRichard Tran Mills /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 903190ae7a4SRichard Tran Mills * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 90449ba5396SRichard Tran Mills * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 90549ba5396SRichard Tran Mills * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 90649ba5396SRichard Tran Mills * is guaranteed. */ 907*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C)); 908190ae7a4SRichard Tran Mills 909190ae7a4SRichard Tran Mills C->ops->productnumeric = MatProductNumeric_PtAP; 910190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 911190ae7a4SRichard Tran Mills } 912190ae7a4SRichard Tran Mills 913190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 914190ae7a4SRichard Tran Mills { 915190ae7a4SRichard Tran Mills PetscFunctionBegin; 916190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_AB; 917190ae7a4SRichard Tran Mills C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 918190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 919190ae7a4SRichard Tran Mills } 920190ae7a4SRichard Tran Mills 921190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 922190ae7a4SRichard Tran Mills { 923190ae7a4SRichard Tran Mills PetscFunctionBegin; 924190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 925190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 926190ae7a4SRichard Tran Mills } 927190ae7a4SRichard Tran Mills 928190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 929190ae7a4SRichard Tran Mills { 930190ae7a4SRichard Tran Mills PetscFunctionBegin; 931190ae7a4SRichard Tran Mills C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 932190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_ABt; 933190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 934190ae7a4SRichard Tran Mills } 935190ae7a4SRichard Tran Mills 936190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 937190ae7a4SRichard Tran Mills { 938190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 939190ae7a4SRichard Tran Mills Mat A = product->A; 940190ae7a4SRichard Tran Mills PetscBool set, flag; 941190ae7a4SRichard Tran Mills 942190ae7a4SRichard Tran Mills PetscFunctionBegin; 943a3d67537SPierre Jolivet if (PetscDefined(USE_COMPLEX)) { 9442ab6f6a8SStefano Zampini /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used. 9452ab6f6a8SStefano Zampini * We do this in several other locations in this file. This works for the time being, but these 946190ae7a4SRichard Tran Mills * routines are considered unsafe and may be removed from the MatProduct code in the future. 9472ab6f6a8SStefano Zampini * TODO: Add proper MATSEQAIJMKL implementations */ 948190ae7a4SRichard Tran Mills C->ops->productsymbolic = NULL; 949a3d67537SPierre Jolivet } else { 950190ae7a4SRichard Tran Mills /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 951*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatIsSymmetricKnown(A,&set,&flag)); 952a3d67537SPierre Jolivet if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 953a3d67537SPierre Jolivet else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 9541495fedeSRichard Tran Mills /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 955190ae7a4SRichard Tran Mills * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 956a3d67537SPierre Jolivet } 957190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 958190ae7a4SRichard Tran Mills } 959190ae7a4SRichard Tran Mills 960190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 961190ae7a4SRichard Tran Mills { 962190ae7a4SRichard Tran Mills PetscFunctionBegin; 9632ab6f6a8SStefano Zampini C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 964190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 965190ae7a4SRichard Tran Mills } 966190ae7a4SRichard Tran Mills 967190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 968190ae7a4SRichard Tran Mills { 969190ae7a4SRichard Tran Mills PetscFunctionBegin; 9702ab6f6a8SStefano Zampini C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 971190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 972190ae7a4SRichard Tran Mills } 973190ae7a4SRichard Tran Mills 974190ae7a4SRichard Tran Mills PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 975190ae7a4SRichard Tran Mills { 976190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 977190ae7a4SRichard Tran Mills 978190ae7a4SRichard Tran Mills PetscFunctionBegin; 979190ae7a4SRichard Tran Mills switch (product->type) { 980190ae7a4SRichard Tran Mills case MATPRODUCT_AB: 981*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatProductSetFromOptions_SeqAIJMKL_AB(C)); 982190ae7a4SRichard Tran Mills break; 983190ae7a4SRichard Tran Mills case MATPRODUCT_AtB: 984*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatProductSetFromOptions_SeqAIJMKL_AtB(C)); 985190ae7a4SRichard Tran Mills break; 986190ae7a4SRichard Tran Mills case MATPRODUCT_ABt: 987*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatProductSetFromOptions_SeqAIJMKL_ABt(C)); 988190ae7a4SRichard Tran Mills break; 989190ae7a4SRichard Tran Mills case MATPRODUCT_PtAP: 990*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatProductSetFromOptions_SeqAIJMKL_PtAP(C)); 991190ae7a4SRichard Tran Mills break; 992190ae7a4SRichard Tran Mills case MATPRODUCT_RARt: 993*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatProductSetFromOptions_SeqAIJMKL_RARt(C)); 994190ae7a4SRichard Tran Mills break; 995190ae7a4SRichard Tran Mills case MATPRODUCT_ABC: 996*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatProductSetFromOptions_SeqAIJMKL_ABC(C)); 997190ae7a4SRichard Tran Mills break; 998190ae7a4SRichard Tran Mills default: 999190ae7a4SRichard Tran Mills break; 1000190ae7a4SRichard Tran Mills } 1001190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 1002190ae7a4SRichard Tran Mills } 1003431879ecSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 1004190ae7a4SRichard Tran Mills /* ------------------------ End MatProduct code ------------------------ */ 10054f53af40SRichard Tran Mills 10064a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 1007510b72f4SRichard Tran Mills * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 10084a2a386eSRichard Tran Mills * routine, but can also be used to convert an assembled SeqAIJ matrix 10094a2a386eSRichard Tran Mills * into a SeqAIJMKL one. */ 10104a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 10114a2a386eSRichard Tran Mills { 10124a2a386eSRichard Tran Mills Mat B = *newmat; 10134a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 1014c9d46305SRichard Tran Mills PetscBool set; 1015e9c94282SRichard Tran Mills PetscBool sametype; 1016*5f80ce2aSJacob Faibussowitsch PetscErrorCode ierr; 10174a2a386eSRichard Tran Mills 10184a2a386eSRichard Tran Mills PetscFunctionBegin; 1019*5f80ce2aSJacob Faibussowitsch if (reuse == MAT_INITIAL_MATRIX) CHKERRQ(MatDuplicate(A,MAT_COPY_VALUES,&B)); 10204a2a386eSRichard Tran Mills 1021*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject)A,type,&sametype)); 1022e9c94282SRichard Tran Mills if (sametype) PetscFunctionReturn(0); 1023e9c94282SRichard Tran Mills 1024*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscNewLog(B,&aijmkl)); 10254a2a386eSRichard Tran Mills B->spptr = (void*) aijmkl; 10264a2a386eSRichard Tran Mills 1027df555b71SRichard Tran Mills /* Set function pointers for methods that we inherit from AIJ but override. 1028969800c5SRichard Tran Mills * We also parse some command line options below, since those determine some of the methods we point to. */ 10294a2a386eSRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJMKL; 10304a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 10314a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJMKL; 1032c9d46305SRichard Tran Mills 10334abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 1034ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1035d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1036a8327b06SKarl Rupp #else 1037d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 1038d995685eSRichard Tran Mills #endif 10395b49642aSRichard Tran Mills aijmkl->eager_inspection = PETSC_FALSE; 10404abfa3b3SRichard Tran Mills 10414abfa3b3SRichard Tran Mills /* Parse command line options. */ 1042c9d46305SRichard Tran Mills ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 1043*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set)); 1044*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set)); 1045c9d46305SRichard Tran Mills ierr = PetscOptionsEnd();CHKERRQ(ierr); 1046ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1047d995685eSRichard Tran Mills if (!aijmkl->no_SpMV2) { 1048*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n")); 1049d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 1050d995685eSRichard Tran Mills } 1051d995685eSRichard Tran Mills #endif 1052c9d46305SRichard Tran Mills 1053ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1054df555b71SRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1055969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1056df555b71SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1057969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 10588a369200SRichard Tran Mills # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1059190ae7a4SRichard Tran Mills B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1060190ae7a4SRichard Tran Mills B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1061190ae7a4SRichard Tran Mills B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1062190ae7a4SRichard Tran Mills B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1063190ae7a4SRichard Tran Mills B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1064ffcab697SRichard Tran Mills # if !defined(PETSC_USE_COMPLEX) 106549ba5396SRichard Tran Mills B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1066190ae7a4SRichard Tran Mills # else 1067190ae7a4SRichard Tran Mills B->ops->ptapnumeric = NULL; 10684f53af40SRichard Tran Mills # endif 1069e8be1fc7SRichard Tran Mills # endif 10701950a7e7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 10711950a7e7SRichard Tran Mills 1072213898a2SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1073213898a2SRichard Tran Mills /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1074213898a2SRichard Tran Mills * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1075213898a2SRichard Tran Mills * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1076213898a2SRichard Tran Mills * versions in which the older interface has not been deprecated, we use the old interface. */ 10771950a7e7SRichard Tran Mills if (aijmkl->no_SpMV2) { 10784a2a386eSRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL; 1079969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 10804a2a386eSRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL; 1081969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1082c9d46305SRichard Tran Mills } 10831950a7e7SRichard Tran Mills #endif 10844a2a386eSRichard Tran Mills 1085*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ)); 10864a2a386eSRichard Tran Mills 1087*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL)); 10884a2a386eSRichard Tran Mills *newmat = B; 10894a2a386eSRichard Tran Mills PetscFunctionReturn(0); 10904a2a386eSRichard Tran Mills } 10914a2a386eSRichard Tran Mills 10924a2a386eSRichard Tran Mills /*@C 10934a2a386eSRichard Tran Mills MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 10944a2a386eSRichard Tran Mills This type inherits from AIJ and is largely identical, but uses sparse BLAS 10954a2a386eSRichard Tran Mills routines from Intel MKL whenever possible. 109690147e49SRichard Tran Mills If the installed version of MKL supports the "SpMV2" sparse 109790147e49SRichard Tran Mills inspector-executor routines, then those are used by default. 1098597ee276SRichard Tran Mills MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1099597ee276SRichard Tran Mills symmetric A) operations are currently supported. 1100597ee276SRichard Tran Mills Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 110190147e49SRichard Tran Mills 1102d083f849SBarry Smith Collective 11034a2a386eSRichard Tran Mills 11044a2a386eSRichard Tran Mills Input Parameters: 11054a2a386eSRichard Tran Mills + comm - MPI communicator, set to PETSC_COMM_SELF 11064a2a386eSRichard Tran Mills . m - number of rows 11074a2a386eSRichard Tran Mills . n - number of columns 11084a2a386eSRichard Tran Mills . nz - number of nonzeros per row (same for all rows) 11094a2a386eSRichard Tran Mills - nnz - array containing the number of nonzeros in the various rows 11104a2a386eSRichard Tran Mills (possibly different for each row) or NULL 11114a2a386eSRichard Tran Mills 11124a2a386eSRichard Tran Mills Output Parameter: 11134a2a386eSRichard Tran Mills . A - the matrix 11144a2a386eSRichard Tran Mills 111590147e49SRichard Tran Mills Options Database Keys: 111666b7eeb6SRichard Tran Mills + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 111766b7eeb6SRichard Tran Mills - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 111890147e49SRichard Tran Mills 11194a2a386eSRichard Tran Mills Notes: 11204a2a386eSRichard Tran Mills If nnz is given then nz is ignored 11214a2a386eSRichard Tran Mills 11224a2a386eSRichard Tran Mills Level: intermediate 11234a2a386eSRichard Tran Mills 11244a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 11254a2a386eSRichard Tran Mills @*/ 11264a2a386eSRichard Tran Mills PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 11274a2a386eSRichard Tran Mills { 11284a2a386eSRichard Tran Mills PetscFunctionBegin; 1129*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreate(comm,A)); 1130*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetSizes(*A,m,n,m,n)); 1131*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetType(*A,MATSEQAIJMKL)); 1132*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz)); 11334a2a386eSRichard Tran Mills PetscFunctionReturn(0); 11344a2a386eSRichard Tran Mills } 11354a2a386eSRichard Tran Mills 11364a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 11374a2a386eSRichard Tran Mills { 11384a2a386eSRichard Tran Mills PetscFunctionBegin; 1139*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetType(A,MATSEQAIJ)); 1140*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A)); 11414a2a386eSRichard Tran Mills PetscFunctionReturn(0); 11424a2a386eSRichard Tran Mills } 1143