1b9e7e5c1SBarry Smith 24a2a386eSRichard Tran Mills /* 34a2a386eSRichard Tran Mills Defines basic operations for the MATSEQAIJMKL matrix class. 44a2a386eSRichard Tran Mills This class is derived from the MATSEQAIJ class and retains the 54a2a386eSRichard Tran Mills compressed row storage (aka Yale sparse matrix format) but uses 64a2a386eSRichard Tran Mills sparse BLAS operations from the Intel Math Kernel Library (MKL) 74a2a386eSRichard Tran Mills wherever possible. 84a2a386eSRichard Tran Mills */ 94a2a386eSRichard Tran Mills 104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h> 114a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12b9e7e5c1SBarry Smith #include <mkl_spblas.h> 134a2a386eSRichard Tran Mills 144a2a386eSRichard Tran Mills typedef struct { 15c9d46305SRichard Tran Mills PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 165b49642aSRichard Tran Mills PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 174abfa3b3SRichard Tran Mills PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18551aa5c8SRichard Tran Mills PetscObjectState state; 19ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20df555b71SRichard Tran Mills sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21df555b71SRichard Tran Mills struct matrix_descr descr; 22b8cbc1fbSRichard Tran Mills #endif 234a2a386eSRichard Tran Mills } Mat_SeqAIJMKL; 244a2a386eSRichard Tran Mills 254a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 264a2a386eSRichard Tran Mills 274a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 284a2a386eSRichard Tran Mills { 294a2a386eSRichard Tran Mills /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 304a2a386eSRichard Tran Mills /* so we will ignore 'MatType type'. */ 314a2a386eSRichard Tran Mills PetscErrorCode ierr; 324a2a386eSRichard Tran Mills Mat B = *newmat; 33ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 344a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 35c1d5218aSRichard Tran Mills #endif 364a2a386eSRichard Tran Mills 374a2a386eSRichard Tran Mills PetscFunctionBegin; 384a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 394a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 404a2a386eSRichard Tran Mills } 414a2a386eSRichard Tran Mills 424a2a386eSRichard Tran Mills /* Reset the original function pointers. */ 4354871a98SRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJ; 444a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 454a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJ; 4654871a98SRichard Tran Mills B->ops->mult = MatMult_SeqAIJ; 47ff03dc53SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJ; 4854871a98SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJ; 49ff03dc53SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50190ae7a4SRichard Tran Mills B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 51431879ecSRichard Tran Mills B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 52e8be1fc7SRichard Tran Mills B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 53190ae7a4SRichard Tran Mills B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 54190ae7a4SRichard Tran Mills B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 554f53af40SRichard Tran Mills B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 564a2a386eSRichard Tran Mills 57e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 584222ddf1SHong Zhang 59ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 60190ae7a4SRichard Tran Mills if (!aijmkl->no_SpMV2) { 61190ae7a4SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 62190ae7a4SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 63190ae7a4SRichard Tran Mills #endif 64190ae7a4SRichard Tran Mills } 65190ae7a4SRichard Tran Mills 664abfa3b3SRichard Tran Mills /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 67e9c94282SRichard Tran Mills * simply involves destroying the MKL sparse matrix handle and then freeing 68e9c94282SRichard Tran Mills * the spptr pointer. */ 69a8327b06SKarl Rupp if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 70a8327b06SKarl Rupp 714abfa3b3SRichard Tran Mills if (aijmkl->sparse_optimized) { 720632b357SRichard Tran Mills sparse_status_t stat; 734abfa3b3SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 74db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize()"); 754abfa3b3SRichard Tran Mills } 766718818eSStefano Zampini #endif 77e9c94282SRichard Tran Mills ierr = PetscFree(B->spptr);CHKERRQ(ierr); 784a2a386eSRichard Tran Mills 794a2a386eSRichard Tran Mills /* Change the type of B to MATSEQAIJ. */ 804a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 814a2a386eSRichard Tran Mills 824a2a386eSRichard Tran Mills *newmat = B; 834a2a386eSRichard Tran Mills PetscFunctionReturn(0); 844a2a386eSRichard Tran Mills } 854a2a386eSRichard Tran Mills 864a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 874a2a386eSRichard Tran Mills { 884a2a386eSRichard Tran Mills PetscErrorCode ierr; 894a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 904a2a386eSRichard Tran Mills 914a2a386eSRichard Tran Mills PetscFunctionBegin; 92e9c94282SRichard Tran Mills 93e9c94282SRichard Tran Mills /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 94e9c94282SRichard Tran Mills * spptr pointer. */ 95e9c94282SRichard Tran Mills if (aijmkl) { 964a2a386eSRichard Tran Mills /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 97ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 984abfa3b3SRichard Tran Mills if (aijmkl->sparse_optimized) { 994abfa3b3SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 1004abfa3b3SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 101db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()"); 1024abfa3b3SRichard Tran Mills } 1034abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1044a2a386eSRichard Tran Mills ierr = PetscFree(A->spptr);CHKERRQ(ierr); 105e9c94282SRichard Tran Mills } 1064a2a386eSRichard Tran Mills 1074a2a386eSRichard Tran Mills /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 1084a2a386eSRichard Tran Mills * to destroy everything that remains. */ 1094a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 1104a2a386eSRichard Tran Mills /* Note that I don't call MatSetType(). I believe this is because that 1114a2a386eSRichard Tran Mills * is only to be called when *building* a matrix. I could be wrong, but 1124a2a386eSRichard Tran Mills * that is how things work for the SuperLU matrix class. */ 1134a2a386eSRichard Tran Mills ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 1144a2a386eSRichard Tran Mills PetscFunctionReturn(0); 1154a2a386eSRichard Tran Mills } 1164a2a386eSRichard Tran Mills 117190ae7a4SRichard Tran Mills /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 1185b49642aSRichard Tran Mills * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 1195b49642aSRichard Tran Mills * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 1205b49642aSRichard Tran Mills * handle, creates a new one, and then calls mkl_sparse_optimize(). 1215b49642aSRichard Tran Mills * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 1225b49642aSRichard Tran Mills * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 1235b49642aSRichard Tran Mills * an unoptimized matrix handle here. */ 1246e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 1254a2a386eSRichard Tran Mills { 126ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1276e369cd5SRichard Tran Mills /* If the MKL library does not have mkl_sparse_optimize(), then this routine 1286e369cd5SRichard Tran Mills * does nothing. We make it callable anyway in this case because it cuts 1296e369cd5SRichard Tran Mills * down on littering the code with #ifdefs. */ 13045fbe478SRichard Tran Mills PetscFunctionBegin; 1316e369cd5SRichard Tran Mills PetscFunctionReturn(0); 1326e369cd5SRichard Tran Mills #else 133a8327b06SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 134a8327b06SKarl Rupp Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 135a8327b06SKarl Rupp PetscInt m,n; 136a8327b06SKarl Rupp MatScalar *aa; 137a8327b06SKarl Rupp PetscInt *aj,*ai; 1386e369cd5SRichard Tran Mills sparse_status_t stat; 139551aa5c8SRichard Tran Mills PetscErrorCode ierr; 1404a2a386eSRichard Tran Mills 141a8327b06SKarl Rupp PetscFunctionBegin; 142e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 143e626a176SRichard Tran Mills /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 144e626a176SRichard Tran Mills * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 145e626a176SRichard Tran Mills * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 146e626a176SRichard Tran Mills * mkl_sparse_optimize() later. */ 1476e369cd5SRichard Tran Mills if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 1484d51fa23SRichard Tran Mills #endif 1496e369cd5SRichard Tran Mills 1500632b357SRichard Tran Mills if (aijmkl->sparse_optimized) { 1510632b357SRichard Tran Mills /* Matrix has been previously assembled and optimized. Must destroy old 1520632b357SRichard Tran Mills * matrix handle before running the optimization step again. */ 1530632b357SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 154db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()"); 1550632b357SRichard Tran Mills } 1568d3fe1b0SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 1576e369cd5SRichard Tran Mills 158c9d46305SRichard Tran Mills /* Now perform the SpMV2 setup and matrix optimization. */ 159df555b71SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 160df555b71SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 161df555b71SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 16258678438SRichard Tran Mills m = A->rmap->n; 16358678438SRichard Tran Mills n = A->cmap->n; 164df555b71SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 165df555b71SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 166df555b71SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 1671495fedeSRichard Tran Mills if (a->nz && aa && !A->structure_only) { 1688d3fe1b0SRichard Tran Mills /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 1698d3fe1b0SRichard Tran Mills * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 17058678438SRichard Tran Mills stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 171db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()"); 172df555b71SRichard Tran Mills stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 173db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()"); 174df555b71SRichard Tran Mills stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 175db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()"); 1761950a7e7SRichard Tran Mills if (!aijmkl->no_SpMV2) { 177df555b71SRichard Tran Mills stat = mkl_sparse_optimize(aijmkl->csrA); 178db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()"); 1791950a7e7SRichard Tran Mills } 1804abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 181e995cf24SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 182190ae7a4SRichard Tran Mills } else { 183190ae7a4SRichard Tran Mills aijmkl->csrA = PETSC_NULL; 184c9d46305SRichard Tran Mills } 1856e369cd5SRichard Tran Mills 1866e369cd5SRichard Tran Mills PetscFunctionReturn(0); 187d995685eSRichard Tran Mills #endif 1886e369cd5SRichard Tran Mills } 1896e369cd5SRichard Tran Mills 190*b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 191190ae7a4SRichard Tran Mills /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 192190ae7a4SRichard Tran Mills static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A) 19319afcda9SRichard Tran Mills { 19419afcda9SRichard Tran Mills PetscErrorCode ierr; 19519afcda9SRichard Tran Mills sparse_status_t stat; 19619afcda9SRichard Tran Mills sparse_index_base_t indexing; 197190ae7a4SRichard Tran Mills PetscInt m,n; 19845fbe478SRichard Tran Mills PetscInt *aj,*ai,*dummy; 19919afcda9SRichard Tran Mills MatScalar *aa; 20019afcda9SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 20119afcda9SRichard Tran Mills 202190ae7a4SRichard Tran Mills if (csrA) { 20345fbe478SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 204190ae7a4SRichard Tran Mills stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa); 2059c46acdfSRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 206190ae7a4SRichard Tran Mills if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 207190ae7a4SRichard Tran Mills } else { 208190ae7a4SRichard Tran Mills aj = ai = PETSC_NULL; 209190ae7a4SRichard Tran Mills aa = PETSC_NULL; 210aab60f1bSRichard Tran Mills } 211190ae7a4SRichard Tran Mills 21219afcda9SRichard Tran Mills ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 21345fbe478SRichard Tran Mills ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 214aab60f1bSRichard Tran Mills /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 215aab60f1bSRichard Tran Mills * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 216aab60f1bSRichard Tran Mills * they will be destroyed when the MKL handle is destroyed. 217aab60f1bSRichard Tran Mills * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 218190ae7a4SRichard Tran Mills if (csrA) { 219190ae7a4SRichard Tran Mills ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr); 220190ae7a4SRichard Tran Mills } else { 221190ae7a4SRichard Tran Mills /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 222190ae7a4SRichard Tran Mills ierr = MatSetUp(A);CHKERRQ(ierr); 223190ae7a4SRichard Tran Mills ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 224190ae7a4SRichard Tran Mills ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 225190ae7a4SRichard Tran Mills } 22619afcda9SRichard Tran Mills 22719afcda9SRichard Tran Mills /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 22819afcda9SRichard Tran Mills * Now turn it into a MATSEQAIJMKL. */ 22919afcda9SRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 2306c87cf42SRichard Tran Mills 23119afcda9SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 23219afcda9SRichard Tran Mills aijmkl->csrA = csrA; 2336c87cf42SRichard Tran Mills 23419afcda9SRichard Tran Mills /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 23519afcda9SRichard Tran Mills * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 23619afcda9SRichard Tran Mills * and just need to be able to run the MKL optimization step. */ 237f3fd1758SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 238f3fd1758SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 239f3fd1758SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 240190ae7a4SRichard Tran Mills if (csrA) { 24119afcda9SRichard Tran Mills stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 242db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()"); 24319afcda9SRichard Tran Mills stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 244db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()"); 2451950a7e7SRichard Tran Mills } 246e995cf24SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 24719afcda9SRichard Tran Mills PetscFunctionReturn(0); 24819afcda9SRichard Tran Mills } 249*b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 250190ae7a4SRichard Tran Mills 251e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 252e8be1fc7SRichard Tran Mills * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 253e8be1fc7SRichard Tran Mills * MatMatMultNumeric(). */ 254*b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 255190ae7a4SRichard Tran Mills static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 256e8be1fc7SRichard Tran Mills { 257e8be1fc7SRichard Tran Mills PetscInt i; 258e8be1fc7SRichard Tran Mills PetscInt nrows,ncols; 259e8be1fc7SRichard Tran Mills PetscInt nz; 260e8be1fc7SRichard Tran Mills PetscInt *ai,*aj,*dummy; 261e8be1fc7SRichard Tran Mills PetscScalar *aa; 262e8be1fc7SRichard Tran Mills PetscErrorCode ierr; 2631495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 264e8be1fc7SRichard Tran Mills sparse_status_t stat; 265e8be1fc7SRichard Tran Mills sparse_index_base_t indexing; 266e8be1fc7SRichard Tran Mills 267190ae7a4SRichard Tran Mills /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 268190ae7a4SRichard Tran Mills if (!aijmkl->csrA) PetscFunctionReturn(0); 269190ae7a4SRichard Tran Mills 270e8be1fc7SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 271e8be1fc7SRichard Tran Mills stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 272e8be1fc7SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 273e8be1fc7SRichard Tran Mills 274e8be1fc7SRichard Tran Mills /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 275e8be1fc7SRichard Tran Mills * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 276e8be1fc7SRichard Tran Mills for (i=0; i<nrows; i++) { 277e8be1fc7SRichard Tran Mills nz = ai[i+1] - ai[i]; 278e8be1fc7SRichard Tran Mills ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 279e8be1fc7SRichard Tran Mills } 280e8be1fc7SRichard Tran Mills 281e8be1fc7SRichard Tran Mills ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 282e8be1fc7SRichard Tran Mills ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 283e8be1fc7SRichard Tran Mills 284e995cf24SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 285e995cf24SRichard Tran Mills /* We mark our matrix as having a valid, optimized MKL handle. 286e995cf24SRichard Tran Mills * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */ 287e995cf24SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 288e8be1fc7SRichard Tran Mills PetscFunctionReturn(0); 289e8be1fc7SRichard Tran Mills } 290*b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 291e8be1fc7SRichard Tran Mills 2923849ddb2SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 2933849ddb2SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer) 2943849ddb2SRichard Tran Mills { 2953849ddb2SRichard Tran Mills PetscInt i,j,k; 2963849ddb2SRichard Tran Mills PetscInt nrows,ncols; 2973849ddb2SRichard Tran Mills PetscInt nz; 2983849ddb2SRichard Tran Mills PetscInt *ai,*aj,*dummy; 2993849ddb2SRichard Tran Mills PetscScalar *aa; 3003849ddb2SRichard Tran Mills PetscErrorCode ierr; 3011495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 3023849ddb2SRichard Tran Mills sparse_status_t stat; 3033849ddb2SRichard Tran Mills sparse_index_base_t indexing; 3043849ddb2SRichard Tran Mills 3053849ddb2SRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");CHKERRQ(ierr); 3063849ddb2SRichard Tran Mills 3073849ddb2SRichard Tran Mills /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 3083849ddb2SRichard Tran Mills if (!aijmkl->csrA) { 3093849ddb2SRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");CHKERRQ(ierr); 3103849ddb2SRichard Tran Mills PetscFunctionReturn(0); 3113849ddb2SRichard Tran Mills } 3123849ddb2SRichard Tran Mills 3133849ddb2SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 3143849ddb2SRichard Tran Mills stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 3153849ddb2SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 3163849ddb2SRichard Tran Mills 3173849ddb2SRichard Tran Mills k = 0; 3183849ddb2SRichard Tran Mills for (i=0; i<nrows; i++) { 3193849ddb2SRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"row %D: ",i);CHKERRQ(ierr); 3203849ddb2SRichard Tran Mills nz = ai[i+1] - ai[i]; 3213849ddb2SRichard Tran Mills for (j=0; j<nz; j++) { 3223849ddb2SRichard Tran Mills if (aa) { 3233849ddb2SRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"(%D, %g) ",aj[k],aa[k]);CHKERRQ(ierr); 3243849ddb2SRichard Tran Mills } else { 3253849ddb2SRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);CHKERRQ(ierr); 3263849ddb2SRichard Tran Mills } 3273849ddb2SRichard Tran Mills k++; 3283849ddb2SRichard Tran Mills } 3293849ddb2SRichard Tran Mills ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 3303849ddb2SRichard Tran Mills } 3313849ddb2SRichard Tran Mills PetscFunctionReturn(0); 3323849ddb2SRichard Tran Mills } 3333849ddb2SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 3343849ddb2SRichard Tran Mills 3356e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 3366e369cd5SRichard Tran Mills { 3376e369cd5SRichard Tran Mills PetscErrorCode ierr; 3381495fedeSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 3396e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl_dest; 3406e369cd5SRichard Tran Mills 3416e369cd5SRichard Tran Mills PetscFunctionBegin; 3426e369cd5SRichard Tran Mills ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 3436e369cd5SRichard Tran Mills aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr; 344580bdb30SBarry Smith ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr); 3456e369cd5SRichard Tran Mills aijmkl_dest->sparse_optimized = PETSC_FALSE; 3465b49642aSRichard Tran Mills if (aijmkl->eager_inspection) { 3476e369cd5SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 3485b49642aSRichard Tran Mills } 3496e369cd5SRichard Tran Mills PetscFunctionReturn(0); 3506e369cd5SRichard Tran Mills } 3516e369cd5SRichard Tran Mills 3526e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 3536e369cd5SRichard Tran Mills { 3546e369cd5SRichard Tran Mills PetscErrorCode ierr; 3556e369cd5SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3565b49642aSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 3576e369cd5SRichard Tran Mills 3586e369cd5SRichard Tran Mills PetscFunctionBegin; 3596e369cd5SRichard Tran Mills if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 3606e369cd5SRichard Tran Mills 3616e369cd5SRichard Tran Mills /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 3626e369cd5SRichard Tran Mills * extra information and some different methods, call the AssemblyEnd 3636e369cd5SRichard Tran Mills * routine for a MATSEQAIJ. 3646e369cd5SRichard Tran Mills * I'm not sure if this is the best way to do this, but it avoids 365d96e85feSRichard Tran Mills * a lot of code duplication. */ 3666e369cd5SRichard Tran Mills a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 3676e369cd5SRichard Tran Mills ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 3686e369cd5SRichard Tran Mills 3695b49642aSRichard Tran Mills /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 3705b49642aSRichard Tran Mills * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 3715b49642aSRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*)A->spptr; 3725b49642aSRichard Tran Mills if (aijmkl->eager_inspection) { 3736e369cd5SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 3745b49642aSRichard Tran Mills } 375df555b71SRichard Tran Mills 3764a2a386eSRichard Tran Mills PetscFunctionReturn(0); 3774a2a386eSRichard Tran Mills } 3784a2a386eSRichard Tran Mills 379e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 3804a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 3814a2a386eSRichard Tran Mills { 3824a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3834a2a386eSRichard Tran Mills const PetscScalar *x; 3844a2a386eSRichard Tran Mills PetscScalar *y; 3854a2a386eSRichard Tran Mills const MatScalar *aa; 3864a2a386eSRichard Tran Mills PetscErrorCode ierr; 3874a2a386eSRichard Tran Mills PetscInt m = A->rmap->n; 388db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 389db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 390db63039fSRichard Tran Mills PetscScalar beta = 0.0; 3914a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 392db63039fSRichard Tran Mills char matdescra[6]; 393db63039fSRichard Tran Mills 3944a2a386eSRichard Tran Mills 3954a2a386eSRichard Tran Mills /* Variables not in MatMult_SeqAIJ. */ 396ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 397ff03dc53SRichard Tran Mills 398ff03dc53SRichard Tran Mills PetscFunctionBegin; 399db63039fSRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 400db63039fSRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 401ff03dc53SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 402ff03dc53SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 403ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 404ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 405ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 406ff03dc53SRichard Tran Mills 407ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 408db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 409ff03dc53SRichard Tran Mills 410ff03dc53SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 411ff03dc53SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 412ff03dc53SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 413ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 414ff03dc53SRichard Tran Mills } 4151950a7e7SRichard Tran Mills #endif 416ff03dc53SRichard Tran Mills 417ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 418df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 419df555b71SRichard Tran Mills { 420df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 421df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 422df555b71SRichard Tran Mills const PetscScalar *x; 423df555b71SRichard Tran Mills PetscScalar *y; 424df555b71SRichard Tran Mills PetscErrorCode ierr; 425df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 426551aa5c8SRichard Tran Mills PetscObjectState state; 427df555b71SRichard Tran Mills 428df555b71SRichard Tran Mills PetscFunctionBegin; 429df555b71SRichard Tran Mills 43038987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 43138987b35SRichard Tran Mills if(!a->nz) { 43238987b35SRichard Tran Mills PetscInt i; 43338987b35SRichard Tran Mills PetscInt m=A->rmap->n; 43438987b35SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 43538987b35SRichard Tran Mills for (i=0; i<m; i++) { 43638987b35SRichard Tran Mills y[i] = 0.0; 43738987b35SRichard Tran Mills } 43838987b35SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 43938987b35SRichard Tran Mills PetscFunctionReturn(0); 44038987b35SRichard Tran Mills } 441f36dfe3fSRichard Tran Mills 442df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 443df555b71SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 444df555b71SRichard Tran Mills 4453fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 4463fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 4473fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 448551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 449551aa5c8SRichard Tran Mills if (!aijmkl->sparse_optimized || aijmkl->state != state) { 4503fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 4513fa15762SRichard Tran Mills } 4523fa15762SRichard Tran Mills 453df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMult. */ 454df555b71SRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 455db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 456df555b71SRichard Tran Mills 457df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 458df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 459df555b71SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 460df555b71SRichard Tran Mills PetscFunctionReturn(0); 461df555b71SRichard Tran Mills } 462d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 463df555b71SRichard Tran Mills 464e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 465ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 466ff03dc53SRichard Tran Mills { 467ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 468ff03dc53SRichard Tran Mills const PetscScalar *x; 469ff03dc53SRichard Tran Mills PetscScalar *y; 470ff03dc53SRichard Tran Mills const MatScalar *aa; 471ff03dc53SRichard Tran Mills PetscErrorCode ierr; 472ff03dc53SRichard Tran Mills PetscInt m = A->rmap->n; 473db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 474db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 475db63039fSRichard Tran Mills PetscScalar beta = 0.0; 476ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 477db63039fSRichard Tran Mills char matdescra[6]; 478ff03dc53SRichard Tran Mills 479ff03dc53SRichard Tran Mills /* Variables not in MatMultTranspose_SeqAIJ. */ 480ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 4814a2a386eSRichard Tran Mills 4824a2a386eSRichard Tran Mills PetscFunctionBegin; 483969800c5SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 484969800c5SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 4854a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 4864a2a386eSRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 4874a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 4884a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 4894a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 4904a2a386eSRichard Tran Mills 4914a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 492db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 4934a2a386eSRichard Tran Mills 4944a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 4954a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 4964a2a386eSRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 4974a2a386eSRichard Tran Mills PetscFunctionReturn(0); 4984a2a386eSRichard Tran Mills } 4991950a7e7SRichard Tran Mills #endif 5004a2a386eSRichard Tran Mills 501ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 502df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 503df555b71SRichard Tran Mills { 504df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 505df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 506df555b71SRichard Tran Mills const PetscScalar *x; 507df555b71SRichard Tran Mills PetscScalar *y; 508df555b71SRichard Tran Mills PetscErrorCode ierr; 5090632b357SRichard Tran Mills sparse_status_t stat; 510551aa5c8SRichard Tran Mills PetscObjectState state; 511df555b71SRichard Tran Mills 512df555b71SRichard Tran Mills PetscFunctionBegin; 513df555b71SRichard Tran Mills 51438987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 51538987b35SRichard Tran Mills if(!a->nz) { 51638987b35SRichard Tran Mills PetscInt i; 51738987b35SRichard Tran Mills PetscInt n=A->cmap->n; 51838987b35SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 51938987b35SRichard Tran Mills for (i=0; i<n; i++) { 52038987b35SRichard Tran Mills y[i] = 0.0; 52138987b35SRichard Tran Mills } 52238987b35SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 52338987b35SRichard Tran Mills PetscFunctionReturn(0); 52438987b35SRichard Tran Mills } 525f36dfe3fSRichard Tran Mills 526df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 527df555b71SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 528df555b71SRichard Tran Mills 5293fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 5303fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 5313fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 532551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 533551aa5c8SRichard Tran Mills if (!aijmkl->sparse_optimized || aijmkl->state != state) { 5343fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 5353fa15762SRichard Tran Mills } 5363fa15762SRichard Tran Mills 537df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 538df555b71SRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 539db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 540df555b71SRichard Tran Mills 541df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 542df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 543df555b71SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 544df555b71SRichard Tran Mills PetscFunctionReturn(0); 545df555b71SRichard Tran Mills } 546d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 547df555b71SRichard Tran Mills 548e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 5494a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 5504a2a386eSRichard Tran Mills { 5514a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5524a2a386eSRichard Tran Mills const PetscScalar *x; 5534a2a386eSRichard Tran Mills PetscScalar *y,*z; 5544a2a386eSRichard Tran Mills const MatScalar *aa; 5554a2a386eSRichard Tran Mills PetscErrorCode ierr; 5564a2a386eSRichard Tran Mills PetscInt m = A->rmap->n; 557db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 5584a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 5594a2a386eSRichard Tran Mills PetscInt i; 5604a2a386eSRichard Tran Mills 561ff03dc53SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 562ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 563a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 564db63039fSRichard Tran Mills PetscScalar beta; 565a84739b8SRichard Tran Mills char matdescra[6]; 566ff03dc53SRichard Tran Mills 567ff03dc53SRichard Tran Mills PetscFunctionBegin; 568a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 569a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 570a84739b8SRichard Tran Mills 571ff03dc53SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 572ff03dc53SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 573ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 574ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 575ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 576ff03dc53SRichard Tran Mills 577ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 578a84739b8SRichard Tran Mills if (zz == yy) { 579a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 580db63039fSRichard Tran Mills beta = 1.0; 581db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 582a84739b8SRichard Tran Mills } else { 583db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 584db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 585db63039fSRichard Tran Mills beta = 0.0; 586db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 587ff03dc53SRichard Tran Mills for (i=0; i<m; i++) { 588ff03dc53SRichard Tran Mills z[i] += y[i]; 589ff03dc53SRichard Tran Mills } 590a84739b8SRichard Tran Mills } 591ff03dc53SRichard Tran Mills 592ff03dc53SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 593ff03dc53SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 594ff03dc53SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 595ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 596ff03dc53SRichard Tran Mills } 5971950a7e7SRichard Tran Mills #endif 598ff03dc53SRichard Tran Mills 599ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 600df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 601df555b71SRichard Tran Mills { 602df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 603df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 604df555b71SRichard Tran Mills const PetscScalar *x; 605df555b71SRichard Tran Mills PetscScalar *y,*z; 606df555b71SRichard Tran Mills PetscErrorCode ierr; 607df555b71SRichard Tran Mills PetscInt m = A->rmap->n; 608df555b71SRichard Tran Mills PetscInt i; 609df555b71SRichard Tran Mills 610df555b71SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 611df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 612551aa5c8SRichard Tran Mills PetscObjectState state; 613df555b71SRichard Tran Mills 614df555b71SRichard Tran Mills PetscFunctionBegin; 615df555b71SRichard Tran Mills 61638987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 61738987b35SRichard Tran Mills if(!a->nz) { 61838987b35SRichard Tran Mills PetscInt i; 61938987b35SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 62038987b35SRichard Tran Mills for (i=0; i<m; i++) { 62138987b35SRichard Tran Mills z[i] = y[i]; 62238987b35SRichard Tran Mills } 62338987b35SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 62438987b35SRichard Tran Mills PetscFunctionReturn(0); 62538987b35SRichard Tran Mills } 626df555b71SRichard Tran Mills 627df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 628df555b71SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 629df555b71SRichard Tran Mills 6303fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 6313fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 6323fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 633551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 634551aa5c8SRichard Tran Mills if (!aijmkl->sparse_optimized || aijmkl->state != state) { 6353fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 6363fa15762SRichard Tran Mills } 6373fa15762SRichard Tran Mills 638df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 639df555b71SRichard Tran Mills if (zz == yy) { 640df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 641df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 642db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 643db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 644df555b71SRichard Tran Mills } else { 645df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 646df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 647db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 648db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 649df555b71SRichard Tran Mills for (i=0; i<m; i++) { 650df555b71SRichard Tran Mills z[i] += y[i]; 651df555b71SRichard Tran Mills } 652df555b71SRichard Tran Mills } 653df555b71SRichard Tran Mills 654df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 655df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 656df555b71SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 657df555b71SRichard Tran Mills PetscFunctionReturn(0); 658df555b71SRichard Tran Mills } 659d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 660df555b71SRichard Tran Mills 661e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 662ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 663ff03dc53SRichard Tran Mills { 664ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 665ff03dc53SRichard Tran Mills const PetscScalar *x; 666ff03dc53SRichard Tran Mills PetscScalar *y,*z; 667ff03dc53SRichard Tran Mills const MatScalar *aa; 668ff03dc53SRichard Tran Mills PetscErrorCode ierr; 669ff03dc53SRichard Tran Mills PetscInt m = A->rmap->n; 670db63039fSRichard Tran Mills PetscInt n = A->cmap->n; 671ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 672ff03dc53SRichard Tran Mills PetscInt i; 673ff03dc53SRichard Tran Mills 674ff03dc53SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 675ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 676a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 677db63039fSRichard Tran Mills PetscScalar beta; 678a84739b8SRichard Tran Mills char matdescra[6]; 6794a2a386eSRichard Tran Mills 6804a2a386eSRichard Tran Mills PetscFunctionBegin; 681a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 682a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 683a84739b8SRichard Tran Mills 6844a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 6854a2a386eSRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 6864a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 6874a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 6884a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 6894a2a386eSRichard Tran Mills 6904a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 691a84739b8SRichard Tran Mills if (zz == yy) { 692a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 693db63039fSRichard Tran Mills beta = 1.0; 694969800c5SRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 695a84739b8SRichard Tran Mills } else { 696db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 697db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 698db63039fSRichard Tran Mills beta = 0.0; 699db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 700969800c5SRichard Tran Mills for (i=0; i<n; i++) { 7014a2a386eSRichard Tran Mills z[i] += y[i]; 7024a2a386eSRichard Tran Mills } 703a84739b8SRichard Tran Mills } 7044a2a386eSRichard Tran Mills 7054a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 7064a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 7074a2a386eSRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 7084a2a386eSRichard Tran Mills PetscFunctionReturn(0); 7094a2a386eSRichard Tran Mills } 7101950a7e7SRichard Tran Mills #endif 7114a2a386eSRichard Tran Mills 712ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 713df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 714df555b71SRichard Tran Mills { 715df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 716df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 717df555b71SRichard Tran Mills const PetscScalar *x; 718df555b71SRichard Tran Mills PetscScalar *y,*z; 719df555b71SRichard Tran Mills PetscErrorCode ierr; 720969800c5SRichard Tran Mills PetscInt n = A->cmap->n; 721df555b71SRichard Tran Mills PetscInt i; 722551aa5c8SRichard Tran Mills PetscObjectState state; 723df555b71SRichard Tran Mills 724df555b71SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 725df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 726df555b71SRichard Tran Mills 727df555b71SRichard Tran Mills PetscFunctionBegin; 728df555b71SRichard Tran Mills 72938987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 73038987b35SRichard Tran Mills if(!a->nz) { 73138987b35SRichard Tran Mills PetscInt i; 73238987b35SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 73338987b35SRichard Tran Mills for (i=0; i<n; i++) { 73438987b35SRichard Tran Mills z[i] = y[i]; 73538987b35SRichard Tran Mills } 73638987b35SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 73738987b35SRichard Tran Mills PetscFunctionReturn(0); 73838987b35SRichard Tran Mills } 739f36dfe3fSRichard Tran Mills 740df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 741df555b71SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 742df555b71SRichard Tran Mills 7433fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 7443fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 7453fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 746551aa5c8SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 747551aa5c8SRichard Tran Mills if (!aijmkl->sparse_optimized || aijmkl->state != state) { 7483fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 7493fa15762SRichard Tran Mills } 7503fa15762SRichard Tran Mills 751df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 752df555b71SRichard Tran Mills if (zz == yy) { 753df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 754df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 755db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 756db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 757df555b71SRichard Tran Mills } else { 758df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 759df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 760db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 761db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 762969800c5SRichard Tran Mills for (i=0; i<n; i++) { 763df555b71SRichard Tran Mills z[i] += y[i]; 764df555b71SRichard Tran Mills } 765df555b71SRichard Tran Mills } 766df555b71SRichard Tran Mills 767df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 768df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 769df555b71SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 770df555b71SRichard Tran Mills PetscFunctionReturn(0); 771df555b71SRichard Tran Mills } 772d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 773df555b71SRichard Tran Mills 774190ae7a4SRichard Tran Mills /* -------------------------- MatProduct code -------------------------- */ 7758a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 776190ae7a4SRichard Tran Mills static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 777431879ecSRichard Tran Mills { 7781495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr; 779431879ecSRichard Tran Mills sparse_matrix_t csrA,csrB,csrC; 780190ae7a4SRichard Tran Mills PetscInt nrows,ncols; 781431879ecSRichard Tran Mills PetscErrorCode ierr; 782431879ecSRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 783431879ecSRichard Tran Mills struct matrix_descr descr_type_gen; 784431879ecSRichard Tran Mills PetscObjectState state; 785431879ecSRichard Tran Mills 786431879ecSRichard Tran Mills PetscFunctionBegin; 787190ae7a4SRichard Tran Mills /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 788190ae7a4SRichard Tran Mills * not handle sparse matrices with zero rows or columns. */ 789190ae7a4SRichard Tran Mills if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 790190ae7a4SRichard Tran Mills else nrows = A->cmap->N; 791190ae7a4SRichard Tran Mills if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 792190ae7a4SRichard Tran Mills else ncols = B->rmap->N; 793190ae7a4SRichard Tran Mills 794431879ecSRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 795431879ecSRichard Tran Mills if (!a->sparse_optimized || a->state != state) { 796431879ecSRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 797431879ecSRichard Tran Mills } 798431879ecSRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 799431879ecSRichard Tran Mills if (!b->sparse_optimized || b->state != state) { 800431879ecSRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(B); 801431879ecSRichard Tran Mills } 802431879ecSRichard Tran Mills csrA = a->csrA; 803431879ecSRichard Tran Mills csrB = b->csrA; 804431879ecSRichard Tran Mills descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 805431879ecSRichard Tran Mills 806190ae7a4SRichard Tran Mills if (csrA && csrB) { 807190ae7a4SRichard Tran Mills stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC); 808db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()"); 809190ae7a4SRichard Tran Mills } else { 810190ae7a4SRichard Tran Mills csrC = PETSC_NULL; 811190ae7a4SRichard Tran Mills } 812190ae7a4SRichard Tran Mills 813190ae7a4SRichard Tran Mills ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr); 814431879ecSRichard Tran Mills 815431879ecSRichard Tran Mills PetscFunctionReturn(0); 816431879ecSRichard Tran Mills } 817431879ecSRichard Tran Mills 818190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 819e8be1fc7SRichard Tran Mills { 8201495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 821e8be1fc7SRichard Tran Mills sparse_matrix_t csrA, csrB, csrC; 822e8be1fc7SRichard Tran Mills PetscErrorCode ierr; 823e8be1fc7SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 824e8be1fc7SRichard Tran Mills struct matrix_descr descr_type_gen; 825e8be1fc7SRichard Tran Mills PetscObjectState state; 826e8be1fc7SRichard Tran Mills 827e8be1fc7SRichard Tran Mills PetscFunctionBegin; 828e8be1fc7SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 829e8be1fc7SRichard Tran Mills if (!a->sparse_optimized || a->state != state) { 830e8be1fc7SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 831e8be1fc7SRichard Tran Mills } 832e8be1fc7SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 833e8be1fc7SRichard Tran Mills if (!b->sparse_optimized || b->state != state) { 834e8be1fc7SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(B); 835e8be1fc7SRichard Tran Mills } 836e8be1fc7SRichard Tran Mills csrA = a->csrA; 837e8be1fc7SRichard Tran Mills csrB = b->csrA; 838e8be1fc7SRichard Tran Mills csrC = c->csrA; 839e8be1fc7SRichard Tran Mills descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 840e8be1fc7SRichard Tran Mills 841190ae7a4SRichard Tran Mills if (csrA && csrB) { 842190ae7a4SRichard Tran Mills stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC); 843db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()"); 844190ae7a4SRichard Tran Mills } else { 845190ae7a4SRichard Tran Mills csrC = PETSC_NULL; 846190ae7a4SRichard Tran Mills } 847e8be1fc7SRichard Tran Mills 848e8be1fc7SRichard Tran Mills /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 8494f53af40SRichard Tran Mills ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 850e8be1fc7SRichard Tran Mills 851e8be1fc7SRichard Tran Mills PetscFunctionReturn(0); 852e8be1fc7SRichard Tran Mills } 853e8be1fc7SRichard Tran Mills 854190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 8554f53af40SRichard Tran Mills { 856190ae7a4SRichard Tran Mills PetscErrorCode ierr; 857190ae7a4SRichard Tran Mills 858190ae7a4SRichard Tran Mills PetscFunctionBegin; 859190ae7a4SRichard Tran Mills ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 860190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 861190ae7a4SRichard Tran Mills } 862190ae7a4SRichard Tran Mills 863190ae7a4SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 864190ae7a4SRichard Tran Mills { 865190ae7a4SRichard Tran Mills PetscErrorCode ierr; 866190ae7a4SRichard Tran Mills 867190ae7a4SRichard Tran Mills PetscFunctionBegin; 868190ae7a4SRichard Tran Mills ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 869190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 870190ae7a4SRichard Tran Mills } 871190ae7a4SRichard Tran Mills 872190ae7a4SRichard Tran Mills PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 873190ae7a4SRichard Tran Mills { 874190ae7a4SRichard Tran Mills PetscErrorCode ierr; 875190ae7a4SRichard Tran Mills 876190ae7a4SRichard Tran Mills PetscFunctionBegin; 877190ae7a4SRichard Tran Mills ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 878190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 879190ae7a4SRichard Tran Mills } 880190ae7a4SRichard Tran Mills 881190ae7a4SRichard Tran Mills PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 882190ae7a4SRichard Tran Mills { 883190ae7a4SRichard Tran Mills PetscErrorCode ierr; 884190ae7a4SRichard Tran Mills 885190ae7a4SRichard Tran Mills PetscFunctionBegin; 886190ae7a4SRichard Tran Mills ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 887190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 888190ae7a4SRichard Tran Mills } 889190ae7a4SRichard Tran Mills 890190ae7a4SRichard Tran Mills PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 891190ae7a4SRichard Tran Mills { 892190ae7a4SRichard Tran Mills PetscErrorCode ierr; 893190ae7a4SRichard Tran Mills 894190ae7a4SRichard Tran Mills PetscFunctionBegin; 895190ae7a4SRichard Tran Mills ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr); 896190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 897190ae7a4SRichard Tran Mills } 898190ae7a4SRichard Tran Mills 899190ae7a4SRichard Tran Mills PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 900190ae7a4SRichard Tran Mills { 901190ae7a4SRichard Tran Mills PetscErrorCode ierr; 902190ae7a4SRichard Tran Mills 903190ae7a4SRichard Tran Mills PetscFunctionBegin; 904190ae7a4SRichard Tran Mills ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr); 905190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 906190ae7a4SRichard Tran Mills } 907190ae7a4SRichard Tran Mills 908190ae7a4SRichard Tran Mills static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 909190ae7a4SRichard Tran Mills { 910190ae7a4SRichard Tran Mills PetscErrorCode ierr; 911190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 912190ae7a4SRichard Tran Mills Mat A = product->A,B = product->B; 913190ae7a4SRichard Tran Mills 914190ae7a4SRichard Tran Mills PetscFunctionBegin; 915190ae7a4SRichard Tran Mills ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr); 916190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 917190ae7a4SRichard Tran Mills } 918190ae7a4SRichard Tran Mills 919190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 920190ae7a4SRichard Tran Mills { 921190ae7a4SRichard Tran Mills PetscErrorCode ierr; 922190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 923190ae7a4SRichard Tran Mills Mat A = product->A,B = product->B; 924190ae7a4SRichard Tran Mills PetscReal fill = product->fill; 925190ae7a4SRichard Tran Mills 926190ae7a4SRichard Tran Mills PetscFunctionBegin; 927190ae7a4SRichard Tran Mills ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr); 928190ae7a4SRichard Tran Mills C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 929190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 930190ae7a4SRichard Tran Mills } 931190ae7a4SRichard Tran Mills 93249ba5396SRichard Tran Mills PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C) 933190ae7a4SRichard Tran Mills { 934190ae7a4SRichard Tran Mills Mat Ct; 935190ae7a4SRichard Tran Mills Vec zeros; 9361495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 9374f53af40SRichard Tran Mills sparse_matrix_t csrA, csrP, csrC; 9384f53af40SRichard Tran Mills PetscBool set, flag; 9394f53af40SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 940b9e1dd46SRichard Tran Mills struct matrix_descr descr_type_sym; 9414f53af40SRichard Tran Mills PetscObjectState state; 9424f53af40SRichard Tran Mills PetscErrorCode ierr; 9434f53af40SRichard Tran Mills 9444f53af40SRichard Tran Mills PetscFunctionBegin; 9454f53af40SRichard Tran Mills ierr = MatIsSymmetricKnown(A,&set,&flag); 94649ba5396SRichard Tran Mills if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 9474f53af40SRichard Tran Mills 9484f53af40SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 9494f53af40SRichard Tran Mills if (!a->sparse_optimized || a->state != state) { 9504f53af40SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 9514f53af40SRichard Tran Mills } 9524f53af40SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 9534f53af40SRichard Tran Mills if (!p->sparse_optimized || p->state != state) { 9544f53af40SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(P); 9554f53af40SRichard Tran Mills } 9564f53af40SRichard Tran Mills csrA = a->csrA; 9574f53af40SRichard Tran Mills csrP = p->csrA; 9584f53af40SRichard Tran Mills csrC = c->csrA; 959b9e1dd46SRichard Tran Mills descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 960190ae7a4SRichard Tran Mills descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 961b9e1dd46SRichard Tran Mills descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 9624f53af40SRichard Tran Mills 963f8990b4aSRichard Tran Mills /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 964b9e1dd46SRichard Tran Mills stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 965db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()"); 9664f53af40SRichard Tran Mills 967190ae7a4SRichard Tran Mills /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 968190ae7a4SRichard Tran Mills * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 969190ae7a4SRichard Tran Mills * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 970190ae7a4SRichard Tran Mills * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY 971190ae7a4SRichard Tran Mills * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 972190ae7a4SRichard Tran Mills * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 973190ae7a4SRichard Tran Mills * the full matrix. */ 9744f53af40SRichard Tran Mills ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 975190ae7a4SRichard Tran Mills ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr); 976190ae7a4SRichard Tran Mills ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr); 977190ae7a4SRichard Tran Mills ierr = VecSetFromOptions(zeros);CHKERRQ(ierr); 978190ae7a4SRichard Tran Mills ierr = VecZeroEntries(zeros);CHKERRQ(ierr); 979190ae7a4SRichard Tran Mills ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr); 980190ae7a4SRichard Tran Mills ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 981190ae7a4SRichard Tran Mills /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 982190ae7a4SRichard Tran Mills ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr); 9831495fedeSRichard Tran Mills ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr); 984190ae7a4SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr); 985190ae7a4SRichard Tran Mills ierr = VecDestroy(&zeros);CHKERRQ(ierr); 986190ae7a4SRichard Tran Mills ierr = MatDestroy(&Ct);CHKERRQ(ierr); 9874f53af40SRichard Tran Mills PetscFunctionReturn(0); 9884f53af40SRichard Tran Mills } 989190ae7a4SRichard Tran Mills 990190ae7a4SRichard Tran Mills PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 991190ae7a4SRichard Tran Mills { 992190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 993190ae7a4SRichard Tran Mills Mat A = product->A,P = product->B; 9941495fedeSRichard Tran Mills Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr; 995190ae7a4SRichard Tran Mills sparse_matrix_t csrA,csrP,csrC; 996190ae7a4SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 997190ae7a4SRichard Tran Mills struct matrix_descr descr_type_sym; 998190ae7a4SRichard Tran Mills PetscObjectState state; 999190ae7a4SRichard Tran Mills PetscErrorCode ierr; 1000190ae7a4SRichard Tran Mills 1001190ae7a4SRichard Tran Mills PetscFunctionBegin; 1002190ae7a4SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 1003190ae7a4SRichard Tran Mills if (!a->sparse_optimized || a->state != state) { 1004190ae7a4SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 1005190ae7a4SRichard Tran Mills } 1006190ae7a4SRichard Tran Mills ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 1007190ae7a4SRichard Tran Mills if (!p->sparse_optimized || p->state != state) { 1008190ae7a4SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(P); 1009190ae7a4SRichard Tran Mills } 1010190ae7a4SRichard Tran Mills csrA = a->csrA; 1011190ae7a4SRichard Tran Mills csrP = p->csrA; 1012190ae7a4SRichard Tran Mills descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 1013190ae7a4SRichard Tran Mills descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 1014190ae7a4SRichard Tran Mills descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 1015190ae7a4SRichard Tran Mills 1016190ae7a4SRichard Tran Mills /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 1017190ae7a4SRichard Tran Mills if (csrP && csrA) { 1018190ae7a4SRichard Tran Mills stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL); 1019db04c2a0SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()"); 1020190ae7a4SRichard Tran Mills } else { 1021190ae7a4SRichard Tran Mills csrC = PETSC_NULL; 1022190ae7a4SRichard Tran Mills } 1023190ae7a4SRichard Tran Mills 1024190ae7a4SRichard Tran Mills /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 1025190ae7a4SRichard Tran Mills * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 102649ba5396SRichard Tran Mills * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 102749ba5396SRichard Tran Mills * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 102849ba5396SRichard Tran Mills * is guaranteed. */ 1029190ae7a4SRichard Tran Mills ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr); 1030190ae7a4SRichard Tran Mills 1031190ae7a4SRichard Tran Mills C->ops->productnumeric = MatProductNumeric_PtAP; 1032190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 1033190ae7a4SRichard Tran Mills } 1034190ae7a4SRichard Tran Mills 1035190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 1036190ae7a4SRichard Tran Mills { 1037190ae7a4SRichard Tran Mills PetscFunctionBegin; 1038190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_AB; 1039190ae7a4SRichard Tran Mills C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1040190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 1041190ae7a4SRichard Tran Mills } 1042190ae7a4SRichard Tran Mills 1043190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 1044190ae7a4SRichard Tran Mills { 1045190ae7a4SRichard Tran Mills PetscFunctionBegin; 1046190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 1047190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 1048190ae7a4SRichard Tran Mills } 1049190ae7a4SRichard Tran Mills 1050190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 1051190ae7a4SRichard Tran Mills { 1052190ae7a4SRichard Tran Mills PetscFunctionBegin; 1053190ae7a4SRichard Tran Mills C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 1054190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_ABt; 1055190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 1056190ae7a4SRichard Tran Mills } 1057190ae7a4SRichard Tran Mills 1058190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 1059190ae7a4SRichard Tran Mills { 1060190ae7a4SRichard Tran Mills PetscErrorCode ierr; 1061190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 1062190ae7a4SRichard Tran Mills Mat A = product->A; 1063190ae7a4SRichard Tran Mills PetscBool set, flag; 1064190ae7a4SRichard Tran Mills 1065190ae7a4SRichard Tran Mills PetscFunctionBegin; 1066190ae7a4SRichard Tran Mills #if defined(PETSC_USE_COMPLEX) 1067190ae7a4SRichard Tran Mills /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Basic() will be used. 1068190ae7a4SRichard Tran Mills * We do this in several other locations in this file. This works for the time being, but the _Basic() 1069190ae7a4SRichard Tran Mills * routines are considered unsafe and may be removed from the MatProduct code in the future. 1070190ae7a4SRichard Tran Mills * TODO: Add proper MATSEQAIJMKL implementations, instead of relying on the _Basic() routines. */ 1071190ae7a4SRichard Tran Mills C->ops->productsymbolic = NULL; 1072190ae7a4SRichard Tran Mills #else 1073190ae7a4SRichard Tran Mills /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 1074190ae7a4SRichard Tran Mills ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr); 1075190ae7a4SRichard Tran Mills if (set && flag) { 1076190ae7a4SRichard Tran Mills C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1077190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 1078190ae7a4SRichard Tran Mills } else { 1079190ae7a4SRichard Tran Mills C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */ 1080190ae7a4SRichard Tran Mills } 10811495fedeSRichard Tran Mills /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 1082190ae7a4SRichard Tran Mills * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 1083190ae7a4SRichard Tran Mills #endif 1084190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 1085190ae7a4SRichard Tran Mills } 1086190ae7a4SRichard Tran Mills 1087190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 1088190ae7a4SRichard Tran Mills { 1089190ae7a4SRichard Tran Mills PetscFunctionBegin; 1090190ae7a4SRichard Tran Mills C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */ 1091190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 1092190ae7a4SRichard Tran Mills } 1093190ae7a4SRichard Tran Mills 1094190ae7a4SRichard Tran Mills static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 1095190ae7a4SRichard Tran Mills { 1096190ae7a4SRichard Tran Mills PetscFunctionBegin; 1097190ae7a4SRichard Tran Mills C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */ 1098190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 1099190ae7a4SRichard Tran Mills } 1100190ae7a4SRichard Tran Mills 1101190ae7a4SRichard Tran Mills PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 1102190ae7a4SRichard Tran Mills { 1103190ae7a4SRichard Tran Mills PetscErrorCode ierr; 1104190ae7a4SRichard Tran Mills Mat_Product *product = C->product; 1105190ae7a4SRichard Tran Mills 1106190ae7a4SRichard Tran Mills PetscFunctionBegin; 1107190ae7a4SRichard Tran Mills switch (product->type) { 1108190ae7a4SRichard Tran Mills case MATPRODUCT_AB: 1109190ae7a4SRichard Tran Mills ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr); 1110190ae7a4SRichard Tran Mills break; 1111190ae7a4SRichard Tran Mills case MATPRODUCT_AtB: 1112190ae7a4SRichard Tran Mills ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr); 1113190ae7a4SRichard Tran Mills break; 1114190ae7a4SRichard Tran Mills case MATPRODUCT_ABt: 1115190ae7a4SRichard Tran Mills ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr); 1116190ae7a4SRichard Tran Mills break; 1117190ae7a4SRichard Tran Mills case MATPRODUCT_PtAP: 1118190ae7a4SRichard Tran Mills ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr); 1119190ae7a4SRichard Tran Mills break; 1120190ae7a4SRichard Tran Mills case MATPRODUCT_RARt: 1121190ae7a4SRichard Tran Mills ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr); 1122190ae7a4SRichard Tran Mills break; 1123190ae7a4SRichard Tran Mills case MATPRODUCT_ABC: 1124190ae7a4SRichard Tran Mills ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr); 1125190ae7a4SRichard Tran Mills break; 1126190ae7a4SRichard Tran Mills default: 1127190ae7a4SRichard Tran Mills break; 1128190ae7a4SRichard Tran Mills } 1129190ae7a4SRichard Tran Mills PetscFunctionReturn(0); 1130190ae7a4SRichard Tran Mills } 1131431879ecSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 1132190ae7a4SRichard Tran Mills /* ------------------------ End MatProduct code ------------------------ */ 11334f53af40SRichard Tran Mills 11344a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 1135510b72f4SRichard Tran Mills * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 11364a2a386eSRichard Tran Mills * routine, but can also be used to convert an assembled SeqAIJ matrix 11374a2a386eSRichard Tran Mills * into a SeqAIJMKL one. */ 11384a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 11394a2a386eSRichard Tran Mills { 11404a2a386eSRichard Tran Mills PetscErrorCode ierr; 11414a2a386eSRichard Tran Mills Mat B = *newmat; 11424a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 1143c9d46305SRichard Tran Mills PetscBool set; 1144e9c94282SRichard Tran Mills PetscBool sametype; 11454a2a386eSRichard Tran Mills 11464a2a386eSRichard Tran Mills PetscFunctionBegin; 11474a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 11484a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 11494a2a386eSRichard Tran Mills } 11504a2a386eSRichard Tran Mills 1151e9c94282SRichard Tran Mills ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 1152e9c94282SRichard Tran Mills if (sametype) PetscFunctionReturn(0); 1153e9c94282SRichard Tran Mills 11544a2a386eSRichard Tran Mills ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 11554a2a386eSRichard Tran Mills B->spptr = (void*) aijmkl; 11564a2a386eSRichard Tran Mills 1157df555b71SRichard Tran Mills /* Set function pointers for methods that we inherit from AIJ but override. 1158969800c5SRichard Tran Mills * We also parse some command line options below, since those determine some of the methods we point to. */ 11594a2a386eSRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJMKL; 11604a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 11614a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJMKL; 1162c9d46305SRichard Tran Mills 11634abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 1164ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1165d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1166a8327b06SKarl Rupp #else 1167d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 1168d995685eSRichard Tran Mills #endif 11695b49642aSRichard Tran Mills aijmkl->eager_inspection = PETSC_FALSE; 11704abfa3b3SRichard Tran Mills 11714abfa3b3SRichard Tran Mills /* Parse command line options. */ 1172c9d46305SRichard Tran Mills ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 117348292275SRichard Tran Mills ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 117448292275SRichard Tran Mills ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 1175c9d46305SRichard Tran Mills ierr = PetscOptionsEnd();CHKERRQ(ierr); 1176ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1177d995685eSRichard Tran Mills if(!aijmkl->no_SpMV2) { 1178d995685eSRichard Tran Mills ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 1179d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 1180d995685eSRichard Tran Mills } 1181d995685eSRichard Tran Mills #endif 1182c9d46305SRichard Tran Mills 1183ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1184df555b71SRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1185969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1186df555b71SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1187969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 11888a369200SRichard Tran Mills # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1189190ae7a4SRichard Tran Mills B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1190190ae7a4SRichard Tran Mills B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1191190ae7a4SRichard Tran Mills B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1192190ae7a4SRichard Tran Mills B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1193190ae7a4SRichard Tran Mills B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1194ffcab697SRichard Tran Mills # if !defined(PETSC_USE_COMPLEX) 119549ba5396SRichard Tran Mills B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1196190ae7a4SRichard Tran Mills # else 1197190ae7a4SRichard Tran Mills B->ops->ptapnumeric = NULL; 11984f53af40SRichard Tran Mills # endif 1199e8be1fc7SRichard Tran Mills # endif 12001950a7e7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 12011950a7e7SRichard Tran Mills 1202213898a2SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1203213898a2SRichard Tran Mills /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1204213898a2SRichard Tran Mills * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1205213898a2SRichard Tran Mills * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1206213898a2SRichard Tran Mills * versions in which the older interface has not been deprecated, we use the old interface. */ 12071950a7e7SRichard Tran Mills if (aijmkl->no_SpMV2) { 12084a2a386eSRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL; 1209969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 12104a2a386eSRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL; 1211969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1212c9d46305SRichard Tran Mills } 12131950a7e7SRichard Tran Mills #endif 12144a2a386eSRichard Tran Mills 12154a2a386eSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 12164a2a386eSRichard Tran Mills 1217190ae7a4SRichard Tran Mills if(!aijmkl->no_SpMV2) { 1218190ae7a4SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1219190ae7a4SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1220190ae7a4SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",MatProductSetFromOptions_SeqAIJMKL);CHKERRQ(ierr); 1221190ae7a4SRichard Tran Mills #endif 1222190ae7a4SRichard Tran Mills #endif 1223190ae7a4SRichard Tran Mills } 1224190ae7a4SRichard Tran Mills 12254a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 12264a2a386eSRichard Tran Mills *newmat = B; 12274a2a386eSRichard Tran Mills PetscFunctionReturn(0); 12284a2a386eSRichard Tran Mills } 12294a2a386eSRichard Tran Mills 12304a2a386eSRichard Tran Mills /*@C 12314a2a386eSRichard Tran Mills MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 12324a2a386eSRichard Tran Mills This type inherits from AIJ and is largely identical, but uses sparse BLAS 12334a2a386eSRichard Tran Mills routines from Intel MKL whenever possible. 123490147e49SRichard Tran Mills If the installed version of MKL supports the "SpMV2" sparse 123590147e49SRichard Tran Mills inspector-executor routines, then those are used by default. 1236597ee276SRichard Tran Mills MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1237597ee276SRichard Tran Mills symmetric A) operations are currently supported. 1238597ee276SRichard Tran Mills Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 123990147e49SRichard Tran Mills 1240d083f849SBarry Smith Collective 12414a2a386eSRichard Tran Mills 12424a2a386eSRichard Tran Mills Input Parameters: 12434a2a386eSRichard Tran Mills + comm - MPI communicator, set to PETSC_COMM_SELF 12444a2a386eSRichard Tran Mills . m - number of rows 12454a2a386eSRichard Tran Mills . n - number of columns 12464a2a386eSRichard Tran Mills . nz - number of nonzeros per row (same for all rows) 12474a2a386eSRichard Tran Mills - nnz - array containing the number of nonzeros in the various rows 12484a2a386eSRichard Tran Mills (possibly different for each row) or NULL 12494a2a386eSRichard Tran Mills 12504a2a386eSRichard Tran Mills Output Parameter: 12514a2a386eSRichard Tran Mills . A - the matrix 12524a2a386eSRichard Tran Mills 125390147e49SRichard Tran Mills Options Database Keys: 125466b7eeb6SRichard Tran Mills + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 125566b7eeb6SRichard Tran Mills - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 125690147e49SRichard Tran Mills 12574a2a386eSRichard Tran Mills Notes: 12584a2a386eSRichard Tran Mills If nnz is given then nz is ignored 12594a2a386eSRichard Tran Mills 12604a2a386eSRichard Tran Mills Level: intermediate 12614a2a386eSRichard Tran Mills 12624a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 12634a2a386eSRichard Tran Mills @*/ 12644a2a386eSRichard Tran Mills PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 12654a2a386eSRichard Tran Mills { 12664a2a386eSRichard Tran Mills PetscErrorCode ierr; 12674a2a386eSRichard Tran Mills 12684a2a386eSRichard Tran Mills PetscFunctionBegin; 12694a2a386eSRichard Tran Mills ierr = MatCreate(comm,A);CHKERRQ(ierr); 12704a2a386eSRichard Tran Mills ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 12714a2a386eSRichard Tran Mills ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 12724a2a386eSRichard Tran Mills ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 12734a2a386eSRichard Tran Mills PetscFunctionReturn(0); 12744a2a386eSRichard Tran Mills } 12754a2a386eSRichard Tran Mills 12764a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 12774a2a386eSRichard Tran Mills { 12784a2a386eSRichard Tran Mills PetscErrorCode ierr; 12794a2a386eSRichard Tran Mills 12804a2a386eSRichard Tran Mills PetscFunctionBegin; 12814a2a386eSRichard Tran Mills ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 12824a2a386eSRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 12834a2a386eSRichard Tran Mills PetscFunctionReturn(0); 12844a2a386eSRichard Tran Mills } 1285