14a2a386eSRichard Tran Mills /* 24a2a386eSRichard Tran Mills Defines basic operations for the MATSEQAIJMKL matrix class. 34a2a386eSRichard Tran Mills This class is derived from the MATSEQAIJ class and retains the 44a2a386eSRichard Tran Mills compressed row storage (aka Yale sparse matrix format) but uses 54a2a386eSRichard Tran Mills sparse BLAS operations from the Intel Math Kernel Library (MKL) 64a2a386eSRichard Tran Mills wherever possible. 74a2a386eSRichard Tran Mills */ 84a2a386eSRichard Tran Mills 94a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h> 104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 114a2a386eSRichard Tran Mills 124a2a386eSRichard Tran Mills /* MKL include files. */ 134a2a386eSRichard Tran Mills #include <mkl_spblas.h> /* Sparse BLAS */ 144a2a386eSRichard Tran Mills 154a2a386eSRichard Tran Mills typedef struct { 16c9d46305SRichard Tran Mills PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 175b49642aSRichard Tran Mills PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 184abfa3b3SRichard Tran Mills PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19b8cbc1fbSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 20df555b71SRichard Tran Mills sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21df555b71SRichard Tran Mills struct matrix_descr descr; 22b8cbc1fbSRichard Tran Mills #endif 234a2a386eSRichard Tran Mills } Mat_SeqAIJMKL; 244a2a386eSRichard Tran Mills 254a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 264a2a386eSRichard Tran Mills 274a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 284a2a386eSRichard Tran Mills { 294a2a386eSRichard Tran Mills /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 304a2a386eSRichard Tran Mills /* so we will ignore 'MatType type'. */ 314a2a386eSRichard Tran Mills PetscErrorCode ierr; 324a2a386eSRichard Tran Mills Mat B = *newmat; 33a8327b06SKarl Rupp #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 344a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35a8327b06SKarl Rupp #endif 364a2a386eSRichard Tran Mills 374a2a386eSRichard Tran Mills PetscFunctionBegin; 384a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 394a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 404a2a386eSRichard Tran Mills } 414a2a386eSRichard Tran Mills 424a2a386eSRichard Tran Mills /* Reset the original function pointers. */ 4354871a98SRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJ; 444a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 454a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJ; 4654871a98SRichard Tran Mills B->ops->mult = MatMult_SeqAIJ; 47ff03dc53SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJ; 4854871a98SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJ; 49ff03dc53SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50*45fbe478SRichard Tran Mills B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 5187c2a1d7SRichard Tran Mills B->ops->scale = MatScale_SeqAIJ; 5287c2a1d7SRichard Tran Mills B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 5387c2a1d7SRichard Tran Mills B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 5487c2a1d7SRichard Tran Mills B->ops->axpy = MatAXPY_SeqAIJ; 554a2a386eSRichard Tran Mills 56e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 57e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 58e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 59e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 60*45fbe478SRichard Tran Mills if(!aijmkl->no_SpMV2) { 61*45fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 62*45fbe478SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 63*45fbe478SRichard Tran Mills #endif 64*45fbe478SRichard Tran Mills } 65e9c94282SRichard Tran Mills 664abfa3b3SRichard Tran Mills /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 67e9c94282SRichard Tran Mills * simply involves destroying the MKL sparse matrix handle and then freeing 68e9c94282SRichard Tran Mills * the spptr pointer. */ 694abfa3b3SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 70a8327b06SKarl Rupp if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 71a8327b06SKarl Rupp 724abfa3b3SRichard Tran Mills if (aijmkl->sparse_optimized) { 730632b357SRichard Tran Mills sparse_status_t stat; 744abfa3b3SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 754abfa3b3SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 764abfa3b3SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 774abfa3b3SRichard Tran Mills } 784abfa3b3SRichard Tran Mills } 794abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 80e9c94282SRichard Tran Mills ierr = PetscFree(B->spptr);CHKERRQ(ierr); 814a2a386eSRichard Tran Mills 824a2a386eSRichard Tran Mills /* Change the type of B to MATSEQAIJ. */ 834a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 844a2a386eSRichard Tran Mills 854a2a386eSRichard Tran Mills *newmat = B; 864a2a386eSRichard Tran Mills PetscFunctionReturn(0); 874a2a386eSRichard Tran Mills } 884a2a386eSRichard Tran Mills 894a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 904a2a386eSRichard Tran Mills { 914a2a386eSRichard Tran Mills PetscErrorCode ierr; 924a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 934a2a386eSRichard Tran Mills 944a2a386eSRichard Tran Mills PetscFunctionBegin; 95e9c94282SRichard Tran Mills 96e9c94282SRichard Tran Mills /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 97e9c94282SRichard Tran Mills * spptr pointer. */ 98e9c94282SRichard Tran Mills if (aijmkl) { 994a2a386eSRichard Tran Mills /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 1004abfa3b3SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 1014abfa3b3SRichard Tran Mills if (aijmkl->sparse_optimized) { 1024abfa3b3SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 1034abfa3b3SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 1044abfa3b3SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 1054abfa3b3SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 1064abfa3b3SRichard Tran Mills } 1074abfa3b3SRichard Tran Mills } 1084abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1094a2a386eSRichard Tran Mills ierr = PetscFree(A->spptr);CHKERRQ(ierr); 110e9c94282SRichard Tran Mills } 1114a2a386eSRichard Tran Mills 1124a2a386eSRichard Tran Mills /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 1134a2a386eSRichard Tran Mills * to destroy everything that remains. */ 1144a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 1154a2a386eSRichard Tran Mills /* Note that I don't call MatSetType(). I believe this is because that 1164a2a386eSRichard Tran Mills * is only to be called when *building* a matrix. I could be wrong, but 1174a2a386eSRichard Tran Mills * that is how things work for the SuperLU matrix class. */ 1184a2a386eSRichard Tran Mills ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 1194a2a386eSRichard Tran Mills PetscFunctionReturn(0); 1204a2a386eSRichard Tran Mills } 1214a2a386eSRichard Tran Mills 1225b49642aSRichard Tran Mills /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 1235b49642aSRichard Tran Mills * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 1245b49642aSRichard Tran Mills * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 1255b49642aSRichard Tran Mills * handle, creates a new one, and then calls mkl_sparse_optimize(). 1265b49642aSRichard Tran Mills * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 1275b49642aSRichard Tran Mills * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 1285b49642aSRichard Tran Mills * an unoptimized matrix handle here. */ 1296e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 1304a2a386eSRichard Tran Mills { 1316e369cd5SRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 1326e369cd5SRichard Tran Mills /* If the MKL library does not have mkl_sparse_optimize(), then this routine 1336e369cd5SRichard Tran Mills * does nothing. We make it callable anyway in this case because it cuts 1346e369cd5SRichard Tran Mills * down on littering the code with #ifdefs. */ 135*45fbe478SRichard Tran Mills PetscFunctionBegin; 1366e369cd5SRichard Tran Mills PetscFunctionReturn(0); 1376e369cd5SRichard Tran Mills #else 138a8327b06SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 139a8327b06SKarl Rupp Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 140a8327b06SKarl Rupp PetscInt m,n; 141a8327b06SKarl Rupp MatScalar *aa; 142a8327b06SKarl Rupp PetscInt *aj,*ai; 1436e369cd5SRichard Tran Mills sparse_status_t stat; 1444a2a386eSRichard Tran Mills 145a8327b06SKarl Rupp PetscFunctionBegin; 1466e369cd5SRichard Tran Mills if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 1476e369cd5SRichard Tran Mills 1480632b357SRichard Tran Mills if (aijmkl->sparse_optimized) { 1490632b357SRichard Tran Mills /* Matrix has been previously assembled and optimized. Must destroy old 1500632b357SRichard Tran Mills * matrix handle before running the optimization step again. */ 1510632b357SRichard Tran Mills stat = mkl_sparse_destroy(aijmkl->csrA); 1520632b357SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 1530632b357SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 1540632b357SRichard Tran Mills } 1550632b357SRichard Tran Mills } 1568d3fe1b0SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 1576e369cd5SRichard Tran Mills 158c9d46305SRichard Tran Mills /* Now perform the SpMV2 setup and matrix optimization. */ 159df555b71SRichard Tran Mills aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 160df555b71SRichard Tran Mills aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 161df555b71SRichard Tran Mills aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 16258678438SRichard Tran Mills m = A->rmap->n; 16358678438SRichard Tran Mills n = A->cmap->n; 164df555b71SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 165df555b71SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 166df555b71SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 16780095d54SIrina Sokolova if ((a->nz!=0) & !(A->structure_only)) { 1688d3fe1b0SRichard Tran Mills /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 1698d3fe1b0SRichard Tran Mills * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 17058678438SRichard Tran Mills stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 171df555b71SRichard Tran Mills stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 172df555b71SRichard Tran Mills stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 173df555b71SRichard Tran Mills stat = mkl_sparse_optimize(aijmkl->csrA); 174df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 175f68ad4bdSRichard Tran Mills SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 176df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 177df555b71SRichard Tran Mills } 1784abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 179c9d46305SRichard Tran Mills } 1806e369cd5SRichard Tran Mills 1816e369cd5SRichard Tran Mills PetscFunctionReturn(0); 182d995685eSRichard Tran Mills #endif 1836e369cd5SRichard Tran Mills } 1846e369cd5SRichard Tran Mills 18519afcda9SRichard Tran Mills /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 18619afcda9SRichard Tran Mills * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 18719afcda9SRichard Tran Mills * matrix handle. */ 18819afcda9SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 18919afcda9SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,Mat *mat) 19019afcda9SRichard Tran Mills { 19119afcda9SRichard Tran Mills PetscErrorCode ierr; 19219afcda9SRichard Tran Mills sparse_status_t stat; 19319afcda9SRichard Tran Mills sparse_index_base_t indexing; 19419afcda9SRichard Tran Mills PetscInt nrows, ncols; 195*45fbe478SRichard Tran Mills PetscInt *aj,*ai,*dummy; 19619afcda9SRichard Tran Mills MatScalar *aa; 19719afcda9SRichard Tran Mills Mat A; 19819afcda9SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 19919afcda9SRichard Tran Mills 200*45fbe478SRichard Tran Mills /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 201*45fbe478SRichard Tran Mills stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 20219afcda9SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 20319afcda9SRichard Tran Mills SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 20419afcda9SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 20519afcda9SRichard Tran Mills } 20619afcda9SRichard Tran Mills ierr = MatCreate(comm,&A);CHKERRQ(ierr); 20719afcda9SRichard Tran Mills ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 208*45fbe478SRichard Tran Mills ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 20919afcda9SRichard Tran Mills ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 21019afcda9SRichard Tran Mills 21119afcda9SRichard Tran Mills /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 21219afcda9SRichard Tran Mills * Now turn it into a MATSEQAIJMKL. */ 21319afcda9SRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 21419afcda9SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 21519afcda9SRichard Tran Mills aijmkl->csrA = csrA; 21619afcda9SRichard Tran Mills /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 21719afcda9SRichard Tran Mills * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 21819afcda9SRichard Tran Mills * and just need to be able to run the MKL optimization step. */ 21919afcda9SRichard Tran Mills stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 22019afcda9SRichard Tran Mills stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 22119afcda9SRichard Tran Mills stat = mkl_sparse_optimize(aijmkl->csrA); 22219afcda9SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 22319afcda9SRichard Tran Mills SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 22419afcda9SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 22519afcda9SRichard Tran Mills } 22619afcda9SRichard Tran Mills aijmkl->sparse_optimized = PETSC_TRUE; 22719afcda9SRichard Tran Mills 22819afcda9SRichard Tran Mills *mat = A; 22919afcda9SRichard Tran Mills PetscFunctionReturn(0); 23019afcda9SRichard Tran Mills } 23119afcda9SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 23219afcda9SRichard Tran Mills 2336e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 2346e369cd5SRichard Tran Mills { 2356e369cd5SRichard Tran Mills PetscErrorCode ierr; 2366e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 2376e369cd5SRichard Tran Mills Mat_SeqAIJMKL *aijmkl_dest; 2386e369cd5SRichard Tran Mills 2396e369cd5SRichard Tran Mills PetscFunctionBegin; 2406e369cd5SRichard Tran Mills ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 2416e369cd5SRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 2426e369cd5SRichard Tran Mills aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 2436e369cd5SRichard Tran Mills ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 2446e369cd5SRichard Tran Mills aijmkl_dest->sparse_optimized = PETSC_FALSE; 2455b49642aSRichard Tran Mills if (aijmkl->eager_inspection) { 2466e369cd5SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 2475b49642aSRichard Tran Mills } 2486e369cd5SRichard Tran Mills PetscFunctionReturn(0); 2496e369cd5SRichard Tran Mills } 2506e369cd5SRichard Tran Mills 2516e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 2526e369cd5SRichard Tran Mills { 2536e369cd5SRichard Tran Mills PetscErrorCode ierr; 2546e369cd5SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2555b49642aSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 2566e369cd5SRichard Tran Mills 2576e369cd5SRichard Tran Mills PetscFunctionBegin; 2586e369cd5SRichard Tran Mills if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2596e369cd5SRichard Tran Mills 2606e369cd5SRichard Tran Mills /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 2616e369cd5SRichard Tran Mills * extra information and some different methods, call the AssemblyEnd 2626e369cd5SRichard Tran Mills * routine for a MATSEQAIJ. 2636e369cd5SRichard Tran Mills * I'm not sure if this is the best way to do this, but it avoids 264d96e85feSRichard Tran Mills * a lot of code duplication. */ 2656e369cd5SRichard Tran Mills a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 2666e369cd5SRichard Tran Mills ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 2676e369cd5SRichard Tran Mills 2685b49642aSRichard Tran Mills /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 2695b49642aSRichard Tran Mills * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 2705b49642aSRichard Tran Mills aijmkl = (Mat_SeqAIJMKL*) A->spptr; 2715b49642aSRichard Tran Mills if (aijmkl->eager_inspection) { 2726e369cd5SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 2735b49642aSRichard Tran Mills } 274df555b71SRichard Tran Mills 2754a2a386eSRichard Tran Mills PetscFunctionReturn(0); 2764a2a386eSRichard Tran Mills } 2774a2a386eSRichard Tran Mills 2784a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 2794a2a386eSRichard Tran Mills { 2804a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2814a2a386eSRichard Tran Mills const PetscScalar *x; 2824a2a386eSRichard Tran Mills PetscScalar *y; 2834a2a386eSRichard Tran Mills const MatScalar *aa; 2844a2a386eSRichard Tran Mills PetscErrorCode ierr; 2854a2a386eSRichard Tran Mills PetscInt m=A->rmap->n; 286db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 287db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 288db63039fSRichard Tran Mills PetscScalar beta = 0.0; 2894a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 290db63039fSRichard Tran Mills char matdescra[6]; 291db63039fSRichard Tran Mills 2924a2a386eSRichard Tran Mills 2934a2a386eSRichard Tran Mills /* Variables not in MatMult_SeqAIJ. */ 294ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 295ff03dc53SRichard Tran Mills 296ff03dc53SRichard Tran Mills PetscFunctionBegin; 297db63039fSRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 298db63039fSRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 299ff03dc53SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 300ff03dc53SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 301ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 302ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 303ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 304ff03dc53SRichard Tran Mills 305ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 306db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 307ff03dc53SRichard Tran Mills 308ff03dc53SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 309ff03dc53SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 310ff03dc53SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 311ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 312ff03dc53SRichard Tran Mills } 313ff03dc53SRichard Tran Mills 314d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 315df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 316df555b71SRichard Tran Mills { 317df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 318df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 319df555b71SRichard Tran Mills const PetscScalar *x; 320df555b71SRichard Tran Mills PetscScalar *y; 321df555b71SRichard Tran Mills PetscErrorCode ierr; 322df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 323df555b71SRichard Tran Mills 324df555b71SRichard Tran Mills PetscFunctionBegin; 325df555b71SRichard Tran Mills 32638987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 32738987b35SRichard Tran Mills if(!a->nz) { 32838987b35SRichard Tran Mills PetscInt i; 32938987b35SRichard Tran Mills PetscInt m=A->rmap->n; 33038987b35SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 33138987b35SRichard Tran Mills for (i=0; i<m; i++) { 33238987b35SRichard Tran Mills y[i] = 0.0; 33338987b35SRichard Tran Mills } 33438987b35SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 33538987b35SRichard Tran Mills PetscFunctionReturn(0); 33638987b35SRichard Tran Mills } 337f36dfe3fSRichard Tran Mills 338df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 339df555b71SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 340df555b71SRichard Tran Mills 3413fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 3423fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 3433fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 3443fa15762SRichard Tran Mills if (!aijmkl->sparse_optimized) { 3453fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 3463fa15762SRichard Tran Mills } 3473fa15762SRichard Tran Mills 348df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMult. */ 349df555b71SRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 350df555b71SRichard Tran Mills 351df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 352df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 353df555b71SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 354df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 355df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 356df555b71SRichard Tran Mills } 357df555b71SRichard Tran Mills PetscFunctionReturn(0); 358df555b71SRichard Tran Mills } 359d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 360df555b71SRichard Tran Mills 361ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 362ff03dc53SRichard Tran Mills { 363ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 364ff03dc53SRichard Tran Mills const PetscScalar *x; 365ff03dc53SRichard Tran Mills PetscScalar *y; 366ff03dc53SRichard Tran Mills const MatScalar *aa; 367ff03dc53SRichard Tran Mills PetscErrorCode ierr; 368ff03dc53SRichard Tran Mills PetscInt m=A->rmap->n; 369db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 370db63039fSRichard Tran Mills PetscScalar alpha = 1.0; 371db63039fSRichard Tran Mills PetscScalar beta = 0.0; 372ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 373db63039fSRichard Tran Mills char matdescra[6]; 374ff03dc53SRichard Tran Mills 375ff03dc53SRichard Tran Mills /* Variables not in MatMultTranspose_SeqAIJ. */ 376ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 3774a2a386eSRichard Tran Mills 3784a2a386eSRichard Tran Mills PetscFunctionBegin; 379969800c5SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 380969800c5SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 3814a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 3824a2a386eSRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 3834a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 3844a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 3854a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 3864a2a386eSRichard Tran Mills 3874a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 388db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 3894a2a386eSRichard Tran Mills 3904a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 3914a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 3924a2a386eSRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 3934a2a386eSRichard Tran Mills PetscFunctionReturn(0); 3944a2a386eSRichard Tran Mills } 3954a2a386eSRichard Tran Mills 396d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 397df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 398df555b71SRichard Tran Mills { 399df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 400df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 401df555b71SRichard Tran Mills const PetscScalar *x; 402df555b71SRichard Tran Mills PetscScalar *y; 403df555b71SRichard Tran Mills PetscErrorCode ierr; 4040632b357SRichard Tran Mills sparse_status_t stat; 405df555b71SRichard Tran Mills 406df555b71SRichard Tran Mills PetscFunctionBegin; 407df555b71SRichard Tran Mills 40838987b35SRichard Tran Mills /* If there are no nonzero entries, zero yy and return immediately. */ 40938987b35SRichard Tran Mills if(!a->nz) { 41038987b35SRichard Tran Mills PetscInt i; 41138987b35SRichard Tran Mills PetscInt n=A->cmap->n; 41238987b35SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 41338987b35SRichard Tran Mills for (i=0; i<n; i++) { 41438987b35SRichard Tran Mills y[i] = 0.0; 41538987b35SRichard Tran Mills } 41638987b35SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 41738987b35SRichard Tran Mills PetscFunctionReturn(0); 41838987b35SRichard Tran Mills } 419f36dfe3fSRichard Tran Mills 420df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 421df555b71SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 422df555b71SRichard Tran Mills 4233fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 4243fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 4253fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 4263fa15762SRichard Tran Mills if (!aijmkl->sparse_optimized) { 4273fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 4283fa15762SRichard Tran Mills } 4293fa15762SRichard Tran Mills 430df555b71SRichard Tran Mills /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 431df555b71SRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 432df555b71SRichard Tran Mills 433df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 434df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 435df555b71SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 436df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 437df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 438df555b71SRichard Tran Mills } 439df555b71SRichard Tran Mills PetscFunctionReturn(0); 440df555b71SRichard Tran Mills } 441d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 442df555b71SRichard Tran Mills 4434a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 4444a2a386eSRichard Tran Mills { 4454a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4464a2a386eSRichard Tran Mills const PetscScalar *x; 4474a2a386eSRichard Tran Mills PetscScalar *y,*z; 4484a2a386eSRichard Tran Mills const MatScalar *aa; 4494a2a386eSRichard Tran Mills PetscErrorCode ierr; 4504a2a386eSRichard Tran Mills PetscInt m=A->rmap->n; 451db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 4524a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 4534a2a386eSRichard Tran Mills PetscInt i; 4544a2a386eSRichard Tran Mills 455ff03dc53SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 456ff03dc53SRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 457a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 458db63039fSRichard Tran Mills PetscScalar beta; 459a84739b8SRichard Tran Mills char matdescra[6]; 460ff03dc53SRichard Tran Mills 461ff03dc53SRichard Tran Mills PetscFunctionBegin; 462a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 463a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 464a84739b8SRichard Tran Mills 465ff03dc53SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 466ff03dc53SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 467ff03dc53SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 468ff03dc53SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 469ff03dc53SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 470ff03dc53SRichard Tran Mills 471ff03dc53SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 472a84739b8SRichard Tran Mills if (zz == yy) { 473a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 474db63039fSRichard Tran Mills beta = 1.0; 475db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 476a84739b8SRichard Tran Mills } else { 477db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 478db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 479db63039fSRichard Tran Mills beta = 0.0; 480db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 481ff03dc53SRichard Tran Mills for (i=0; i<m; i++) { 482ff03dc53SRichard Tran Mills z[i] += y[i]; 483ff03dc53SRichard Tran Mills } 484a84739b8SRichard Tran Mills } 485ff03dc53SRichard Tran Mills 486ff03dc53SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 487ff03dc53SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 488ff03dc53SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 489ff03dc53SRichard Tran Mills PetscFunctionReturn(0); 490ff03dc53SRichard Tran Mills } 491ff03dc53SRichard Tran Mills 492d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 493df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 494df555b71SRichard Tran Mills { 495df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 496df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 497df555b71SRichard Tran Mills const PetscScalar *x; 498df555b71SRichard Tran Mills PetscScalar *y,*z; 499df555b71SRichard Tran Mills PetscErrorCode ierr; 500df555b71SRichard Tran Mills PetscInt m=A->rmap->n; 501df555b71SRichard Tran Mills PetscInt i; 502df555b71SRichard Tran Mills 503df555b71SRichard Tran Mills /* Variables not in MatMultAdd_SeqAIJ. */ 504df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 505df555b71SRichard Tran Mills 506df555b71SRichard Tran Mills PetscFunctionBegin; 507df555b71SRichard Tran Mills 50838987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 50938987b35SRichard Tran Mills if(!a->nz) { 51038987b35SRichard Tran Mills PetscInt i; 51138987b35SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 51238987b35SRichard Tran Mills for (i=0; i<m; i++) { 51338987b35SRichard Tran Mills z[i] = y[i]; 51438987b35SRichard Tran Mills } 51538987b35SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 51638987b35SRichard Tran Mills PetscFunctionReturn(0); 51738987b35SRichard Tran Mills } 518df555b71SRichard Tran Mills 519df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 520df555b71SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 521df555b71SRichard Tran Mills 5223fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 5233fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 5243fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 5253fa15762SRichard Tran Mills if (!aijmkl->sparse_optimized) { 5263fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 5273fa15762SRichard Tran Mills } 5283fa15762SRichard Tran Mills 529df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 530df555b71SRichard Tran Mills if (zz == yy) { 531df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 532df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 533db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 534df555b71SRichard Tran Mills } else { 535df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 536df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 537db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 538df555b71SRichard Tran Mills for (i=0; i<m; i++) { 539df555b71SRichard Tran Mills z[i] += y[i]; 540df555b71SRichard Tran Mills } 541df555b71SRichard Tran Mills } 542df555b71SRichard Tran Mills 543df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 544df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 545df555b71SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 546df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 547df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 548df555b71SRichard Tran Mills } 549df555b71SRichard Tran Mills PetscFunctionReturn(0); 550df555b71SRichard Tran Mills } 551d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 552df555b71SRichard Tran Mills 553ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 554ff03dc53SRichard Tran Mills { 555ff03dc53SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 556ff03dc53SRichard Tran Mills const PetscScalar *x; 557ff03dc53SRichard Tran Mills PetscScalar *y,*z; 558ff03dc53SRichard Tran Mills const MatScalar *aa; 559ff03dc53SRichard Tran Mills PetscErrorCode ierr; 560ff03dc53SRichard Tran Mills PetscInt m=A->rmap->n; 561db63039fSRichard Tran Mills PetscInt n=A->cmap->n; 562ff03dc53SRichard Tran Mills const PetscInt *aj,*ai; 563ff03dc53SRichard Tran Mills PetscInt i; 564ff03dc53SRichard Tran Mills 565ff03dc53SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 566ff03dc53SRichard Tran Mills char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 567a84739b8SRichard Tran Mills PetscScalar alpha = 1.0; 568db63039fSRichard Tran Mills PetscScalar beta; 569a84739b8SRichard Tran Mills char matdescra[6]; 5704a2a386eSRichard Tran Mills 5714a2a386eSRichard Tran Mills PetscFunctionBegin; 572a84739b8SRichard Tran Mills matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 573a84739b8SRichard Tran Mills matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 574a84739b8SRichard Tran Mills 5754a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 5764a2a386eSRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 5774a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 5784a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 5794a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 5804a2a386eSRichard Tran Mills 5814a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 582a84739b8SRichard Tran Mills if (zz == yy) { 583a84739b8SRichard Tran Mills /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 584db63039fSRichard Tran Mills beta = 1.0; 585969800c5SRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 586a84739b8SRichard Tran Mills } else { 587db63039fSRichard Tran Mills /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 588db63039fSRichard Tran Mills * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 589db63039fSRichard Tran Mills beta = 0.0; 590db63039fSRichard Tran Mills mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 591969800c5SRichard Tran Mills for (i=0; i<n; i++) { 5924a2a386eSRichard Tran Mills z[i] += y[i]; 5934a2a386eSRichard Tran Mills } 594a84739b8SRichard Tran Mills } 5954a2a386eSRichard Tran Mills 5964a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 5974a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 5984a2a386eSRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 5994a2a386eSRichard Tran Mills PetscFunctionReturn(0); 6004a2a386eSRichard Tran Mills } 6014a2a386eSRichard Tran Mills 602d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 603df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 604df555b71SRichard Tran Mills { 605df555b71SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 606df555b71SRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 607df555b71SRichard Tran Mills const PetscScalar *x; 608df555b71SRichard Tran Mills PetscScalar *y,*z; 609df555b71SRichard Tran Mills PetscErrorCode ierr; 610969800c5SRichard Tran Mills PetscInt n=A->cmap->n; 611df555b71SRichard Tran Mills PetscInt i; 612df555b71SRichard Tran Mills 613df555b71SRichard Tran Mills /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 614df555b71SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 615df555b71SRichard Tran Mills 616df555b71SRichard Tran Mills PetscFunctionBegin; 617df555b71SRichard Tran Mills 61838987b35SRichard Tran Mills /* If there are no nonzero entries, set zz = yy and return immediately. */ 61938987b35SRichard Tran Mills if(!a->nz) { 62038987b35SRichard Tran Mills PetscInt i; 62138987b35SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 62238987b35SRichard Tran Mills for (i=0; i<n; i++) { 62338987b35SRichard Tran Mills z[i] = y[i]; 62438987b35SRichard Tran Mills } 62538987b35SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 62638987b35SRichard Tran Mills PetscFunctionReturn(0); 62738987b35SRichard Tran Mills } 628f36dfe3fSRichard Tran Mills 629df555b71SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 630df555b71SRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 631df555b71SRichard Tran Mills 6323fa15762SRichard Tran Mills /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 6333fa15762SRichard Tran Mills * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 6343fa15762SRichard Tran Mills * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 6353fa15762SRichard Tran Mills if (!aijmkl->sparse_optimized) { 6363fa15762SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 6373fa15762SRichard Tran Mills } 6383fa15762SRichard Tran Mills 639df555b71SRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 640df555b71SRichard Tran Mills if (zz == yy) { 641df555b71SRichard Tran Mills /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 642df555b71SRichard Tran Mills * with alpha and beta both set to 1.0. */ 643db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 644df555b71SRichard Tran Mills } else { 645df555b71SRichard Tran Mills /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 646df555b71SRichard Tran Mills * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 647db63039fSRichard Tran Mills stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 648969800c5SRichard Tran Mills for (i=0; i<n; i++) { 649df555b71SRichard Tran Mills z[i] += y[i]; 650df555b71SRichard Tran Mills } 651df555b71SRichard Tran Mills } 652df555b71SRichard Tran Mills 653df555b71SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 654df555b71SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 655df555b71SRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 656df555b71SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 657df555b71SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 658df555b71SRichard Tran Mills } 659df555b71SRichard Tran Mills PetscFunctionReturn(0); 660df555b71SRichard Tran Mills } 661d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 662df555b71SRichard Tran Mills 663*45fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 664*45fbe478SRichard Tran Mills PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 665*45fbe478SRichard Tran Mills { 666*45fbe478SRichard Tran Mills Mat_SeqAIJMKL *a, *b; 667*45fbe478SRichard Tran Mills sparse_matrix_t csrA, csrB, csrC; 668*45fbe478SRichard Tran Mills PetscErrorCode ierr; 669*45fbe478SRichard Tran Mills sparse_status_t stat = SPARSE_STATUS_SUCCESS; 670*45fbe478SRichard Tran Mills 671*45fbe478SRichard Tran Mills PetscFunctionBegin; 672*45fbe478SRichard Tran Mills a = (Mat_SeqAIJMKL*)A->spptr; 673*45fbe478SRichard Tran Mills b = (Mat_SeqAIJMKL*)B->spptr; 674*45fbe478SRichard Tran Mills if (!a->sparse_optimized) { 675*45fbe478SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(A); 676*45fbe478SRichard Tran Mills } 677*45fbe478SRichard Tran Mills if (!b->sparse_optimized) { 678*45fbe478SRichard Tran Mills MatSeqAIJMKL_create_mkl_handle(B); 679*45fbe478SRichard Tran Mills } 680*45fbe478SRichard Tran Mills csrA = a->csrA; 681*45fbe478SRichard Tran Mills csrB = b->csrA; 682*45fbe478SRichard Tran Mills 683*45fbe478SRichard Tran Mills stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 684*45fbe478SRichard Tran Mills if (stat != SPARSE_STATUS_SUCCESS) { 685*45fbe478SRichard Tran Mills SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 686*45fbe478SRichard Tran Mills PetscFunctionReturn(PETSC_ERR_LIB); 687*45fbe478SRichard Tran Mills } 688*45fbe478SRichard Tran Mills 689*45fbe478SRichard Tran Mills /* TODO: Make this handle MAT_REUSE_MATRIX sensibly; MKL has no notion of this, but we still need to do the right thing. */ 690*45fbe478SRichard Tran Mills ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,C);CHKERRQ(ierr); 691*45fbe478SRichard Tran Mills 692*45fbe478SRichard Tran Mills PetscFunctionReturn(0); 693*45fbe478SRichard Tran Mills } 694*45fbe478SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 695*45fbe478SRichard Tran Mills 69687c2a1d7SRichard Tran Mills PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 697db63039fSRichard Tran Mills { 698db63039fSRichard Tran Mills PetscErrorCode ierr; 699db63039fSRichard Tran Mills 70087c2a1d7SRichard Tran Mills PetscFunctionBegin; 701db63039fSRichard Tran Mills ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 702db63039fSRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 703db63039fSRichard Tran Mills PetscFunctionReturn(0); 704db63039fSRichard Tran Mills } 705df555b71SRichard Tran Mills 70687c2a1d7SRichard Tran Mills PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 70787c2a1d7SRichard Tran Mills { 70887c2a1d7SRichard Tran Mills PetscErrorCode ierr; 70987c2a1d7SRichard Tran Mills 71087c2a1d7SRichard Tran Mills PetscFunctionBegin; 71187c2a1d7SRichard Tran Mills ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 71287c2a1d7SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 71387c2a1d7SRichard Tran Mills PetscFunctionReturn(0); 71487c2a1d7SRichard Tran Mills } 71587c2a1d7SRichard Tran Mills 71687c2a1d7SRichard Tran Mills PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 71787c2a1d7SRichard Tran Mills { 71887c2a1d7SRichard Tran Mills PetscErrorCode ierr; 71987c2a1d7SRichard Tran Mills 72087c2a1d7SRichard Tran Mills PetscFunctionBegin; 72187c2a1d7SRichard Tran Mills ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 72287c2a1d7SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 72387c2a1d7SRichard Tran Mills PetscFunctionReturn(0); 72487c2a1d7SRichard Tran Mills } 72587c2a1d7SRichard Tran Mills 72687c2a1d7SRichard Tran Mills PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 72787c2a1d7SRichard Tran Mills { 72887c2a1d7SRichard Tran Mills PetscErrorCode ierr; 72987c2a1d7SRichard Tran Mills 73087c2a1d7SRichard Tran Mills PetscFunctionBegin; 73187c2a1d7SRichard Tran Mills ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 73287c2a1d7SRichard Tran Mills if (str == SAME_NONZERO_PATTERN) { 73387c2a1d7SRichard Tran Mills /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 73487c2a1d7SRichard Tran Mills ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 73587c2a1d7SRichard Tran Mills } 73687c2a1d7SRichard Tran Mills PetscFunctionReturn(0); 73787c2a1d7SRichard Tran Mills } 73887c2a1d7SRichard Tran Mills 7394a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 7404a2a386eSRichard Tran Mills * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 7414a2a386eSRichard Tran Mills * routine, but can also be used to convert an assembled SeqAIJ matrix 7424a2a386eSRichard Tran Mills * into a SeqAIJMKL one. */ 7434a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 7444a2a386eSRichard Tran Mills { 7454a2a386eSRichard Tran Mills PetscErrorCode ierr; 7464a2a386eSRichard Tran Mills Mat B = *newmat; 7474a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 748c9d46305SRichard Tran Mills PetscBool set; 749e9c94282SRichard Tran Mills PetscBool sametype; 7504a2a386eSRichard Tran Mills 7514a2a386eSRichard Tran Mills PetscFunctionBegin; 7524a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 7534a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 7544a2a386eSRichard Tran Mills } 7554a2a386eSRichard Tran Mills 756e9c94282SRichard Tran Mills ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 757e9c94282SRichard Tran Mills if (sametype) PetscFunctionReturn(0); 758e9c94282SRichard Tran Mills 7594a2a386eSRichard Tran Mills ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 7604a2a386eSRichard Tran Mills B->spptr = (void*) aijmkl; 7614a2a386eSRichard Tran Mills 762df555b71SRichard Tran Mills /* Set function pointers for methods that we inherit from AIJ but override. 763969800c5SRichard Tran Mills * We also parse some command line options below, since those determine some of the methods we point to. */ 7644a2a386eSRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJMKL; 7654a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 7664a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJMKL; 767c9d46305SRichard Tran Mills 7684abfa3b3SRichard Tran Mills aijmkl->sparse_optimized = PETSC_FALSE; 769d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 770d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 771a8327b06SKarl Rupp #else 772d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 773d995685eSRichard Tran Mills #endif 7745b49642aSRichard Tran Mills aijmkl->eager_inspection = PETSC_FALSE; 7754abfa3b3SRichard Tran Mills 7764abfa3b3SRichard Tran Mills /* Parse command line options. */ 777c9d46305SRichard Tran Mills ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 778c9d46305SRichard Tran Mills ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 7795b49642aSRichard Tran Mills ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 780c9d46305SRichard Tran Mills ierr = PetscOptionsEnd();CHKERRQ(ierr); 781d995685eSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 782d995685eSRichard Tran Mills if(!aijmkl->no_SpMV2) { 783d995685eSRichard Tran Mills ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 784d995685eSRichard Tran Mills aijmkl->no_SpMV2 = PETSC_TRUE; 785d995685eSRichard Tran Mills } 786d995685eSRichard Tran Mills #endif 787c9d46305SRichard Tran Mills 788c9d46305SRichard Tran Mills if(!aijmkl->no_SpMV2) { 789d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 790df555b71SRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 791969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 792df555b71SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 793969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 794*45fbe478SRichard Tran Mills B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 795d995685eSRichard Tran Mills #endif 796c9d46305SRichard Tran Mills } else { 7974a2a386eSRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL; 798969800c5SRichard Tran Mills B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 7994a2a386eSRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL; 800969800c5SRichard Tran Mills B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 801c9d46305SRichard Tran Mills } 8024a2a386eSRichard Tran Mills 803db63039fSRichard Tran Mills B->ops->scale = MatScale_SeqAIJMKL; 80487c2a1d7SRichard Tran Mills B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 80587c2a1d7SRichard Tran Mills B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 80687c2a1d7SRichard Tran Mills B->ops->axpy = MatAXPY_SeqAIJMKL; 807db63039fSRichard Tran Mills 808db63039fSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 8094a2a386eSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 810e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 811e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 812e9c94282SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 813*45fbe478SRichard Tran Mills if(!aijmkl->no_SpMV2) { 814*45fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 815*45fbe478SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 816*45fbe478SRichard Tran Mills #endif 817*45fbe478SRichard Tran Mills } 8184a2a386eSRichard Tran Mills 8194a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 8204a2a386eSRichard Tran Mills *newmat = B; 8214a2a386eSRichard Tran Mills PetscFunctionReturn(0); 8224a2a386eSRichard Tran Mills } 8234a2a386eSRichard Tran Mills 8244a2a386eSRichard Tran Mills /*@C 8254a2a386eSRichard Tran Mills MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 8264a2a386eSRichard Tran Mills This type inherits from AIJ and is largely identical, but uses sparse BLAS 8274a2a386eSRichard Tran Mills routines from Intel MKL whenever possible. 82890147e49SRichard Tran Mills MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 82990147e49SRichard Tran Mills operations are currently supported. 83090147e49SRichard Tran Mills If the installed version of MKL supports the "SpMV2" sparse 83190147e49SRichard Tran Mills inspector-executor routines, then those are used by default. 83290147e49SRichard Tran Mills 8334a2a386eSRichard Tran Mills Collective on MPI_Comm 8344a2a386eSRichard Tran Mills 8354a2a386eSRichard Tran Mills Input Parameters: 8364a2a386eSRichard Tran Mills + comm - MPI communicator, set to PETSC_COMM_SELF 8374a2a386eSRichard Tran Mills . m - number of rows 8384a2a386eSRichard Tran Mills . n - number of columns 8394a2a386eSRichard Tran Mills . nz - number of nonzeros per row (same for all rows) 8404a2a386eSRichard Tran Mills - nnz - array containing the number of nonzeros in the various rows 8414a2a386eSRichard Tran Mills (possibly different for each row) or NULL 8424a2a386eSRichard Tran Mills 8434a2a386eSRichard Tran Mills Output Parameter: 8444a2a386eSRichard Tran Mills . A - the matrix 8454a2a386eSRichard Tran Mills 84690147e49SRichard Tran Mills Options Database Keys: 84790147e49SRichard Tran Mills . -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines 84890147e49SRichard Tran Mills 8494a2a386eSRichard Tran Mills Notes: 8504a2a386eSRichard Tran Mills If nnz is given then nz is ignored 8514a2a386eSRichard Tran Mills 8524a2a386eSRichard Tran Mills Level: intermediate 8534a2a386eSRichard Tran Mills 85490147e49SRichard Tran Mills .keywords: matrix, MKL, sparse, parallel 8554a2a386eSRichard Tran Mills 8564a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 8574a2a386eSRichard Tran Mills @*/ 8584a2a386eSRichard Tran Mills PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 8594a2a386eSRichard Tran Mills { 8604a2a386eSRichard Tran Mills PetscErrorCode ierr; 8614a2a386eSRichard Tran Mills 8624a2a386eSRichard Tran Mills PetscFunctionBegin; 8634a2a386eSRichard Tran Mills ierr = MatCreate(comm,A);CHKERRQ(ierr); 8644a2a386eSRichard Tran Mills ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 8654a2a386eSRichard Tran Mills ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 8664a2a386eSRichard Tran Mills ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 8674a2a386eSRichard Tran Mills PetscFunctionReturn(0); 8684a2a386eSRichard Tran Mills } 8694a2a386eSRichard Tran Mills 8704a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 8714a2a386eSRichard Tran Mills { 8724a2a386eSRichard Tran Mills PetscErrorCode ierr; 8734a2a386eSRichard Tran Mills 8744a2a386eSRichard Tran Mills PetscFunctionBegin; 8754a2a386eSRichard Tran Mills ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 8764a2a386eSRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 8774a2a386eSRichard Tran Mills PetscFunctionReturn(0); 8784a2a386eSRichard Tran Mills } 879