xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 45fbe4786d0bd596393157c5163c80feffa87361)
14a2a386eSRichard Tran Mills /*
24a2a386eSRichard Tran Mills   Defines basic operations for the MATSEQAIJMKL matrix class.
34a2a386eSRichard Tran Mills   This class is derived from the MATSEQAIJ class and retains the
44a2a386eSRichard Tran Mills   compressed row storage (aka Yale sparse matrix format) but uses
54a2a386eSRichard Tran Mills   sparse BLAS operations from the Intel Math Kernel Library (MKL)
64a2a386eSRichard Tran Mills   wherever possible.
74a2a386eSRichard Tran Mills */
84a2a386eSRichard Tran Mills 
94a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h>
104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
114a2a386eSRichard Tran Mills 
124a2a386eSRichard Tran Mills /* MKL include files. */
134a2a386eSRichard Tran Mills #include <mkl_spblas.h>  /* Sparse BLAS */
144a2a386eSRichard Tran Mills 
154a2a386eSRichard Tran Mills typedef struct {
16c9d46305SRichard Tran Mills   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
175b49642aSRichard Tran Mills   PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
184abfa3b3SRichard Tran Mills   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
19b8cbc1fbSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
20df555b71SRichard Tran Mills   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21df555b71SRichard Tran Mills   struct matrix_descr descr;
22b8cbc1fbSRichard Tran Mills #endif
234a2a386eSRichard Tran Mills } Mat_SeqAIJMKL;
244a2a386eSRichard Tran Mills 
254a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
264a2a386eSRichard Tran Mills 
274a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
284a2a386eSRichard Tran Mills {
294a2a386eSRichard Tran Mills   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
304a2a386eSRichard Tran Mills   /* so we will ignore 'MatType type'. */
314a2a386eSRichard Tran Mills   PetscErrorCode ierr;
324a2a386eSRichard Tran Mills   Mat            B       = *newmat;
33a8327b06SKarl Rupp #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
344a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
35a8327b06SKarl Rupp #endif
364a2a386eSRichard Tran Mills 
374a2a386eSRichard Tran Mills   PetscFunctionBegin;
384a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
394a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
404a2a386eSRichard Tran Mills   }
414a2a386eSRichard Tran Mills 
424a2a386eSRichard Tran Mills   /* Reset the original function pointers. */
4354871a98SRichard Tran Mills   B->ops->duplicate        = MatDuplicate_SeqAIJ;
444a2a386eSRichard Tran Mills   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
454a2a386eSRichard Tran Mills   B->ops->destroy          = MatDestroy_SeqAIJ;
4654871a98SRichard Tran Mills   B->ops->mult             = MatMult_SeqAIJ;
47ff03dc53SRichard Tran Mills   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
4854871a98SRichard Tran Mills   B->ops->multadd          = MatMultAdd_SeqAIJ;
49ff03dc53SRichard Tran Mills   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
50*45fbe478SRichard Tran Mills   B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
5187c2a1d7SRichard Tran Mills   B->ops->scale            = MatScale_SeqAIJ;
5287c2a1d7SRichard Tran Mills   B->ops->diagonalscale    = MatDiagonalScale_SeqAIJ;
5387c2a1d7SRichard Tran Mills   B->ops->diagonalset      = MatDiagonalSet_SeqAIJ;
5487c2a1d7SRichard Tran Mills   B->ops->axpy             = MatAXPY_SeqAIJ;
554a2a386eSRichard Tran Mills 
56e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
57e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
58e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
59e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
60*45fbe478SRichard Tran Mills   if(!aijmkl->no_SpMV2) {
61*45fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
62*45fbe478SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
63*45fbe478SRichard Tran Mills #endif
64*45fbe478SRichard Tran Mills   }
65e9c94282SRichard Tran Mills 
664abfa3b3SRichard Tran Mills   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
67e9c94282SRichard Tran Mills    * simply involves destroying the MKL sparse matrix handle and then freeing
68e9c94282SRichard Tran Mills    * the spptr pointer. */
694abfa3b3SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
70a8327b06SKarl Rupp   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
71a8327b06SKarl Rupp 
724abfa3b3SRichard Tran Mills   if (aijmkl->sparse_optimized) {
730632b357SRichard Tran Mills     sparse_status_t stat;
744abfa3b3SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
754abfa3b3SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) {
764abfa3b3SRichard Tran Mills       PetscFunctionReturn(PETSC_ERR_LIB);
774abfa3b3SRichard Tran Mills     }
784abfa3b3SRichard Tran Mills   }
794abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
80e9c94282SRichard Tran Mills   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
814a2a386eSRichard Tran Mills 
824a2a386eSRichard Tran Mills   /* Change the type of B to MATSEQAIJ. */
834a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
844a2a386eSRichard Tran Mills 
854a2a386eSRichard Tran Mills   *newmat = B;
864a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
874a2a386eSRichard Tran Mills }
884a2a386eSRichard Tran Mills 
894a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
904a2a386eSRichard Tran Mills {
914a2a386eSRichard Tran Mills   PetscErrorCode ierr;
924a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
934a2a386eSRichard Tran Mills 
944a2a386eSRichard Tran Mills   PetscFunctionBegin;
95e9c94282SRichard Tran Mills 
96e9c94282SRichard Tran Mills   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
97e9c94282SRichard Tran Mills    * spptr pointer. */
98e9c94282SRichard Tran Mills   if (aijmkl) {
994a2a386eSRichard Tran Mills     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
1004abfa3b3SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
1014abfa3b3SRichard Tran Mills     if (aijmkl->sparse_optimized) {
1024abfa3b3SRichard Tran Mills       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
1034abfa3b3SRichard Tran Mills       stat = mkl_sparse_destroy(aijmkl->csrA);
1044abfa3b3SRichard Tran Mills       if (stat != SPARSE_STATUS_SUCCESS) {
1054abfa3b3SRichard Tran Mills         PetscFunctionReturn(PETSC_ERR_LIB);
1064abfa3b3SRichard Tran Mills       }
1074abfa3b3SRichard Tran Mills     }
1084abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1094a2a386eSRichard Tran Mills     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
110e9c94282SRichard Tran Mills   }
1114a2a386eSRichard Tran Mills 
1124a2a386eSRichard Tran Mills   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
1134a2a386eSRichard Tran Mills    * to destroy everything that remains. */
1144a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
1154a2a386eSRichard Tran Mills   /* Note that I don't call MatSetType().  I believe this is because that
1164a2a386eSRichard Tran Mills    * is only to be called when *building* a matrix.  I could be wrong, but
1174a2a386eSRichard Tran Mills    * that is how things work for the SuperLU matrix class. */
1184a2a386eSRichard Tran Mills   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
1194a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
1204a2a386eSRichard Tran Mills }
1214a2a386eSRichard Tran Mills 
1225b49642aSRichard Tran Mills /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
1235b49642aSRichard Tran Mills  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
1245b49642aSRichard Tran Mills  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
1255b49642aSRichard Tran Mills  * handle, creates a new one, and then calls mkl_sparse_optimize().
1265b49642aSRichard Tran Mills  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
1275b49642aSRichard Tran Mills  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
1285b49642aSRichard Tran Mills  * an unoptimized matrix handle here. */
1296e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
1304a2a386eSRichard Tran Mills {
1316e369cd5SRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
1326e369cd5SRichard Tran Mills   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
1336e369cd5SRichard Tran Mills    * does nothing. We make it callable anyway in this case because it cuts
1346e369cd5SRichard Tran Mills    * down on littering the code with #ifdefs. */
135*45fbe478SRichard Tran Mills   PetscFunctionBegin;
1366e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
1376e369cd5SRichard Tran Mills #else
138a8327b06SKarl Rupp   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
139a8327b06SKarl Rupp   Mat_SeqAIJMKL   *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
140a8327b06SKarl Rupp   PetscInt        m,n;
141a8327b06SKarl Rupp   MatScalar       *aa;
142a8327b06SKarl Rupp   PetscInt        *aj,*ai;
1436e369cd5SRichard Tran Mills   sparse_status_t stat;
1444a2a386eSRichard Tran Mills 
145a8327b06SKarl Rupp   PetscFunctionBegin;
1466e369cd5SRichard Tran Mills   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
1476e369cd5SRichard Tran Mills 
1480632b357SRichard Tran Mills   if (aijmkl->sparse_optimized) {
1490632b357SRichard Tran Mills     /* Matrix has been previously assembled and optimized. Must destroy old
1500632b357SRichard Tran Mills      * matrix handle before running the optimization step again. */
1510632b357SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
1520632b357SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) {
1530632b357SRichard Tran Mills       PetscFunctionReturn(PETSC_ERR_LIB);
1540632b357SRichard Tran Mills     }
1550632b357SRichard Tran Mills   }
1568d3fe1b0SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1576e369cd5SRichard Tran Mills 
158c9d46305SRichard Tran Mills   /* Now perform the SpMV2 setup and matrix optimization. */
159df555b71SRichard Tran Mills   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
160df555b71SRichard Tran Mills   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
161df555b71SRichard Tran Mills   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
16258678438SRichard Tran Mills   m = A->rmap->n;
16358678438SRichard Tran Mills   n = A->cmap->n;
164df555b71SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
165df555b71SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
166df555b71SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
16780095d54SIrina Sokolova   if ((a->nz!=0) & !(A->structure_only)) {
1688d3fe1b0SRichard Tran Mills     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1698d3fe1b0SRichard Tran Mills      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
17058678438SRichard Tran Mills     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
171df555b71SRichard Tran Mills     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
172df555b71SRichard Tran Mills     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
173df555b71SRichard Tran Mills     stat = mkl_sparse_optimize(aijmkl->csrA);
174df555b71SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) {
175f68ad4bdSRichard Tran Mills       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
176df555b71SRichard Tran Mills       PetscFunctionReturn(PETSC_ERR_LIB);
177df555b71SRichard Tran Mills     }
1784abfa3b3SRichard Tran Mills     aijmkl->sparse_optimized = PETSC_TRUE;
179c9d46305SRichard Tran Mills   }
1806e369cd5SRichard Tran Mills 
1816e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
182d995685eSRichard Tran Mills #endif
1836e369cd5SRichard Tran Mills }
1846e369cd5SRichard Tran Mills 
18519afcda9SRichard Tran Mills /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
18619afcda9SRichard Tran Mills  * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
18719afcda9SRichard Tran Mills  * matrix handle. */
18819afcda9SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
18919afcda9SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,Mat *mat)
19019afcda9SRichard Tran Mills {
19119afcda9SRichard Tran Mills   PetscErrorCode ierr;
19219afcda9SRichard Tran Mills   sparse_status_t stat;
19319afcda9SRichard Tran Mills   sparse_index_base_t indexing;
19419afcda9SRichard Tran Mills   PetscInt nrows, ncols;
195*45fbe478SRichard Tran Mills   PetscInt *aj,*ai,*dummy;
19619afcda9SRichard Tran Mills   MatScalar *aa;
19719afcda9SRichard Tran Mills   Mat A;
19819afcda9SRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
19919afcda9SRichard Tran Mills 
200*45fbe478SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
201*45fbe478SRichard Tran Mills   stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
20219afcda9SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
20319afcda9SRichard Tran Mills     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
20419afcda9SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
20519afcda9SRichard Tran Mills   }
20619afcda9SRichard Tran Mills   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
20719afcda9SRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
208*45fbe478SRichard Tran Mills   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
20919afcda9SRichard Tran Mills   ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);
21019afcda9SRichard Tran Mills 
21119afcda9SRichard Tran Mills   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
21219afcda9SRichard Tran Mills    * Now turn it into a MATSEQAIJMKL. */
21319afcda9SRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
21419afcda9SRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
21519afcda9SRichard Tran Mills   aijmkl->csrA = csrA;
21619afcda9SRichard Tran Mills   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
21719afcda9SRichard Tran Mills    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
21819afcda9SRichard Tran Mills    * and just need to be able to run the MKL optimization step. */
21919afcda9SRichard Tran Mills   stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
22019afcda9SRichard Tran Mills   stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
22119afcda9SRichard Tran Mills   stat = mkl_sparse_optimize(aijmkl->csrA);
22219afcda9SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
22319afcda9SRichard Tran Mills     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
22419afcda9SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
22519afcda9SRichard Tran Mills   }
22619afcda9SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_TRUE;
22719afcda9SRichard Tran Mills 
22819afcda9SRichard Tran Mills   *mat = A;
22919afcda9SRichard Tran Mills   PetscFunctionReturn(0);
23019afcda9SRichard Tran Mills }
23119afcda9SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
23219afcda9SRichard Tran Mills 
2336e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
2346e369cd5SRichard Tran Mills {
2356e369cd5SRichard Tran Mills   PetscErrorCode ierr;
2366e369cd5SRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
2376e369cd5SRichard Tran Mills   Mat_SeqAIJMKL *aijmkl_dest;
2386e369cd5SRichard Tran Mills 
2396e369cd5SRichard Tran Mills   PetscFunctionBegin;
2406e369cd5SRichard Tran Mills   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
2416e369cd5SRichard Tran Mills   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
2426e369cd5SRichard Tran Mills   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
2436e369cd5SRichard Tran Mills   ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
2446e369cd5SRichard Tran Mills   aijmkl_dest->sparse_optimized = PETSC_FALSE;
2455b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
2466e369cd5SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
2475b49642aSRichard Tran Mills   }
2486e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
2496e369cd5SRichard Tran Mills }
2506e369cd5SRichard Tran Mills 
2516e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
2526e369cd5SRichard Tran Mills {
2536e369cd5SRichard Tran Mills   PetscErrorCode  ierr;
2546e369cd5SRichard Tran Mills   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
2555b49642aSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
2566e369cd5SRichard Tran Mills 
2576e369cd5SRichard Tran Mills   PetscFunctionBegin;
2586e369cd5SRichard Tran Mills   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2596e369cd5SRichard Tran Mills 
2606e369cd5SRichard Tran Mills   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
2616e369cd5SRichard Tran Mills    * extra information and some different methods, call the AssemblyEnd
2626e369cd5SRichard Tran Mills    * routine for a MATSEQAIJ.
2636e369cd5SRichard Tran Mills    * I'm not sure if this is the best way to do this, but it avoids
264d96e85feSRichard Tran Mills    * a lot of code duplication. */
2656e369cd5SRichard Tran Mills   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
2666e369cd5SRichard Tran Mills   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
2676e369cd5SRichard Tran Mills 
2685b49642aSRichard Tran Mills   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
2695b49642aSRichard Tran Mills    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
2705b49642aSRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
2715b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
2726e369cd5SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
2735b49642aSRichard Tran Mills   }
274df555b71SRichard Tran Mills 
2754a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
2764a2a386eSRichard Tran Mills }
2774a2a386eSRichard Tran Mills 
2784a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
2794a2a386eSRichard Tran Mills {
2804a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
2814a2a386eSRichard Tran Mills   const PetscScalar *x;
2824a2a386eSRichard Tran Mills   PetscScalar       *y;
2834a2a386eSRichard Tran Mills   const MatScalar   *aa;
2844a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
2854a2a386eSRichard Tran Mills   PetscInt          m=A->rmap->n;
286db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
287db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
288db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
2894a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
290db63039fSRichard Tran Mills   char              matdescra[6];
291db63039fSRichard Tran Mills 
2924a2a386eSRichard Tran Mills 
2934a2a386eSRichard Tran Mills   /* Variables not in MatMult_SeqAIJ. */
294ff03dc53SRichard Tran Mills   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
295ff03dc53SRichard Tran Mills 
296ff03dc53SRichard Tran Mills   PetscFunctionBegin;
297db63039fSRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
298db63039fSRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
299ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
300ff03dc53SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
301ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
302ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
303ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
304ff03dc53SRichard Tran Mills 
305ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
306db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
307ff03dc53SRichard Tran Mills 
308ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
309ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
310ff03dc53SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
311ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
312ff03dc53SRichard Tran Mills }
313ff03dc53SRichard Tran Mills 
314d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
315df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
316df555b71SRichard Tran Mills {
317df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
318df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
319df555b71SRichard Tran Mills   const PetscScalar *x;
320df555b71SRichard Tran Mills   PetscScalar       *y;
321df555b71SRichard Tran Mills   PetscErrorCode    ierr;
322df555b71SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
323df555b71SRichard Tran Mills 
324df555b71SRichard Tran Mills   PetscFunctionBegin;
325df555b71SRichard Tran Mills 
32638987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
32738987b35SRichard Tran Mills   if(!a->nz) {
32838987b35SRichard Tran Mills     PetscInt i;
32938987b35SRichard Tran Mills     PetscInt m=A->rmap->n;
33038987b35SRichard Tran Mills     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
33138987b35SRichard Tran Mills     for (i=0; i<m; i++) {
33238987b35SRichard Tran Mills       y[i] = 0.0;
33338987b35SRichard Tran Mills     }
33438987b35SRichard Tran Mills     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
33538987b35SRichard Tran Mills     PetscFunctionReturn(0);
33638987b35SRichard Tran Mills   }
337f36dfe3fSRichard Tran Mills 
338df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
339df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
340df555b71SRichard Tran Mills 
3413fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
3423fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
3433fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
3443fa15762SRichard Tran Mills   if (!aijmkl->sparse_optimized) {
3453fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
3463fa15762SRichard Tran Mills   }
3473fa15762SRichard Tran Mills 
348df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMult. */
349df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
350df555b71SRichard Tran Mills 
351df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
352df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
353df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
354df555b71SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
355df555b71SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
356df555b71SRichard Tran Mills   }
357df555b71SRichard Tran Mills   PetscFunctionReturn(0);
358df555b71SRichard Tran Mills }
359d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
360df555b71SRichard Tran Mills 
361ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
362ff03dc53SRichard Tran Mills {
363ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
364ff03dc53SRichard Tran Mills   const PetscScalar *x;
365ff03dc53SRichard Tran Mills   PetscScalar       *y;
366ff03dc53SRichard Tran Mills   const MatScalar   *aa;
367ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
368ff03dc53SRichard Tran Mills   PetscInt          m=A->rmap->n;
369db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
370db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
371db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
372ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
373db63039fSRichard Tran Mills   char              matdescra[6];
374ff03dc53SRichard Tran Mills 
375ff03dc53SRichard Tran Mills   /* Variables not in MatMultTranspose_SeqAIJ. */
376ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
3774a2a386eSRichard Tran Mills 
3784a2a386eSRichard Tran Mills   PetscFunctionBegin;
379969800c5SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
380969800c5SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
3814a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3824a2a386eSRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
3834a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
3844a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
3854a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
3864a2a386eSRichard Tran Mills 
3874a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
388db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
3894a2a386eSRichard Tran Mills 
3904a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
3914a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3924a2a386eSRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
3934a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
3944a2a386eSRichard Tran Mills }
3954a2a386eSRichard Tran Mills 
396d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
397df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
398df555b71SRichard Tran Mills {
399df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
400df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
401df555b71SRichard Tran Mills   const PetscScalar *x;
402df555b71SRichard Tran Mills   PetscScalar       *y;
403df555b71SRichard Tran Mills   PetscErrorCode    ierr;
4040632b357SRichard Tran Mills   sparse_status_t   stat;
405df555b71SRichard Tran Mills 
406df555b71SRichard Tran Mills   PetscFunctionBegin;
407df555b71SRichard Tran Mills 
40838987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
40938987b35SRichard Tran Mills   if(!a->nz) {
41038987b35SRichard Tran Mills     PetscInt i;
41138987b35SRichard Tran Mills     PetscInt n=A->cmap->n;
41238987b35SRichard Tran Mills     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
41338987b35SRichard Tran Mills     for (i=0; i<n; i++) {
41438987b35SRichard Tran Mills       y[i] = 0.0;
41538987b35SRichard Tran Mills     }
41638987b35SRichard Tran Mills     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
41738987b35SRichard Tran Mills     PetscFunctionReturn(0);
41838987b35SRichard Tran Mills   }
419f36dfe3fSRichard Tran Mills 
420df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
421df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
422df555b71SRichard Tran Mills 
4233fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4243fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4253fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
4263fa15762SRichard Tran Mills   if (!aijmkl->sparse_optimized) {
4273fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
4283fa15762SRichard Tran Mills   }
4293fa15762SRichard Tran Mills 
430df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
431df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
432df555b71SRichard Tran Mills 
433df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
434df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
435df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
436df555b71SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
437df555b71SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
438df555b71SRichard Tran Mills   }
439df555b71SRichard Tran Mills   PetscFunctionReturn(0);
440df555b71SRichard Tran Mills }
441d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
442df555b71SRichard Tran Mills 
4434a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
4444a2a386eSRichard Tran Mills {
4454a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
4464a2a386eSRichard Tran Mills   const PetscScalar *x;
4474a2a386eSRichard Tran Mills   PetscScalar       *y,*z;
4484a2a386eSRichard Tran Mills   const MatScalar   *aa;
4494a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
4504a2a386eSRichard Tran Mills   PetscInt          m=A->rmap->n;
451db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
4524a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
4534a2a386eSRichard Tran Mills   PetscInt          i;
4544a2a386eSRichard Tran Mills 
455ff03dc53SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
456ff03dc53SRichard Tran Mills   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
457a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
458db63039fSRichard Tran Mills   PetscScalar       beta;
459a84739b8SRichard Tran Mills   char              matdescra[6];
460ff03dc53SRichard Tran Mills 
461ff03dc53SRichard Tran Mills   PetscFunctionBegin;
462a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
463a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
464a84739b8SRichard Tran Mills 
465ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
466ff03dc53SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
467ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
468ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
469ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
470ff03dc53SRichard Tran Mills 
471ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
472a84739b8SRichard Tran Mills   if (zz == yy) {
473a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
474db63039fSRichard Tran Mills     beta = 1.0;
475db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
476a84739b8SRichard Tran Mills   } else {
477db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
478db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
479db63039fSRichard Tran Mills     beta = 0.0;
480db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
481ff03dc53SRichard Tran Mills     for (i=0; i<m; i++) {
482ff03dc53SRichard Tran Mills       z[i] += y[i];
483ff03dc53SRichard Tran Mills     }
484a84739b8SRichard Tran Mills   }
485ff03dc53SRichard Tran Mills 
486ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
487ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
488ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
489ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
490ff03dc53SRichard Tran Mills }
491ff03dc53SRichard Tran Mills 
492d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
493df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
494df555b71SRichard Tran Mills {
495df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
496df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
497df555b71SRichard Tran Mills   const PetscScalar *x;
498df555b71SRichard Tran Mills   PetscScalar       *y,*z;
499df555b71SRichard Tran Mills   PetscErrorCode    ierr;
500df555b71SRichard Tran Mills   PetscInt          m=A->rmap->n;
501df555b71SRichard Tran Mills   PetscInt          i;
502df555b71SRichard Tran Mills 
503df555b71SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
504df555b71SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
505df555b71SRichard Tran Mills 
506df555b71SRichard Tran Mills   PetscFunctionBegin;
507df555b71SRichard Tran Mills 
50838987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
50938987b35SRichard Tran Mills   if(!a->nz) {
51038987b35SRichard Tran Mills     PetscInt i;
51138987b35SRichard Tran Mills     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
51238987b35SRichard Tran Mills     for (i=0; i<m; i++) {
51338987b35SRichard Tran Mills       z[i] = y[i];
51438987b35SRichard Tran Mills     }
51538987b35SRichard Tran Mills     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
51638987b35SRichard Tran Mills     PetscFunctionReturn(0);
51738987b35SRichard Tran Mills   }
518df555b71SRichard Tran Mills 
519df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
520df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
521df555b71SRichard Tran Mills 
5223fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
5233fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
5243fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
5253fa15762SRichard Tran Mills   if (!aijmkl->sparse_optimized) {
5263fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
5273fa15762SRichard Tran Mills   }
5283fa15762SRichard Tran Mills 
529df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
530df555b71SRichard Tran Mills   if (zz == yy) {
531df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
532df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
533db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
534df555b71SRichard Tran Mills   } else {
535df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
536df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
537db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
538df555b71SRichard Tran Mills     for (i=0; i<m; i++) {
539df555b71SRichard Tran Mills       z[i] += y[i];
540df555b71SRichard Tran Mills     }
541df555b71SRichard Tran Mills   }
542df555b71SRichard Tran Mills 
543df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
544df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
545df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
546df555b71SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
547df555b71SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
548df555b71SRichard Tran Mills   }
549df555b71SRichard Tran Mills   PetscFunctionReturn(0);
550df555b71SRichard Tran Mills }
551d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
552df555b71SRichard Tran Mills 
553ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
554ff03dc53SRichard Tran Mills {
555ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
556ff03dc53SRichard Tran Mills   const PetscScalar *x;
557ff03dc53SRichard Tran Mills   PetscScalar       *y,*z;
558ff03dc53SRichard Tran Mills   const MatScalar   *aa;
559ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
560ff03dc53SRichard Tran Mills   PetscInt          m=A->rmap->n;
561db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
562ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
563ff03dc53SRichard Tran Mills   PetscInt          i;
564ff03dc53SRichard Tran Mills 
565ff03dc53SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
566ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
567a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
568db63039fSRichard Tran Mills   PetscScalar       beta;
569a84739b8SRichard Tran Mills   char              matdescra[6];
5704a2a386eSRichard Tran Mills 
5714a2a386eSRichard Tran Mills   PetscFunctionBegin;
572a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
573a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
574a84739b8SRichard Tran Mills 
5754a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5764a2a386eSRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
5774a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
5784a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
5794a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
5804a2a386eSRichard Tran Mills 
5814a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
582a84739b8SRichard Tran Mills   if (zz == yy) {
583a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
584db63039fSRichard Tran Mills     beta = 1.0;
585969800c5SRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
586a84739b8SRichard Tran Mills   } else {
587db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
588db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
589db63039fSRichard Tran Mills     beta = 0.0;
590db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
591969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
5924a2a386eSRichard Tran Mills       z[i] += y[i];
5934a2a386eSRichard Tran Mills     }
594a84739b8SRichard Tran Mills   }
5954a2a386eSRichard Tran Mills 
5964a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
5974a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5984a2a386eSRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
5994a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
6004a2a386eSRichard Tran Mills }
6014a2a386eSRichard Tran Mills 
602d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
603df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
604df555b71SRichard Tran Mills {
605df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
606df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
607df555b71SRichard Tran Mills   const PetscScalar *x;
608df555b71SRichard Tran Mills   PetscScalar       *y,*z;
609df555b71SRichard Tran Mills   PetscErrorCode    ierr;
610969800c5SRichard Tran Mills   PetscInt          n=A->cmap->n;
611df555b71SRichard Tran Mills   PetscInt          i;
612df555b71SRichard Tran Mills 
613df555b71SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
614df555b71SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
615df555b71SRichard Tran Mills 
616df555b71SRichard Tran Mills   PetscFunctionBegin;
617df555b71SRichard Tran Mills 
61838987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
61938987b35SRichard Tran Mills   if(!a->nz) {
62038987b35SRichard Tran Mills     PetscInt i;
62138987b35SRichard Tran Mills     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
62238987b35SRichard Tran Mills     for (i=0; i<n; i++) {
62338987b35SRichard Tran Mills       z[i] = y[i];
62438987b35SRichard Tran Mills     }
62538987b35SRichard Tran Mills     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
62638987b35SRichard Tran Mills     PetscFunctionReturn(0);
62738987b35SRichard Tran Mills   }
628f36dfe3fSRichard Tran Mills 
629df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
630df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
631df555b71SRichard Tran Mills 
6323fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
6333fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
6343fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
6353fa15762SRichard Tran Mills   if (!aijmkl->sparse_optimized) {
6363fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
6373fa15762SRichard Tran Mills   }
6383fa15762SRichard Tran Mills 
639df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
640df555b71SRichard Tran Mills   if (zz == yy) {
641df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
642df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
643db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
644df555b71SRichard Tran Mills   } else {
645df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
646df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
647db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
648969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
649df555b71SRichard Tran Mills       z[i] += y[i];
650df555b71SRichard Tran Mills     }
651df555b71SRichard Tran Mills   }
652df555b71SRichard Tran Mills 
653df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
654df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
655df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
656df555b71SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
657df555b71SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
658df555b71SRichard Tran Mills   }
659df555b71SRichard Tran Mills   PetscFunctionReturn(0);
660df555b71SRichard Tran Mills }
661d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
662df555b71SRichard Tran Mills 
663*45fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
664*45fbe478SRichard Tran Mills PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
665*45fbe478SRichard Tran Mills {
666*45fbe478SRichard Tran Mills   Mat_SeqAIJMKL *a, *b;
667*45fbe478SRichard Tran Mills   sparse_matrix_t csrA, csrB, csrC;
668*45fbe478SRichard Tran Mills   PetscErrorCode ierr;
669*45fbe478SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
670*45fbe478SRichard Tran Mills 
671*45fbe478SRichard Tran Mills   PetscFunctionBegin;
672*45fbe478SRichard Tran Mills   a = (Mat_SeqAIJMKL*)A->spptr;
673*45fbe478SRichard Tran Mills   b = (Mat_SeqAIJMKL*)B->spptr;
674*45fbe478SRichard Tran Mills   if (!a->sparse_optimized) {
675*45fbe478SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
676*45fbe478SRichard Tran Mills   }
677*45fbe478SRichard Tran Mills   if (!b->sparse_optimized) {
678*45fbe478SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(B);
679*45fbe478SRichard Tran Mills   }
680*45fbe478SRichard Tran Mills   csrA = a->csrA;
681*45fbe478SRichard Tran Mills   csrB = b->csrA;
682*45fbe478SRichard Tran Mills 
683*45fbe478SRichard Tran Mills   stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
684*45fbe478SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) {
685*45fbe478SRichard Tran Mills     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
686*45fbe478SRichard Tran Mills     PetscFunctionReturn(PETSC_ERR_LIB);
687*45fbe478SRichard Tran Mills   }
688*45fbe478SRichard Tran Mills 
689*45fbe478SRichard Tran Mills   /* TODO: Make this handle MAT_REUSE_MATRIX sensibly; MKL has no notion of this, but we still need to do the right thing. */
690*45fbe478SRichard Tran Mills   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,C);CHKERRQ(ierr);
691*45fbe478SRichard Tran Mills 
692*45fbe478SRichard Tran Mills   PetscFunctionReturn(0);
693*45fbe478SRichard Tran Mills }
694*45fbe478SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
695*45fbe478SRichard Tran Mills 
69687c2a1d7SRichard Tran Mills PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha)
697db63039fSRichard Tran Mills {
698db63039fSRichard Tran Mills   PetscErrorCode ierr;
699db63039fSRichard Tran Mills 
70087c2a1d7SRichard Tran Mills   PetscFunctionBegin;
701db63039fSRichard Tran Mills   ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr);
702db63039fSRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
703db63039fSRichard Tran Mills   PetscFunctionReturn(0);
704db63039fSRichard Tran Mills }
705df555b71SRichard Tran Mills 
70687c2a1d7SRichard Tran Mills PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr)
70787c2a1d7SRichard Tran Mills {
70887c2a1d7SRichard Tran Mills   PetscErrorCode ierr;
70987c2a1d7SRichard Tran Mills 
71087c2a1d7SRichard Tran Mills   PetscFunctionBegin;
71187c2a1d7SRichard Tran Mills   ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr);
71287c2a1d7SRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
71387c2a1d7SRichard Tran Mills   PetscFunctionReturn(0);
71487c2a1d7SRichard Tran Mills }
71587c2a1d7SRichard Tran Mills 
71687c2a1d7SRichard Tran Mills PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is)
71787c2a1d7SRichard Tran Mills {
71887c2a1d7SRichard Tran Mills   PetscErrorCode ierr;
71987c2a1d7SRichard Tran Mills 
72087c2a1d7SRichard Tran Mills   PetscFunctionBegin;
72187c2a1d7SRichard Tran Mills   ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr);
72287c2a1d7SRichard Tran Mills   ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
72387c2a1d7SRichard Tran Mills   PetscFunctionReturn(0);
72487c2a1d7SRichard Tran Mills }
72587c2a1d7SRichard Tran Mills 
72687c2a1d7SRichard Tran Mills PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
72787c2a1d7SRichard Tran Mills {
72887c2a1d7SRichard Tran Mills   PetscErrorCode ierr;
72987c2a1d7SRichard Tran Mills 
73087c2a1d7SRichard Tran Mills   PetscFunctionBegin;
73187c2a1d7SRichard Tran Mills   ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr);
73287c2a1d7SRichard Tran Mills   if (str == SAME_NONZERO_PATTERN) {
73387c2a1d7SRichard Tran Mills     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
73487c2a1d7SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
73587c2a1d7SRichard Tran Mills   }
73687c2a1d7SRichard Tran Mills   PetscFunctionReturn(0);
73787c2a1d7SRichard Tran Mills }
73887c2a1d7SRichard Tran Mills 
7394a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
7404a2a386eSRichard Tran Mills  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
7414a2a386eSRichard Tran Mills  * routine, but can also be used to convert an assembled SeqAIJ matrix
7424a2a386eSRichard Tran Mills  * into a SeqAIJMKL one. */
7434a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
7444a2a386eSRichard Tran Mills {
7454a2a386eSRichard Tran Mills   PetscErrorCode ierr;
7464a2a386eSRichard Tran Mills   Mat            B = *newmat;
7474a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl;
748c9d46305SRichard Tran Mills   PetscBool      set;
749e9c94282SRichard Tran Mills   PetscBool      sametype;
7504a2a386eSRichard Tran Mills 
7514a2a386eSRichard Tran Mills   PetscFunctionBegin;
7524a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
7534a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
7544a2a386eSRichard Tran Mills   }
7554a2a386eSRichard Tran Mills 
756e9c94282SRichard Tran Mills   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
757e9c94282SRichard Tran Mills   if (sametype) PetscFunctionReturn(0);
758e9c94282SRichard Tran Mills 
7594a2a386eSRichard Tran Mills   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
7604a2a386eSRichard Tran Mills   B->spptr = (void*) aijmkl;
7614a2a386eSRichard Tran Mills 
762df555b71SRichard Tran Mills   /* Set function pointers for methods that we inherit from AIJ but override.
763969800c5SRichard Tran Mills    * We also parse some command line options below, since those determine some of the methods we point to. */
7644a2a386eSRichard Tran Mills   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
7654a2a386eSRichard Tran Mills   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
7664a2a386eSRichard Tran Mills   B->ops->destroy          = MatDestroy_SeqAIJMKL;
767c9d46305SRichard Tran Mills 
7684abfa3b3SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
769d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
770d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
771a8327b06SKarl Rupp #else
772d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_TRUE;
773d995685eSRichard Tran Mills #endif
7745b49642aSRichard Tran Mills   aijmkl->eager_inspection = PETSC_FALSE;
7754abfa3b3SRichard Tran Mills 
7764abfa3b3SRichard Tran Mills   /* Parse command line options. */
777c9d46305SRichard Tran Mills   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
778c9d46305SRichard Tran Mills   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
7795b49642aSRichard Tran Mills   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
780c9d46305SRichard Tran Mills   ierr = PetscOptionsEnd();CHKERRQ(ierr);
781d995685eSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
782d995685eSRichard Tran Mills   if(!aijmkl->no_SpMV2) {
783d995685eSRichard Tran Mills     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
784d995685eSRichard Tran Mills     aijmkl->no_SpMV2 = PETSC_TRUE;
785d995685eSRichard Tran Mills   }
786d995685eSRichard Tran Mills #endif
787c9d46305SRichard Tran Mills 
788c9d46305SRichard Tran Mills   if(!aijmkl->no_SpMV2) {
789d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
790df555b71SRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
791969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
792df555b71SRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
793969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
794*45fbe478SRichard Tran Mills     B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
795d995685eSRichard Tran Mills #endif
796c9d46305SRichard Tran Mills   } else {
7974a2a386eSRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL;
798969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
7994a2a386eSRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
800969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
801c9d46305SRichard Tran Mills   }
8024a2a386eSRichard Tran Mills 
803db63039fSRichard Tran Mills   B->ops->scale              = MatScale_SeqAIJMKL;
80487c2a1d7SRichard Tran Mills   B->ops->diagonalscale      = MatDiagonalScale_SeqAIJMKL;
80587c2a1d7SRichard Tran Mills   B->ops->diagonalset        = MatDiagonalSet_SeqAIJMKL;
80687c2a1d7SRichard Tran Mills   B->ops->axpy               = MatAXPY_SeqAIJMKL;
807db63039fSRichard Tran Mills 
808db63039fSRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr);
8094a2a386eSRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
810e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
811e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
812e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
813*45fbe478SRichard Tran Mills   if(!aijmkl->no_SpMV2) {
814*45fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
815*45fbe478SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
816*45fbe478SRichard Tran Mills #endif
817*45fbe478SRichard Tran Mills   }
8184a2a386eSRichard Tran Mills 
8194a2a386eSRichard Tran Mills   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
8204a2a386eSRichard Tran Mills   *newmat = B;
8214a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
8224a2a386eSRichard Tran Mills }
8234a2a386eSRichard Tran Mills 
8244a2a386eSRichard Tran Mills /*@C
8254a2a386eSRichard Tran Mills    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
8264a2a386eSRichard Tran Mills    This type inherits from AIJ and is largely identical, but uses sparse BLAS
8274a2a386eSRichard Tran Mills    routines from Intel MKL whenever possible.
82890147e49SRichard Tran Mills    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
82990147e49SRichard Tran Mills    operations are currently supported.
83090147e49SRichard Tran Mills    If the installed version of MKL supports the "SpMV2" sparse
83190147e49SRichard Tran Mills    inspector-executor routines, then those are used by default.
83290147e49SRichard Tran Mills 
8334a2a386eSRichard Tran Mills    Collective on MPI_Comm
8344a2a386eSRichard Tran Mills 
8354a2a386eSRichard Tran Mills    Input Parameters:
8364a2a386eSRichard Tran Mills +  comm - MPI communicator, set to PETSC_COMM_SELF
8374a2a386eSRichard Tran Mills .  m - number of rows
8384a2a386eSRichard Tran Mills .  n - number of columns
8394a2a386eSRichard Tran Mills .  nz - number of nonzeros per row (same for all rows)
8404a2a386eSRichard Tran Mills -  nnz - array containing the number of nonzeros in the various rows
8414a2a386eSRichard Tran Mills          (possibly different for each row) or NULL
8424a2a386eSRichard Tran Mills 
8434a2a386eSRichard Tran Mills    Output Parameter:
8444a2a386eSRichard Tran Mills .  A - the matrix
8454a2a386eSRichard Tran Mills 
84690147e49SRichard Tran Mills    Options Database Keys:
84790147e49SRichard Tran Mills .  -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines
84890147e49SRichard Tran Mills 
8494a2a386eSRichard Tran Mills    Notes:
8504a2a386eSRichard Tran Mills    If nnz is given then nz is ignored
8514a2a386eSRichard Tran Mills 
8524a2a386eSRichard Tran Mills    Level: intermediate
8534a2a386eSRichard Tran Mills 
85490147e49SRichard Tran Mills .keywords: matrix, MKL, sparse, parallel
8554a2a386eSRichard Tran Mills 
8564a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
8574a2a386eSRichard Tran Mills @*/
8584a2a386eSRichard Tran Mills PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
8594a2a386eSRichard Tran Mills {
8604a2a386eSRichard Tran Mills   PetscErrorCode ierr;
8614a2a386eSRichard Tran Mills 
8624a2a386eSRichard Tran Mills   PetscFunctionBegin;
8634a2a386eSRichard Tran Mills   ierr = MatCreate(comm,A);CHKERRQ(ierr);
8644a2a386eSRichard Tran Mills   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
8654a2a386eSRichard Tran Mills   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
8664a2a386eSRichard Tran Mills   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
8674a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
8684a2a386eSRichard Tran Mills }
8694a2a386eSRichard Tran Mills 
8704a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
8714a2a386eSRichard Tran Mills {
8724a2a386eSRichard Tran Mills   PetscErrorCode ierr;
8734a2a386eSRichard Tran Mills 
8744a2a386eSRichard Tran Mills   PetscFunctionBegin;
8754a2a386eSRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
8764a2a386eSRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
8774a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
8784a2a386eSRichard Tran Mills }
879