xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 4d51fa235bd829bb25a66fbf33234f77442fbc7a)
14a2a386eSRichard Tran Mills /*
24a2a386eSRichard Tran Mills   Defines basic operations for the MATSEQAIJMKL matrix class.
34a2a386eSRichard Tran Mills   This class is derived from the MATSEQAIJ class and retains the
44a2a386eSRichard Tran Mills   compressed row storage (aka Yale sparse matrix format) but uses
54a2a386eSRichard Tran Mills   sparse BLAS operations from the Intel Math Kernel Library (MKL)
64a2a386eSRichard Tran Mills   wherever possible.
74a2a386eSRichard Tran Mills */
84a2a386eSRichard Tran Mills 
94a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h>
104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
114a2a386eSRichard Tran Mills 
124a2a386eSRichard Tran Mills /* MKL include files. */
134a2a386eSRichard Tran Mills #include <mkl_spblas.h>  /* Sparse BLAS */
144a2a386eSRichard Tran Mills 
154a2a386eSRichard Tran Mills typedef struct {
16c9d46305SRichard Tran Mills   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
175b49642aSRichard Tran Mills   PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
184abfa3b3SRichard Tran Mills   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
19551aa5c8SRichard Tran Mills   PetscObjectState state;
20b8cbc1fbSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
21df555b71SRichard Tran Mills   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
22df555b71SRichard Tran Mills   struct matrix_descr descr;
23b8cbc1fbSRichard Tran Mills #endif
244a2a386eSRichard Tran Mills } Mat_SeqAIJMKL;
254a2a386eSRichard Tran Mills 
264a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
274a2a386eSRichard Tran Mills 
284a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
294a2a386eSRichard Tran Mills {
304a2a386eSRichard Tran Mills   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
314a2a386eSRichard Tran Mills   /* so we will ignore 'MatType type'. */
324a2a386eSRichard Tran Mills   PetscErrorCode ierr;
334a2a386eSRichard Tran Mills   Mat            B       = *newmat;
34c1d5218aSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
354a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
36c1d5218aSRichard Tran Mills #endif
374a2a386eSRichard Tran Mills 
384a2a386eSRichard Tran Mills   PetscFunctionBegin;
394a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
404a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
414a2a386eSRichard Tran Mills   }
424a2a386eSRichard Tran Mills 
434a2a386eSRichard Tran Mills   /* Reset the original function pointers. */
4454871a98SRichard Tran Mills   B->ops->duplicate        = MatDuplicate_SeqAIJ;
454a2a386eSRichard Tran Mills   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
464a2a386eSRichard Tran Mills   B->ops->destroy          = MatDestroy_SeqAIJ;
4754871a98SRichard Tran Mills   B->ops->mult             = MatMult_SeqAIJ;
48ff03dc53SRichard Tran Mills   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
4954871a98SRichard Tran Mills   B->ops->multadd          = MatMultAdd_SeqAIJ;
50ff03dc53SRichard Tran Mills   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
5145fbe478SRichard Tran Mills   B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
52e8be1fc7SRichard Tran Mills   B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
534f53af40SRichard Tran Mills   B->ops->ptap             = MatPtAP_SeqAIJ_SeqAIJ;
544f53af40SRichard Tran Mills   B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJ_SeqAIJ;
55372ec6bbSRichard Tran Mills   B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;
564a2a386eSRichard Tran Mills 
57e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
58e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
59e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
60e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
6145fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
624a940b00SSatish Balay   if(!aijmkl->no_SpMV2) {
6345fbe478SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
64e8be1fc7SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_SP2M
65e8be1fc7SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
66e8be1fc7SRichard Tran Mills #endif
67372ec6bbSRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
6845fbe478SRichard Tran Mills   }
69e9c94282SRichard Tran Mills 
704abfa3b3SRichard Tran Mills   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
71e9c94282SRichard Tran Mills    * simply involves destroying the MKL sparse matrix handle and then freeing
72e9c94282SRichard Tran Mills    * the spptr pointer. */
73a8327b06SKarl Rupp   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
74a8327b06SKarl Rupp 
754abfa3b3SRichard Tran Mills   if (aijmkl->sparse_optimized) {
760632b357SRichard Tran Mills     sparse_status_t stat;
774abfa3b3SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
789c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
794abfa3b3SRichard Tran Mills   }
804abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
81e9c94282SRichard Tran Mills   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
824a2a386eSRichard Tran Mills 
834a2a386eSRichard Tran Mills   /* Change the type of B to MATSEQAIJ. */
844a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
854a2a386eSRichard Tran Mills 
864a2a386eSRichard Tran Mills   *newmat = B;
874a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
884a2a386eSRichard Tran Mills }
894a2a386eSRichard Tran Mills 
904a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
914a2a386eSRichard Tran Mills {
924a2a386eSRichard Tran Mills   PetscErrorCode ierr;
934a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
944a2a386eSRichard Tran Mills 
954a2a386eSRichard Tran Mills   PetscFunctionBegin;
96e9c94282SRichard Tran Mills 
97e9c94282SRichard Tran Mills   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
98e9c94282SRichard Tran Mills    * spptr pointer. */
99e9c94282SRichard Tran Mills   if (aijmkl) {
1004a2a386eSRichard Tran Mills     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
1014abfa3b3SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
1024abfa3b3SRichard Tran Mills     if (aijmkl->sparse_optimized) {
1034abfa3b3SRichard Tran Mills       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
1044abfa3b3SRichard Tran Mills       stat = mkl_sparse_destroy(aijmkl->csrA);
1059c46acdfSRichard Tran Mills       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
1064abfa3b3SRichard Tran Mills     }
1074abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1084a2a386eSRichard Tran Mills     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
109e9c94282SRichard Tran Mills   }
1104a2a386eSRichard Tran Mills 
1114a2a386eSRichard Tran Mills   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
1124a2a386eSRichard Tran Mills    * to destroy everything that remains. */
1134a2a386eSRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
1144a2a386eSRichard Tran Mills   /* Note that I don't call MatSetType().  I believe this is because that
1154a2a386eSRichard Tran Mills    * is only to be called when *building* a matrix.  I could be wrong, but
1164a2a386eSRichard Tran Mills    * that is how things work for the SuperLU matrix class. */
1174a2a386eSRichard Tran Mills   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
1184a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
1194a2a386eSRichard Tran Mills }
1204a2a386eSRichard Tran Mills 
1215b49642aSRichard Tran Mills /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
1225b49642aSRichard Tran Mills  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
1235b49642aSRichard Tran Mills  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
1245b49642aSRichard Tran Mills  * handle, creates a new one, and then calls mkl_sparse_optimize().
1255b49642aSRichard Tran Mills  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
1265b49642aSRichard Tran Mills  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
1275b49642aSRichard Tran Mills  * an unoptimized matrix handle here. */
1286e369cd5SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
1294a2a386eSRichard Tran Mills {
1306e369cd5SRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
1316e369cd5SRichard Tran Mills   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
1326e369cd5SRichard Tran Mills    * does nothing. We make it callable anyway in this case because it cuts
1336e369cd5SRichard Tran Mills    * down on littering the code with #ifdefs. */
13445fbe478SRichard Tran Mills   PetscFunctionBegin;
1356e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
1366e369cd5SRichard Tran Mills #else
137a8327b06SKarl Rupp   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
138a8327b06SKarl Rupp   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
139a8327b06SKarl Rupp   PetscInt         m,n;
140a8327b06SKarl Rupp   MatScalar        *aa;
141a8327b06SKarl Rupp   PetscInt         *aj,*ai;
1426e369cd5SRichard Tran Mills   sparse_status_t  stat;
143551aa5c8SRichard Tran Mills   PetscErrorCode   ierr;
1444a2a386eSRichard Tran Mills 
145a8327b06SKarl Rupp   PetscFunctionBegin;
146*4d51fa23SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_SP2M)
147*4d51fa23SRichard Tran Mills   /* Versions of MKL that don't have mkl_sparse_sp2m() still support the old, non-inspector-executor interfaces. For these versions,
148*4d51fa23SRichard Tran Mills    * we simply exit. Versions that do have mkl_sparse_sp2m() (version 18, update 2 and later) have deprecated the old interfaces.
149*4d51fa23SRichard Tran Mills    * In this case, we must use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by
150*4d51fa23SRichard Tran Mills    * not calling mkl_sparse_optimize() later. */
1516e369cd5SRichard Tran Mills   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
152*4d51fa23SRichard Tran Mills #endif
1536e369cd5SRichard Tran Mills 
1540632b357SRichard Tran Mills   if (aijmkl->sparse_optimized) {
1550632b357SRichard Tran Mills     /* Matrix has been previously assembled and optimized. Must destroy old
1560632b357SRichard Tran Mills      * matrix handle before running the optimization step again. */
1570632b357SRichard Tran Mills     stat = mkl_sparse_destroy(aijmkl->csrA);
1589c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
1590632b357SRichard Tran Mills   }
1608d3fe1b0SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1616e369cd5SRichard Tran Mills 
162c9d46305SRichard Tran Mills   /* Now perform the SpMV2 setup and matrix optimization. */
163df555b71SRichard Tran Mills   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
164df555b71SRichard Tran Mills   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
165df555b71SRichard Tran Mills   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
16658678438SRichard Tran Mills   m = A->rmap->n;
16758678438SRichard Tran Mills   n = A->cmap->n;
168df555b71SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
169df555b71SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
170df555b71SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
17180095d54SIrina Sokolova   if ((a->nz!=0) & !(A->structure_only)) {
1728d3fe1b0SRichard Tran Mills     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1738d3fe1b0SRichard Tran Mills      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
17458678438SRichard Tran Mills     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
175e8be1fc7SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
176df555b71SRichard Tran Mills     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
177e8be1fc7SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
178df555b71SRichard Tran Mills     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
179e8be1fc7SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
18050a5026bSRichard Tran Mills     if (!aijmkl->no_SpMV2) {
181df555b71SRichard Tran Mills       stat = mkl_sparse_optimize(aijmkl->csrA);
182e8be1fc7SRichard Tran Mills       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
18350a5026bSRichard Tran Mills     }
1844abfa3b3SRichard Tran Mills     aijmkl->sparse_optimized = PETSC_TRUE;
185e995cf24SRichard Tran Mills     ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
186c9d46305SRichard Tran Mills   }
1876e369cd5SRichard Tran Mills 
1886e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
189d995685eSRichard Tran Mills #endif
1906e369cd5SRichard Tran Mills }
1916e369cd5SRichard Tran Mills 
19219afcda9SRichard Tran Mills /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
19319afcda9SRichard Tran Mills  * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
1946c87cf42SRichard Tran Mills  * matrix handle.
195aab60f1bSRichard Tran Mills  * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
196aab60f1bSRichard Tran Mills  * there is no good alternative. */
19719afcda9SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
1986c87cf42SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
19919afcda9SRichard Tran Mills {
20019afcda9SRichard Tran Mills   PetscErrorCode      ierr;
20119afcda9SRichard Tran Mills   sparse_status_t     stat;
20219afcda9SRichard Tran Mills   sparse_index_base_t indexing;
20319afcda9SRichard Tran Mills   PetscInt            nrows, ncols;
20445fbe478SRichard Tran Mills   PetscInt            *aj,*ai,*dummy;
20519afcda9SRichard Tran Mills   MatScalar           *aa;
20619afcda9SRichard Tran Mills   Mat                 A;
20719afcda9SRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl;
20819afcda9SRichard Tran Mills 
20945fbe478SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
21045fbe478SRichard Tran Mills   stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
2119c46acdfSRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
2126c87cf42SRichard Tran Mills 
213aab60f1bSRichard Tran Mills   if (reuse == MAT_REUSE_MATRIX) {
214aab60f1bSRichard Tran Mills     ierr = MatDestroy(mat);CHKERRQ(ierr);
215aab60f1bSRichard Tran Mills   }
21619afcda9SRichard Tran Mills   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
21719afcda9SRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
21845fbe478SRichard Tran Mills   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
219aab60f1bSRichard Tran Mills   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
220aab60f1bSRichard Tran Mills    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
221aab60f1bSRichard Tran Mills    * they will be destroyed when the MKL handle is destroyed.
222aab60f1bSRichard Tran Mills    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
22319afcda9SRichard Tran Mills   ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);
22419afcda9SRichard Tran Mills 
22519afcda9SRichard Tran Mills   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
22619afcda9SRichard Tran Mills    * Now turn it into a MATSEQAIJMKL. */
22719afcda9SRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
2286c87cf42SRichard Tran Mills 
22919afcda9SRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
23019afcda9SRichard Tran Mills   aijmkl->csrA = csrA;
2316c87cf42SRichard Tran Mills 
23219afcda9SRichard Tran Mills   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
23319afcda9SRichard Tran Mills    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
23419afcda9SRichard Tran Mills    * and just need to be able to run the MKL optimization step. */
235f3fd1758SRichard Tran Mills   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
236f3fd1758SRichard Tran Mills   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
237f3fd1758SRichard Tran Mills   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
23819afcda9SRichard Tran Mills   stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
23951539a68SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
24019afcda9SRichard Tran Mills   stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
24151539a68SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
24250a5026bSRichard Tran Mills   if (!aijmkl->no_SpMV2) {
24319afcda9SRichard Tran Mills     stat = mkl_sparse_optimize(aijmkl->csrA);
24451539a68SRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
24550a5026bSRichard Tran Mills   }
24619afcda9SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_TRUE;
247e995cf24SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
24819afcda9SRichard Tran Mills 
24919afcda9SRichard Tran Mills   *mat = A;
25019afcda9SRichard Tran Mills   PetscFunctionReturn(0);
25119afcda9SRichard Tran Mills }
25219afcda9SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
25319afcda9SRichard Tran Mills 
254e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
255e8be1fc7SRichard Tran Mills  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
256e8be1fc7SRichard Tran Mills  * MatMatMultNumeric(). */
257e8be1fc7SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
258e8be1fc7SRichard Tran Mills PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
259e8be1fc7SRichard Tran Mills {
260e8be1fc7SRichard Tran Mills   PetscInt            i;
261e8be1fc7SRichard Tran Mills   PetscInt            nrows,ncols;
262e8be1fc7SRichard Tran Mills   PetscInt            nz;
263e8be1fc7SRichard Tran Mills   PetscInt            *ai,*aj,*dummy;
264e8be1fc7SRichard Tran Mills   PetscScalar         *aa;
265e8be1fc7SRichard Tran Mills   PetscErrorCode      ierr;
266e8be1fc7SRichard Tran Mills   Mat_SeqAIJMKL       *aijmkl;
267e8be1fc7SRichard Tran Mills   sparse_status_t     stat;
268e8be1fc7SRichard Tran Mills   sparse_index_base_t indexing;
269e8be1fc7SRichard Tran Mills 
270e8be1fc7SRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
271e8be1fc7SRichard Tran Mills 
272e8be1fc7SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
273e8be1fc7SRichard Tran Mills   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
274e8be1fc7SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
275e8be1fc7SRichard Tran Mills 
276e8be1fc7SRichard Tran Mills   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
277e8be1fc7SRichard Tran Mills    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
278e8be1fc7SRichard Tran Mills   for (i=0; i<nrows; i++) {
279e8be1fc7SRichard Tran Mills     nz = ai[i+1] - ai[i];
280e8be1fc7SRichard Tran Mills     ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
281e8be1fc7SRichard Tran Mills   }
282e8be1fc7SRichard Tran Mills 
283e8be1fc7SRichard Tran Mills   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
284e8be1fc7SRichard Tran Mills   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
285e8be1fc7SRichard Tran Mills 
286e995cf24SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
287e995cf24SRichard Tran Mills   /* We mark our matrix as having a valid, optimized MKL handle.
288e995cf24SRichard Tran Mills    * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
289e995cf24SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_TRUE;
290e995cf24SRichard Tran Mills 
291e8be1fc7SRichard Tran Mills   PetscFunctionReturn(0);
292e8be1fc7SRichard Tran Mills }
293e8be1fc7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
294e8be1fc7SRichard Tran Mills 
2956e369cd5SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
2966e369cd5SRichard Tran Mills {
2976e369cd5SRichard Tran Mills   PetscErrorCode ierr;
2986e369cd5SRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl;
2996e369cd5SRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl_dest;
3006e369cd5SRichard Tran Mills 
3016e369cd5SRichard Tran Mills   PetscFunctionBegin;
3026e369cd5SRichard Tran Mills   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
3036e369cd5SRichard Tran Mills   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
3046e369cd5SRichard Tran Mills   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
3056e369cd5SRichard Tran Mills   ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
3066e369cd5SRichard Tran Mills   aijmkl_dest->sparse_optimized = PETSC_FALSE;
3075b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
3086e369cd5SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
3095b49642aSRichard Tran Mills   }
3106e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
3116e369cd5SRichard Tran Mills }
3126e369cd5SRichard Tran Mills 
3136e369cd5SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
3146e369cd5SRichard Tran Mills {
3156e369cd5SRichard Tran Mills   PetscErrorCode  ierr;
3166e369cd5SRichard Tran Mills   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
3175b49642aSRichard Tran Mills   Mat_SeqAIJMKL   *aijmkl;
3186e369cd5SRichard Tran Mills 
3196e369cd5SRichard Tran Mills   PetscFunctionBegin;
3206e369cd5SRichard Tran Mills   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
3216e369cd5SRichard Tran Mills 
3226e369cd5SRichard Tran Mills   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
3236e369cd5SRichard Tran Mills    * extra information and some different methods, call the AssemblyEnd
3246e369cd5SRichard Tran Mills    * routine for a MATSEQAIJ.
3256e369cd5SRichard Tran Mills    * I'm not sure if this is the best way to do this, but it avoids
326d96e85feSRichard Tran Mills    * a lot of code duplication. */
3276e369cd5SRichard Tran Mills   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
3286e369cd5SRichard Tran Mills   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
3296e369cd5SRichard Tran Mills 
3305b49642aSRichard Tran Mills   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
3315b49642aSRichard Tran Mills    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
3325b49642aSRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
3335b49642aSRichard Tran Mills   if (aijmkl->eager_inspection) {
3346e369cd5SRichard Tran Mills     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
3355b49642aSRichard Tran Mills   }
336df555b71SRichard Tran Mills 
3374a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
3384a2a386eSRichard Tran Mills }
3394a2a386eSRichard Tran Mills 
34050a5026bSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_SP2M
3414a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
3424a2a386eSRichard Tran Mills {
3434a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3444a2a386eSRichard Tran Mills   const PetscScalar *x;
3454a2a386eSRichard Tran Mills   PetscScalar       *y;
3464a2a386eSRichard Tran Mills   const MatScalar   *aa;
3474a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
3484a2a386eSRichard Tran Mills   PetscInt          m=A->rmap->n;
349db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
350db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
351db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
3524a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
353db63039fSRichard Tran Mills   char              matdescra[6];
354db63039fSRichard Tran Mills 
3554a2a386eSRichard Tran Mills 
3564a2a386eSRichard Tran Mills   /* Variables not in MatMult_SeqAIJ. */
357ff03dc53SRichard Tran Mills   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
358ff03dc53SRichard Tran Mills 
359ff03dc53SRichard Tran Mills   PetscFunctionBegin;
360db63039fSRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
361db63039fSRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
362ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
363ff03dc53SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
364ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
365ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
366ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
367ff03dc53SRichard Tran Mills 
368ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
369db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
370ff03dc53SRichard Tran Mills 
371ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
372ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
373ff03dc53SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
374ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
375ff03dc53SRichard Tran Mills }
37650a5026bSRichard Tran Mills #endif
377ff03dc53SRichard Tran Mills 
378d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
379df555b71SRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
380df555b71SRichard Tran Mills {
381df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
382df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
383df555b71SRichard Tran Mills   const PetscScalar *x;
384df555b71SRichard Tran Mills   PetscScalar       *y;
385df555b71SRichard Tran Mills   PetscErrorCode    ierr;
386df555b71SRichard Tran Mills   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
387551aa5c8SRichard Tran Mills   PetscObjectState  state;
388df555b71SRichard Tran Mills 
389df555b71SRichard Tran Mills   PetscFunctionBegin;
390df555b71SRichard Tran Mills 
39138987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
39238987b35SRichard Tran Mills   if(!a->nz) {
39338987b35SRichard Tran Mills     PetscInt i;
39438987b35SRichard Tran Mills     PetscInt m=A->rmap->n;
39538987b35SRichard Tran Mills     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
39638987b35SRichard Tran Mills     for (i=0; i<m; i++) {
39738987b35SRichard Tran Mills       y[i] = 0.0;
39838987b35SRichard Tran Mills     }
39938987b35SRichard Tran Mills     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
40038987b35SRichard Tran Mills     PetscFunctionReturn(0);
40138987b35SRichard Tran Mills   }
402f36dfe3fSRichard Tran Mills 
403df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
404df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
405df555b71SRichard Tran Mills 
4063fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4073fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4083fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
409551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
410551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
4113fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
4123fa15762SRichard Tran Mills   }
4133fa15762SRichard Tran Mills 
414df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMult. */
415df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
4169c46acdfSRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
417df555b71SRichard Tran Mills 
418df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
419df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
420df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
421df555b71SRichard Tran Mills   PetscFunctionReturn(0);
422df555b71SRichard Tran Mills }
423d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
424df555b71SRichard Tran Mills 
42550a5026bSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_SP2M
426ff03dc53SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
427ff03dc53SRichard Tran Mills {
428ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
429ff03dc53SRichard Tran Mills   const PetscScalar *x;
430ff03dc53SRichard Tran Mills   PetscScalar       *y;
431ff03dc53SRichard Tran Mills   const MatScalar   *aa;
432ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
433ff03dc53SRichard Tran Mills   PetscInt          m=A->rmap->n;
434db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
435db63039fSRichard Tran Mills   PetscScalar       alpha = 1.0;
436db63039fSRichard Tran Mills   PetscScalar       beta = 0.0;
437ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
438db63039fSRichard Tran Mills   char              matdescra[6];
439ff03dc53SRichard Tran Mills 
440ff03dc53SRichard Tran Mills   /* Variables not in MatMultTranspose_SeqAIJ. */
441ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
4424a2a386eSRichard Tran Mills 
4434a2a386eSRichard Tran Mills   PetscFunctionBegin;
444969800c5SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
445969800c5SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
4464a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4474a2a386eSRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
4484a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
4494a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
4504a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
4514a2a386eSRichard Tran Mills 
4524a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
453db63039fSRichard Tran Mills   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
4544a2a386eSRichard Tran Mills 
4554a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
4564a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4574a2a386eSRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
4584a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
4594a2a386eSRichard Tran Mills }
46050a5026bSRichard Tran Mills #endif
4614a2a386eSRichard Tran Mills 
462d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
463df555b71SRichard Tran Mills PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
464df555b71SRichard Tran Mills {
465df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
466df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
467df555b71SRichard Tran Mills   const PetscScalar *x;
468df555b71SRichard Tran Mills   PetscScalar       *y;
469df555b71SRichard Tran Mills   PetscErrorCode    ierr;
4700632b357SRichard Tran Mills   sparse_status_t   stat;
471551aa5c8SRichard Tran Mills   PetscObjectState  state;
472df555b71SRichard Tran Mills 
473df555b71SRichard Tran Mills   PetscFunctionBegin;
474df555b71SRichard Tran Mills 
47538987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
47638987b35SRichard Tran Mills   if(!a->nz) {
47738987b35SRichard Tran Mills     PetscInt i;
47838987b35SRichard Tran Mills     PetscInt n=A->cmap->n;
47938987b35SRichard Tran Mills     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
48038987b35SRichard Tran Mills     for (i=0; i<n; i++) {
48138987b35SRichard Tran Mills       y[i] = 0.0;
48238987b35SRichard Tran Mills     }
48338987b35SRichard Tran Mills     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
48438987b35SRichard Tran Mills     PetscFunctionReturn(0);
48538987b35SRichard Tran Mills   }
486f36dfe3fSRichard Tran Mills 
487df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
488df555b71SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
489df555b71SRichard Tran Mills 
4903fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4913fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4923fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
493551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
494551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
4953fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
4963fa15762SRichard Tran Mills   }
4973fa15762SRichard Tran Mills 
498df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
499df555b71SRichard Tran Mills   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
5009c46acdfSRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
501df555b71SRichard Tran Mills 
502df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
503df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
504df555b71SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
505df555b71SRichard Tran Mills   PetscFunctionReturn(0);
506df555b71SRichard Tran Mills }
507d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
508df555b71SRichard Tran Mills 
50950a5026bSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_SP2M
5104a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
5114a2a386eSRichard Tran Mills {
5124a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
5134a2a386eSRichard Tran Mills   const PetscScalar *x;
5144a2a386eSRichard Tran Mills   PetscScalar       *y,*z;
5154a2a386eSRichard Tran Mills   const MatScalar   *aa;
5164a2a386eSRichard Tran Mills   PetscErrorCode    ierr;
5174a2a386eSRichard Tran Mills   PetscInt          m=A->rmap->n;
518db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
5194a2a386eSRichard Tran Mills   const PetscInt    *aj,*ai;
5204a2a386eSRichard Tran Mills   PetscInt          i;
5214a2a386eSRichard Tran Mills 
522ff03dc53SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
523ff03dc53SRichard Tran Mills   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
524a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
525db63039fSRichard Tran Mills   PetscScalar       beta;
526a84739b8SRichard Tran Mills   char              matdescra[6];
527ff03dc53SRichard Tran Mills 
528ff03dc53SRichard Tran Mills   PetscFunctionBegin;
529a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
530a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
531a84739b8SRichard Tran Mills 
532ff03dc53SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
533ff03dc53SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
534ff03dc53SRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
535ff03dc53SRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
536ff03dc53SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
537ff03dc53SRichard Tran Mills 
538ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
539a84739b8SRichard Tran Mills   if (zz == yy) {
540a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
541db63039fSRichard Tran Mills     beta = 1.0;
542db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
543a84739b8SRichard Tran Mills   } else {
544db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
545db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
546db63039fSRichard Tran Mills     beta = 0.0;
547db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
548ff03dc53SRichard Tran Mills     for (i=0; i<m; i++) {
549ff03dc53SRichard Tran Mills       z[i] += y[i];
550ff03dc53SRichard Tran Mills     }
551a84739b8SRichard Tran Mills   }
552ff03dc53SRichard Tran Mills 
553ff03dc53SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
554ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
555ff03dc53SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
556ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
557ff03dc53SRichard Tran Mills }
55850a5026bSRichard Tran Mills #endif
559ff03dc53SRichard Tran Mills 
560d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
561df555b71SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
562df555b71SRichard Tran Mills {
563df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
564df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
565df555b71SRichard Tran Mills   const PetscScalar *x;
566df555b71SRichard Tran Mills   PetscScalar       *y,*z;
567df555b71SRichard Tran Mills   PetscErrorCode    ierr;
568df555b71SRichard Tran Mills   PetscInt          m=A->rmap->n;
569df555b71SRichard Tran Mills   PetscInt          i;
570df555b71SRichard Tran Mills 
571df555b71SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
572df555b71SRichard Tran Mills   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
573551aa5c8SRichard Tran Mills   PetscObjectState  state;
574df555b71SRichard Tran Mills 
575df555b71SRichard Tran Mills   PetscFunctionBegin;
576df555b71SRichard Tran Mills 
57738987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
57838987b35SRichard Tran Mills   if(!a->nz) {
57938987b35SRichard Tran Mills     PetscInt i;
58038987b35SRichard Tran Mills     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
58138987b35SRichard Tran Mills     for (i=0; i<m; i++) {
58238987b35SRichard Tran Mills       z[i] = y[i];
58338987b35SRichard Tran Mills     }
58438987b35SRichard Tran Mills     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
58538987b35SRichard Tran Mills     PetscFunctionReturn(0);
58638987b35SRichard Tran Mills   }
587df555b71SRichard Tran Mills 
588df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
589df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
590df555b71SRichard Tran Mills 
5913fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
5923fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
5933fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
594551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
595551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
5963fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
5973fa15762SRichard Tran Mills   }
5983fa15762SRichard Tran Mills 
599df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
600df555b71SRichard Tran Mills   if (zz == yy) {
601df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
602df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
603db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
6049c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
605df555b71SRichard Tran Mills   } else {
606df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
607df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
608db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
6099c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
610df555b71SRichard Tran Mills     for (i=0; i<m; i++) {
611df555b71SRichard Tran Mills       z[i] += y[i];
612df555b71SRichard Tran Mills     }
613df555b71SRichard Tran Mills   }
614df555b71SRichard Tran Mills 
615df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
616df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
617df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
618df555b71SRichard Tran Mills   PetscFunctionReturn(0);
619df555b71SRichard Tran Mills }
620d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
621df555b71SRichard Tran Mills 
62250a5026bSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_SP2M
623ff03dc53SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
624ff03dc53SRichard Tran Mills {
625ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
626ff03dc53SRichard Tran Mills   const PetscScalar *x;
627ff03dc53SRichard Tran Mills   PetscScalar       *y,*z;
628ff03dc53SRichard Tran Mills   const MatScalar   *aa;
629ff03dc53SRichard Tran Mills   PetscErrorCode    ierr;
630ff03dc53SRichard Tran Mills   PetscInt          m=A->rmap->n;
631db63039fSRichard Tran Mills   PetscInt          n=A->cmap->n;
632ff03dc53SRichard Tran Mills   const PetscInt    *aj,*ai;
633ff03dc53SRichard Tran Mills   PetscInt          i;
634ff03dc53SRichard Tran Mills 
635ff03dc53SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
636ff03dc53SRichard Tran Mills   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
637a84739b8SRichard Tran Mills   PetscScalar       alpha = 1.0;
638db63039fSRichard Tran Mills   PetscScalar       beta;
639a84739b8SRichard Tran Mills   char              matdescra[6];
6404a2a386eSRichard Tran Mills 
6414a2a386eSRichard Tran Mills   PetscFunctionBegin;
642a84739b8SRichard Tran Mills   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
643a84739b8SRichard Tran Mills   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
644a84739b8SRichard Tran Mills 
6454a2a386eSRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6464a2a386eSRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
6474a2a386eSRichard Tran Mills   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
6484a2a386eSRichard Tran Mills   aa   = a->a;  /* Nonzero elements stored row-by-row. */
6494a2a386eSRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
6504a2a386eSRichard Tran Mills 
6514a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
652a84739b8SRichard Tran Mills   if (zz == yy) {
653a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
654db63039fSRichard Tran Mills     beta = 1.0;
655969800c5SRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
656a84739b8SRichard Tran Mills   } else {
657db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
658db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
659db63039fSRichard Tran Mills     beta = 0.0;
660db63039fSRichard Tran Mills     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
661969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
6624a2a386eSRichard Tran Mills       z[i] += y[i];
6634a2a386eSRichard Tran Mills     }
664a84739b8SRichard Tran Mills   }
6654a2a386eSRichard Tran Mills 
6664a2a386eSRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
6674a2a386eSRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6684a2a386eSRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
6694a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
6704a2a386eSRichard Tran Mills }
67150a5026bSRichard Tran Mills #endif
6724a2a386eSRichard Tran Mills 
673d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
674df555b71SRichard Tran Mills PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
675df555b71SRichard Tran Mills {
676df555b71SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
677df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
678df555b71SRichard Tran Mills   const PetscScalar *x;
679df555b71SRichard Tran Mills   PetscScalar       *y,*z;
680df555b71SRichard Tran Mills   PetscErrorCode    ierr;
681969800c5SRichard Tran Mills   PetscInt          n=A->cmap->n;
682df555b71SRichard Tran Mills   PetscInt          i;
683551aa5c8SRichard Tran Mills   PetscObjectState  state;
684df555b71SRichard Tran Mills 
685df555b71SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
686df555b71SRichard Tran Mills   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
687df555b71SRichard Tran Mills 
688df555b71SRichard Tran Mills   PetscFunctionBegin;
689df555b71SRichard Tran Mills 
69038987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
69138987b35SRichard Tran Mills   if(!a->nz) {
69238987b35SRichard Tran Mills     PetscInt i;
69338987b35SRichard Tran Mills     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
69438987b35SRichard Tran Mills     for (i=0; i<n; i++) {
69538987b35SRichard Tran Mills       z[i] = y[i];
69638987b35SRichard Tran Mills     }
69738987b35SRichard Tran Mills     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
69838987b35SRichard Tran Mills     PetscFunctionReturn(0);
69938987b35SRichard Tran Mills   }
700f36dfe3fSRichard Tran Mills 
701df555b71SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
702df555b71SRichard Tran Mills   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
703df555b71SRichard Tran Mills 
7043fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
7053fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
7063fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
707551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
708551aa5c8SRichard Tran Mills   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
7093fa15762SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
7103fa15762SRichard Tran Mills   }
7113fa15762SRichard Tran Mills 
712df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
713df555b71SRichard Tran Mills   if (zz == yy) {
714df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
715df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
716db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
7179c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
718df555b71SRichard Tran Mills   } else {
719df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
720df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
721db63039fSRichard Tran Mills     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
7229c46acdfSRichard Tran Mills     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
723969800c5SRichard Tran Mills     for (i=0; i<n; i++) {
724df555b71SRichard Tran Mills       z[i] += y[i];
725df555b71SRichard Tran Mills     }
726df555b71SRichard Tran Mills   }
727df555b71SRichard Tran Mills 
728df555b71SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
729df555b71SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
730df555b71SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
731df555b71SRichard Tran Mills   PetscFunctionReturn(0);
732df555b71SRichard Tran Mills }
733d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
734df555b71SRichard Tran Mills 
73545fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
736aab60f1bSRichard Tran Mills /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because
737aab60f1bSRichard Tran Mills  * the MatMatMult() interface code calls MatMatMultNumeric() in this case.
7383ecbffd0SRichard Tran Mills  * For releases of MKL prior to version 18, update 2:
739aab60f1bSRichard Tran Mills  * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
740aab60f1bSRichard Tran Mills  * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
741aab60f1bSRichard Tran Mills  * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
742aab60f1bSRichard Tran Mills  * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
74345fbe478SRichard Tran Mills PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
74445fbe478SRichard Tran Mills {
74545fbe478SRichard Tran Mills   Mat_SeqAIJMKL    *a, *b;
74645fbe478SRichard Tran Mills   sparse_matrix_t  csrA, csrB, csrC;
74745fbe478SRichard Tran Mills   PetscErrorCode   ierr;
74845fbe478SRichard Tran Mills   sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
749551aa5c8SRichard Tran Mills   PetscObjectState state;
75045fbe478SRichard Tran Mills 
75145fbe478SRichard Tran Mills   PetscFunctionBegin;
75245fbe478SRichard Tran Mills   a = (Mat_SeqAIJMKL*)A->spptr;
75345fbe478SRichard Tran Mills   b = (Mat_SeqAIJMKL*)B->spptr;
754551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
755551aa5c8SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
75645fbe478SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
75745fbe478SRichard Tran Mills   }
758551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
759551aa5c8SRichard Tran Mills   if (!b->sparse_optimized || b->state != state) {
76045fbe478SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(B);
76145fbe478SRichard Tran Mills   }
76245fbe478SRichard Tran Mills   csrA = a->csrA;
76345fbe478SRichard Tran Mills   csrB = b->csrA;
76445fbe478SRichard Tran Mills 
76545fbe478SRichard Tran Mills   stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
7669c46acdfSRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
76745fbe478SRichard Tran Mills 
7686c87cf42SRichard Tran Mills   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
76945fbe478SRichard Tran Mills 
77045fbe478SRichard Tran Mills   PetscFunctionReturn(0);
77145fbe478SRichard Tran Mills }
77245fbe478SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
77345fbe478SRichard Tran Mills 
774e8be1fc7SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_SP2M
775e8be1fc7SRichard Tran Mills PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
776e8be1fc7SRichard Tran Mills {
777e8be1fc7SRichard Tran Mills   Mat_SeqAIJMKL       *a, *b, *c;
778e8be1fc7SRichard Tran Mills   sparse_matrix_t     csrA, csrB, csrC;
779e8be1fc7SRichard Tran Mills   PetscErrorCode      ierr;
780e8be1fc7SRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
781e8be1fc7SRichard Tran Mills   struct matrix_descr descr_type_gen;
782e8be1fc7SRichard Tran Mills   PetscObjectState    state;
783e8be1fc7SRichard Tran Mills 
784e8be1fc7SRichard Tran Mills   PetscFunctionBegin;
785e8be1fc7SRichard Tran Mills   a = (Mat_SeqAIJMKL*)A->spptr;
786e8be1fc7SRichard Tran Mills   b = (Mat_SeqAIJMKL*)B->spptr;
787e8be1fc7SRichard Tran Mills   c = (Mat_SeqAIJMKL*)C->spptr;
788e8be1fc7SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
789e8be1fc7SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
790e8be1fc7SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
791e8be1fc7SRichard Tran Mills   }
792e8be1fc7SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
793e8be1fc7SRichard Tran Mills   if (!b->sparse_optimized || b->state != state) {
794e8be1fc7SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(B);
795e8be1fc7SRichard Tran Mills   }
796e8be1fc7SRichard Tran Mills   csrA = a->csrA;
797e8be1fc7SRichard Tran Mills   csrB = b->csrA;
798e8be1fc7SRichard Tran Mills   csrC = c->csrA;
799e8be1fc7SRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
800e8be1fc7SRichard Tran Mills 
801e8be1fc7SRichard Tran Mills   stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
802e8be1fc7SRichard Tran Mills                          SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
803e8be1fc7SRichard Tran Mills                          SPARSE_STAGE_FINALIZE_MULT,&csrC);
804e8be1fc7SRichard Tran Mills 
805e8be1fc7SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");
806e8be1fc7SRichard Tran Mills 
807e8be1fc7SRichard Tran Mills   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
8084f53af40SRichard Tran Mills   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
809e8be1fc7SRichard Tran Mills 
810e8be1fc7SRichard Tran Mills   PetscFunctionReturn(0);
811e8be1fc7SRichard Tran Mills }
812e8be1fc7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M */
813e8be1fc7SRichard Tran Mills 
814372ec6bbSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
815372ec6bbSRichard Tran Mills PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
816372ec6bbSRichard Tran Mills {
817372ec6bbSRichard Tran Mills   Mat_SeqAIJMKL    *a, *b;
818372ec6bbSRichard Tran Mills   sparse_matrix_t  csrA, csrB, csrC;
819372ec6bbSRichard Tran Mills   PetscErrorCode   ierr;
820372ec6bbSRichard Tran Mills   sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
821551aa5c8SRichard Tran Mills   PetscObjectState state;
822372ec6bbSRichard Tran Mills 
823372ec6bbSRichard Tran Mills   PetscFunctionBegin;
824372ec6bbSRichard Tran Mills   a = (Mat_SeqAIJMKL*)A->spptr;
825372ec6bbSRichard Tran Mills   b = (Mat_SeqAIJMKL*)B->spptr;
826551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
827551aa5c8SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
828372ec6bbSRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
829372ec6bbSRichard Tran Mills   }
830551aa5c8SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
831551aa5c8SRichard Tran Mills   if (!b->sparse_optimized || b->state != state) {
832372ec6bbSRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(B);
833372ec6bbSRichard Tran Mills   }
834372ec6bbSRichard Tran Mills   csrA = a->csrA;
835372ec6bbSRichard Tran Mills   csrB = b->csrA;
836372ec6bbSRichard Tran Mills 
837372ec6bbSRichard Tran Mills   stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
8389c46acdfSRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
839372ec6bbSRichard Tran Mills 
840372ec6bbSRichard Tran Mills   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
841372ec6bbSRichard Tran Mills 
842372ec6bbSRichard Tran Mills   PetscFunctionReturn(0);
843372ec6bbSRichard Tran Mills }
844372ec6bbSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
845372ec6bbSRichard Tran Mills 
8464f53af40SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_SP2M
8474f53af40SRichard Tran Mills PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C)
8484f53af40SRichard Tran Mills {
8494f53af40SRichard Tran Mills   Mat_SeqAIJMKL       *a, *p, *c;
8504f53af40SRichard Tran Mills   sparse_matrix_t     csrA, csrP, csrC;
8514f53af40SRichard Tran Mills   PetscBool           set, flag;
8524f53af40SRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
853b9e1dd46SRichard Tran Mills   struct matrix_descr descr_type_sym;
8544f53af40SRichard Tran Mills   PetscObjectState    state;
8554f53af40SRichard Tran Mills   PetscErrorCode      ierr;
8564f53af40SRichard Tran Mills 
8574f53af40SRichard Tran Mills   PetscFunctionBegin;
8584f53af40SRichard Tran Mills   ierr = MatIsSymmetricKnown(A,&set,&flag);
8594f53af40SRichard Tran Mills   if (!set || (set && !flag)) {
8604f53af40SRichard Tran Mills     ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr);
8614f53af40SRichard Tran Mills     PetscFunctionReturn(0);
8624f53af40SRichard Tran Mills   }
8634f53af40SRichard Tran Mills 
8644f53af40SRichard Tran Mills   a = (Mat_SeqAIJMKL*)A->spptr;
8654f53af40SRichard Tran Mills   p = (Mat_SeqAIJMKL*)P->spptr;
8664f53af40SRichard Tran Mills   c = (Mat_SeqAIJMKL*)C->spptr;
8674f53af40SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
8684f53af40SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
8694f53af40SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
8704f53af40SRichard Tran Mills   }
8714f53af40SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
8724f53af40SRichard Tran Mills   if (!p->sparse_optimized || p->state != state) {
8734f53af40SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(P);
8744f53af40SRichard Tran Mills   }
8754f53af40SRichard Tran Mills   csrA = a->csrA;
8764f53af40SRichard Tran Mills   csrP = p->csrA;
8774f53af40SRichard Tran Mills   csrC = c->csrA;
878b9e1dd46SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
879b9e1dd46SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
880b9e1dd46SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
8814f53af40SRichard Tran Mills 
882f8990b4aSRichard Tran Mills   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
883b9e1dd46SRichard Tran Mills   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
8844f53af40SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");
8854f53af40SRichard Tran Mills 
8864f53af40SRichard Tran Mills   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
8874f53af40SRichard Tran Mills   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
8884f53af40SRichard Tran Mills 
8894f53af40SRichard Tran Mills   PetscFunctionReturn(0);
8904f53af40SRichard Tran Mills }
8914f53af40SRichard Tran Mills #endif
8924f53af40SRichard Tran Mills 
8934f53af40SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_SP2M
8944f53af40SRichard Tran Mills PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
8954f53af40SRichard Tran Mills {
8964f53af40SRichard Tran Mills   Mat_SeqAIJMKL       *a, *p;
8974f53af40SRichard Tran Mills   sparse_matrix_t     csrA, csrP, csrC;
8984f53af40SRichard Tran Mills   PetscBool           set, flag;
8994f53af40SRichard Tran Mills   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
900b9e1dd46SRichard Tran Mills   struct matrix_descr descr_type_sym;
9014f53af40SRichard Tran Mills   PetscObjectState    state;
9024f53af40SRichard Tran Mills   PetscErrorCode      ierr;
9034f53af40SRichard Tran Mills 
9044f53af40SRichard Tran Mills   PetscFunctionBegin;
9054f53af40SRichard Tran Mills   ierr = MatIsSymmetricKnown(A,&set,&flag);
9064f53af40SRichard Tran Mills   if (!set || (set && !flag)) {
9074f53af40SRichard Tran Mills     ierr = MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);CHKERRQ(ierr);
9084f53af40SRichard Tran Mills     PetscFunctionReturn(0);
9094f53af40SRichard Tran Mills   }
9104f53af40SRichard Tran Mills 
9114f53af40SRichard Tran Mills   if (scall == MAT_REUSE_MATRIX) {
9124f53af40SRichard Tran Mills     ierr = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);CHKERRQ(ierr);
9134f53af40SRichard Tran Mills     PetscFunctionReturn(0);
9144f53af40SRichard Tran Mills   }
9154f53af40SRichard Tran Mills 
9164f53af40SRichard Tran Mills   a = (Mat_SeqAIJMKL*)A->spptr;
9174f53af40SRichard Tran Mills   p = (Mat_SeqAIJMKL*)P->spptr;
9184f53af40SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
9194f53af40SRichard Tran Mills   if (!a->sparse_optimized || a->state != state) {
9204f53af40SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(A);
9214f53af40SRichard Tran Mills   }
9224f53af40SRichard Tran Mills   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
9234f53af40SRichard Tran Mills   if (!p->sparse_optimized || p->state != state) {
9244f53af40SRichard Tran Mills     MatSeqAIJMKL_create_mkl_handle(P);
9254f53af40SRichard Tran Mills   }
9264f53af40SRichard Tran Mills   csrA = a->csrA;
9274f53af40SRichard Tran Mills   csrP = p->csrA;
928b9e1dd46SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
929b9e1dd46SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
930b9e1dd46SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
9314f53af40SRichard Tran Mills 
932f8990b4aSRichard Tran Mills   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
933b9e1dd46SRichard Tran Mills   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT);
9344f53af40SRichard Tran Mills   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr");
9354f53af40SRichard Tran Mills 
9364f53af40SRichard Tran Mills   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
9374f53af40SRichard Tran Mills   ierr = MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
9384f53af40SRichard Tran Mills 
9394f53af40SRichard Tran Mills   PetscFunctionReturn(0);
9404f53af40SRichard Tran Mills }
9414f53af40SRichard Tran Mills #endif
9424f53af40SRichard Tran Mills 
9434a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
9444a2a386eSRichard Tran Mills  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
9454a2a386eSRichard Tran Mills  * routine, but can also be used to convert an assembled SeqAIJ matrix
9464a2a386eSRichard Tran Mills  * into a SeqAIJMKL one. */
9474a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
9484a2a386eSRichard Tran Mills {
9494a2a386eSRichard Tran Mills   PetscErrorCode ierr;
9504a2a386eSRichard Tran Mills   Mat            B = *newmat;
9514a2a386eSRichard Tran Mills   Mat_SeqAIJMKL  *aijmkl;
952c9d46305SRichard Tran Mills   PetscBool      set;
953e9c94282SRichard Tran Mills   PetscBool      sametype;
9544a2a386eSRichard Tran Mills 
9554a2a386eSRichard Tran Mills   PetscFunctionBegin;
9564a2a386eSRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
9574a2a386eSRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
9584a2a386eSRichard Tran Mills   }
9594a2a386eSRichard Tran Mills 
960e9c94282SRichard Tran Mills   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
961e9c94282SRichard Tran Mills   if (sametype) PetscFunctionReturn(0);
962e9c94282SRichard Tran Mills 
9634a2a386eSRichard Tran Mills   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
9644a2a386eSRichard Tran Mills   B->spptr = (void*) aijmkl;
9654a2a386eSRichard Tran Mills 
966df555b71SRichard Tran Mills   /* Set function pointers for methods that we inherit from AIJ but override.
967969800c5SRichard Tran Mills    * We also parse some command line options below, since those determine some of the methods we point to. */
9684a2a386eSRichard Tran Mills   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
9694a2a386eSRichard Tran Mills   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
9704a2a386eSRichard Tran Mills   B->ops->destroy          = MatDestroy_SeqAIJMKL;
971c9d46305SRichard Tran Mills 
9724abfa3b3SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
973d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
974d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
975a8327b06SKarl Rupp #else
976d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_TRUE;
977d995685eSRichard Tran Mills #endif
9785b49642aSRichard Tran Mills   aijmkl->eager_inspection = PETSC_FALSE;
9794abfa3b3SRichard Tran Mills 
9804abfa3b3SRichard Tran Mills   /* Parse command line options. */
981c9d46305SRichard Tran Mills   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
982c9d46305SRichard Tran Mills   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
9835b49642aSRichard Tran Mills   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
984c9d46305SRichard Tran Mills   ierr = PetscOptionsEnd();CHKERRQ(ierr);
985d995685eSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
986d995685eSRichard Tran Mills   if(!aijmkl->no_SpMV2) {
987d995685eSRichard Tran Mills     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
988d995685eSRichard Tran Mills     aijmkl->no_SpMV2 = PETSC_TRUE;
989d995685eSRichard Tran Mills   }
990d995685eSRichard Tran Mills #endif
991c9d46305SRichard Tran Mills 
992d995685eSRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
993df555b71SRichard Tran Mills   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
994969800c5SRichard Tran Mills   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
995df555b71SRichard Tran Mills   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
996969800c5SRichard Tran Mills   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
99745fbe478SRichard Tran Mills   B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
998e8be1fc7SRichard Tran Mills # ifdef PETSC_HAVE_MKL_SPARSE_SP2M
999e8be1fc7SRichard Tran Mills   B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
10004f53af40SRichard Tran Mills #   ifndef PETSC_USE_COMPLEX
10014f53af40SRichard Tran Mills   B->ops->ptap             = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2;
10024f53af40SRichard Tran Mills   B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
10034f53af40SRichard Tran Mills #   endif
1004e8be1fc7SRichard Tran Mills # endif
1005a557fde5SRichard Tran Mills   B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
100650a5026bSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
100750a5026bSRichard Tran Mills 
100850a5026bSRichard Tran Mills #ifndef PETSC_HAVE_MKL_SPARSE_SP2M
100950a5026bSRichard Tran Mills   /* In the same release in which MKL introduced mkl_sparse_sp2m() (version 18, update 2), the old sparse BLAS interfaces were
101050a5026bSRichard Tran Mills    * marked as deprecated. If "no_SpMV2" has been specified by the user and MKL 18u2 or later is being used, we use the new
101150a5026bSRichard Tran Mills    * _SpMV2 routines (set above), but do not call mkl_sparse_optimize(), which results in the old numerical kernels (without the
101250a5026bSRichard Tran Mills    * inspector-executor model) being used. For versions in which the older interface has not been deprecated, we use the old
101350a5026bSRichard Tran Mills    * interface. */
101450a5026bSRichard Tran Mills   if (aijmkl->no_SpMV2) {
10154a2a386eSRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL;
1016969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
10174a2a386eSRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1018969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1019c9d46305SRichard Tran Mills   }
102050a5026bSRichard Tran Mills #endif
10214a2a386eSRichard Tran Mills 
10224a2a386eSRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
1023e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
1024e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
1025e9c94282SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
102645fbe478SRichard Tran Mills   if(!aijmkl->no_SpMV2) {
102745fbe478SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
102845fbe478SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
1029e8be1fc7SRichard Tran Mills #ifdef PETSC_HAVE_MKL_SPARSE_SP2M
1030e8be1fc7SRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
1031e8be1fc7SRichard Tran Mills #endif
1032372ec6bbSRichard Tran Mills     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
103345fbe478SRichard Tran Mills #endif
103445fbe478SRichard Tran Mills   }
10354a2a386eSRichard Tran Mills 
10364a2a386eSRichard Tran Mills   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
10374a2a386eSRichard Tran Mills   *newmat = B;
10384a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
10394a2a386eSRichard Tran Mills }
10404a2a386eSRichard Tran Mills 
10414a2a386eSRichard Tran Mills /*@C
10424a2a386eSRichard Tran Mills    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
10434a2a386eSRichard Tran Mills    This type inherits from AIJ and is largely identical, but uses sparse BLAS
10444a2a386eSRichard Tran Mills    routines from Intel MKL whenever possible.
104590147e49SRichard Tran Mills    If the installed version of MKL supports the "SpMV2" sparse
104690147e49SRichard Tran Mills    inspector-executor routines, then those are used by default.
1047597ee276SRichard Tran Mills    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1048597ee276SRichard Tran Mills    symmetric A) operations are currently supported.
1049597ee276SRichard Tran Mills    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
105090147e49SRichard Tran Mills 
10514a2a386eSRichard Tran Mills    Collective on MPI_Comm
10524a2a386eSRichard Tran Mills 
10534a2a386eSRichard Tran Mills    Input Parameters:
10544a2a386eSRichard Tran Mills +  comm - MPI communicator, set to PETSC_COMM_SELF
10554a2a386eSRichard Tran Mills .  m - number of rows
10564a2a386eSRichard Tran Mills .  n - number of columns
10574a2a386eSRichard Tran Mills .  nz - number of nonzeros per row (same for all rows)
10584a2a386eSRichard Tran Mills -  nnz - array containing the number of nonzeros in the various rows
10594a2a386eSRichard Tran Mills          (possibly different for each row) or NULL
10604a2a386eSRichard Tran Mills 
10614a2a386eSRichard Tran Mills    Output Parameter:
10624a2a386eSRichard Tran Mills .  A - the matrix
10634a2a386eSRichard Tran Mills 
106490147e49SRichard Tran Mills    Options Database Keys:
106566b7eeb6SRichard Tran Mills +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
106666b7eeb6SRichard Tran Mills -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
106790147e49SRichard Tran Mills 
10684a2a386eSRichard Tran Mills    Notes:
10694a2a386eSRichard Tran Mills    If nnz is given then nz is ignored
10704a2a386eSRichard Tran Mills 
10714a2a386eSRichard Tran Mills    Level: intermediate
10724a2a386eSRichard Tran Mills 
107390147e49SRichard Tran Mills .keywords: matrix, MKL, sparse, parallel
10744a2a386eSRichard Tran Mills 
10754a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
10764a2a386eSRichard Tran Mills @*/
10774a2a386eSRichard Tran Mills PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
10784a2a386eSRichard Tran Mills {
10794a2a386eSRichard Tran Mills   PetscErrorCode ierr;
10804a2a386eSRichard Tran Mills 
10814a2a386eSRichard Tran Mills   PetscFunctionBegin;
10824a2a386eSRichard Tran Mills   ierr = MatCreate(comm,A);CHKERRQ(ierr);
10834a2a386eSRichard Tran Mills   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
10844a2a386eSRichard Tran Mills   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
10854a2a386eSRichard Tran Mills   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
10864a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
10874a2a386eSRichard Tran Mills }
10884a2a386eSRichard Tran Mills 
10894a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
10904a2a386eSRichard Tran Mills {
10914a2a386eSRichard Tran Mills   PetscErrorCode ierr;
10924a2a386eSRichard Tran Mills 
10934a2a386eSRichard Tran Mills   PetscFunctionBegin;
10944a2a386eSRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
10954a2a386eSRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
10964a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
10974a2a386eSRichard Tran Mills }
1098