xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision f4f49eeac7efa77fffa46b7ff95a3ed169f659ed)
14a2a386eSRichard Tran Mills /*
24a2a386eSRichard Tran Mills   Defines basic operations for the MATSEQAIJMKL matrix class.
34a2a386eSRichard Tran Mills   This class is derived from the MATSEQAIJ class and retains the
44a2a386eSRichard Tran Mills   compressed row storage (aka Yale sparse matrix format) but uses
54a2a386eSRichard Tran Mills   sparse BLAS operations from the Intel Math Kernel Library (MKL)
64a2a386eSRichard Tran Mills   wherever possible.
74a2a386eSRichard Tran Mills */
84a2a386eSRichard Tran Mills 
94a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h>
104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11bdfea6dbSBarry Smith #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
12bdfea6dbSBarry Smith   #define MKL_ILP64
13bdfea6dbSBarry Smith #endif
14b9e7e5c1SBarry Smith #include <mkl_spblas.h>
154a2a386eSRichard Tran Mills 
164a2a386eSRichard Tran Mills typedef struct {
17c9d46305SRichard Tran Mills   PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
185b49642aSRichard Tran Mills   PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
194abfa3b3SRichard Tran Mills   PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
20551aa5c8SRichard Tran Mills   PetscObjectState state;
21ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
22df555b71SRichard Tran Mills   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
23df555b71SRichard Tran Mills   struct matrix_descr descr;
24b8cbc1fbSRichard Tran Mills #endif
254a2a386eSRichard Tran Mills } Mat_SeqAIJMKL;
264a2a386eSRichard Tran Mills 
274a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);
284a2a386eSRichard Tran Mills 
29d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
30d71ae5a4SJacob Faibussowitsch {
314a2a386eSRichard Tran Mills   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
324a2a386eSRichard Tran Mills   /* so we will ignore 'MatType type'. */
334a2a386eSRichard Tran Mills   Mat B = *newmat;
34ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
354a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
36c1d5218aSRichard Tran Mills #endif
374a2a386eSRichard Tran Mills 
384a2a386eSRichard Tran Mills   PetscFunctionBegin;
399566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
404a2a386eSRichard Tran Mills 
414a2a386eSRichard Tran Mills   /* Reset the original function pointers. */
4254871a98SRichard Tran Mills   B->ops->duplicate               = MatDuplicate_SeqAIJ;
434a2a386eSRichard Tran Mills   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
444a2a386eSRichard Tran Mills   B->ops->destroy                 = MatDestroy_SeqAIJ;
4554871a98SRichard Tran Mills   B->ops->mult                    = MatMult_SeqAIJ;
46ff03dc53SRichard Tran Mills   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
4754871a98SRichard Tran Mills   B->ops->multadd                 = MatMultAdd_SeqAIJ;
48ff03dc53SRichard Tran Mills   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
49190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
50431879ecSRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
51e8be1fc7SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
52190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
53190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
544f53af40SRichard Tran Mills   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
554a2a386eSRichard Tran Mills 
569566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));
574222ddf1SHong Zhang 
58ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
594abfa3b3SRichard Tran Mills   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
60e9c94282SRichard Tran Mills    * simply involves destroying the MKL sparse matrix handle and then freeing
61e9c94282SRichard Tran Mills    * the spptr pointer. */
62a8327b06SKarl Rupp   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;
63a8327b06SKarl Rupp 
64792fecdfSBarry Smith   if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
65ddf6f99aSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
669566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->spptr));
674a2a386eSRichard Tran Mills 
684a2a386eSRichard Tran Mills   /* Change the type of B to MATSEQAIJ. */
699566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));
704a2a386eSRichard Tran Mills 
714a2a386eSRichard Tran Mills   *newmat = B;
723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
734a2a386eSRichard Tran Mills }
744a2a386eSRichard Tran Mills 
7566976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
76d71ae5a4SJacob Faibussowitsch {
774a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
784a2a386eSRichard Tran Mills 
794a2a386eSRichard Tran Mills   PetscFunctionBegin;
80edc89de7SRichard Tran Mills   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
81e9c94282SRichard Tran Mills   if (aijmkl) {
824a2a386eSRichard Tran Mills     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
83ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
84792fecdfSBarry Smith     if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
854abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
869566063dSJacob Faibussowitsch     PetscCall(PetscFree(A->spptr));
87e9c94282SRichard Tran Mills   }
884a2a386eSRichard Tran Mills 
894a2a386eSRichard Tran Mills   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
904a2a386eSRichard Tran Mills    * to destroy everything that remains. */
919566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
922fe279fdSBarry Smith   /* I don't call MatSetType().  I believe this is because that
934a2a386eSRichard Tran Mills    * is only to be called when *building* a matrix.  I could be wrong, but
944a2a386eSRichard Tran Mills    * that is how things work for the SuperLU matrix class. */
952e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
969566063dSJacob Faibussowitsch   PetscCall(MatDestroy_SeqAIJ(A));
973ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
984a2a386eSRichard Tran Mills }
994a2a386eSRichard Tran Mills 
100190ae7a4SRichard Tran Mills /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
1015b49642aSRichard Tran Mills  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
1025b49642aSRichard Tran Mills  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
1035b49642aSRichard Tran Mills  * handle, creates a new one, and then calls mkl_sparse_optimize().
1045b49642aSRichard Tran Mills  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
1055b49642aSRichard Tran Mills  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
1065b49642aSRichard Tran Mills  * an unoptimized matrix handle here. */
107d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
108d71ae5a4SJacob Faibussowitsch {
109ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1106e369cd5SRichard Tran Mills   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
1116e369cd5SRichard Tran Mills    * does nothing. We make it callable anyway in this case because it cuts
1126e369cd5SRichard Tran Mills    * down on littering the code with #ifdefs. */
11345fbe478SRichard Tran Mills   PetscFunctionBegin;
1143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1156e369cd5SRichard Tran Mills #else
116a8327b06SKarl Rupp   Mat_SeqAIJ    *a      = (Mat_SeqAIJ *)A->data;
117a8327b06SKarl Rupp   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
118a8327b06SKarl Rupp   PetscInt       m, n;
119a8327b06SKarl Rupp   MatScalar     *aa;
120a8327b06SKarl Rupp   PetscInt      *aj, *ai;
1214a2a386eSRichard Tran Mills 
122a8327b06SKarl Rupp   PetscFunctionBegin;
123e626a176SRichard Tran Mills   #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
124e626a176SRichard Tran Mills   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
125e626a176SRichard Tran Mills    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
126e626a176SRichard Tran Mills    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
127e626a176SRichard Tran Mills    * mkl_sparse_optimize() later. */
1283ba16761SJacob Faibussowitsch   if (aijmkl->no_SpMV2) PetscFunctionReturn(PETSC_SUCCESS);
1294d51fa23SRichard Tran Mills   #endif
1306e369cd5SRichard Tran Mills 
1310632b357SRichard Tran Mills   if (aijmkl->sparse_optimized) {
1320632b357SRichard Tran Mills     /* Matrix has been previously assembled and optimized. Must destroy old
1330632b357SRichard Tran Mills      * matrix handle before running the optimization step again. */
134792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
1350632b357SRichard Tran Mills   }
1368d3fe1b0SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1376e369cd5SRichard Tran Mills 
138c9d46305SRichard Tran Mills   /* Now perform the SpMV2 setup and matrix optimization. */
139df555b71SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
140df555b71SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
141df555b71SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
14258678438SRichard Tran Mills   m                  = A->rmap->n;
14358678438SRichard Tran Mills   n                  = A->cmap->n;
144df555b71SRichard Tran Mills   aj                 = a->j; /* aj[k] gives column index for element aa[k]. */
145df555b71SRichard Tran Mills   aa                 = a->a; /* Nonzero elements stored row-by-row. */
146df555b71SRichard Tran Mills   ai                 = a->i; /* ai[k] is the position in aa and aj where row k starts. */
1471495fedeSRichard Tran Mills   if (a->nz && aa && !A->structure_only) {
1488d3fe1b0SRichard Tran Mills     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1498d3fe1b0SRichard Tran Mills      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
150bdfea6dbSBarry Smith     PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa);
151792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
152792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
15348a46eb9SPierre Jolivet     if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
1544abfa3b3SRichard Tran Mills     aijmkl->sparse_optimized = PETSC_TRUE;
155*f4f49eeaSPierre Jolivet     PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
156190ae7a4SRichard Tran Mills   } else {
157f3fa974cSJacob Faibussowitsch     aijmkl->csrA = NULL;
158c9d46305SRichard Tran Mills   }
1593ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
160d995685eSRichard Tran Mills #endif
1616e369cd5SRichard Tran Mills }
1626e369cd5SRichard Tran Mills 
163b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
164190ae7a4SRichard Tran Mills /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
165d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
166d71ae5a4SJacob Faibussowitsch {
16719afcda9SRichard Tran Mills   sparse_index_base_t indexing;
168190ae7a4SRichard Tran Mills   PetscInt            m, n;
16945fbe478SRichard Tran Mills   PetscInt           *aj, *ai, *dummy;
17019afcda9SRichard Tran Mills   MatScalar          *aa;
17119afcda9SRichard Tran Mills   Mat_SeqAIJMKL      *aijmkl;
17219afcda9SRichard Tran Mills 
173362febeeSStefano Zampini   PetscFunctionBegin;
174190ae7a4SRichard Tran Mills   if (csrA) {
17545fbe478SRichard Tran Mills     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
176bdfea6dbSBarry Smith     PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
1775f80ce2aSJacob Faibussowitsch     PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
178190ae7a4SRichard Tran Mills   } else {
179f3fa974cSJacob Faibussowitsch     aj = ai = NULL;
180f3fa974cSJacob Faibussowitsch     aa      = NULL;
181aab60f1bSRichard Tran Mills   }
182190ae7a4SRichard Tran Mills 
1839566063dSJacob Faibussowitsch   PetscCall(MatSetType(A, MATSEQAIJ));
1849566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
185aab60f1bSRichard Tran Mills   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
186aab60f1bSRichard Tran Mills    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
187aab60f1bSRichard Tran Mills    * they will be destroyed when the MKL handle is destroyed.
188aab60f1bSRichard Tran Mills    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
189190ae7a4SRichard Tran Mills   if (csrA) {
1909566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
191190ae7a4SRichard Tran Mills   } else {
192190ae7a4SRichard Tran Mills     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
1939566063dSJacob Faibussowitsch     PetscCall(MatSetUp(A));
1949566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1959566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
196190ae7a4SRichard Tran Mills   }
19719afcda9SRichard Tran Mills 
19819afcda9SRichard Tran Mills   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
19919afcda9SRichard Tran Mills    * Now turn it into a MATSEQAIJMKL. */
2009566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
2016c87cf42SRichard Tran Mills 
20219afcda9SRichard Tran Mills   aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
20319afcda9SRichard Tran Mills   aijmkl->csrA = csrA;
2046c87cf42SRichard Tran Mills 
20519afcda9SRichard Tran Mills   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
20619afcda9SRichard Tran Mills    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
20719afcda9SRichard Tran Mills    * and just need to be able to run the MKL optimization step. */
208f3fd1758SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
209f3fd1758SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
210f3fd1758SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
211190ae7a4SRichard Tran Mills   if (csrA) {
212792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
213792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
2141950a7e7SRichard Tran Mills   }
215*f4f49eeaSPierre Jolivet   PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
2163ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
21719afcda9SRichard Tran Mills }
218b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
219190ae7a4SRichard Tran Mills 
220e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
221e8be1fc7SRichard Tran Mills  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
222e8be1fc7SRichard Tran Mills  * MatMatMultNumeric(). */
223b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
224d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
225d71ae5a4SJacob Faibussowitsch {
226e8be1fc7SRichard Tran Mills   PetscInt            i;
227e8be1fc7SRichard Tran Mills   PetscInt            nrows, ncols;
228e8be1fc7SRichard Tran Mills   PetscInt            nz;
229e8be1fc7SRichard Tran Mills   PetscInt           *ai, *aj, *dummy;
230e8be1fc7SRichard Tran Mills   PetscScalar        *aa;
2311495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
232e8be1fc7SRichard Tran Mills   sparse_index_base_t indexing;
233e8be1fc7SRichard Tran Mills 
234362febeeSStefano Zampini   PetscFunctionBegin;
235190ae7a4SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
2363ba16761SJacob Faibussowitsch   if (!aijmkl->csrA) PetscFunctionReturn(PETSC_SUCCESS);
237190ae7a4SRichard Tran Mills 
238e8be1fc7SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
239bdfea6dbSBarry Smith   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
240e8be1fc7SRichard Tran Mills 
241e8be1fc7SRichard Tran Mills   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
242e8be1fc7SRichard Tran Mills    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
243e8be1fc7SRichard Tran Mills   for (i = 0; i < nrows; i++) {
244e8be1fc7SRichard Tran Mills     nz = ai[i + 1] - ai[i];
2459566063dSJacob Faibussowitsch     PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
246e8be1fc7SRichard Tran Mills   }
247e8be1fc7SRichard Tran Mills 
2489566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
2499566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
250e8be1fc7SRichard Tran Mills 
251*f4f49eeaSPierre Jolivet   PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
252a7180b50SRichard Tran Mills   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
253a7180b50SRichard Tran Mills    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
254a7180b50SRichard Tran Mills    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
255a7180b50SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
2563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
257e8be1fc7SRichard Tran Mills }
258b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
259e8be1fc7SRichard Tran Mills 
2603849ddb2SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
261d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
262d71ae5a4SJacob Faibussowitsch {
2633849ddb2SRichard Tran Mills   PetscInt            i, j, k;
2643849ddb2SRichard Tran Mills   PetscInt            nrows, ncols;
2653849ddb2SRichard Tran Mills   PetscInt            nz;
2663849ddb2SRichard Tran Mills   PetscInt           *ai, *aj, *dummy;
2673849ddb2SRichard Tran Mills   PetscScalar        *aa;
2681495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
2693849ddb2SRichard Tran Mills   sparse_index_base_t indexing;
2703849ddb2SRichard Tran Mills 
271362febeeSStefano Zampini   PetscFunctionBegin;
2729566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));
2733849ddb2SRichard Tran Mills 
2743849ddb2SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
2753849ddb2SRichard Tran Mills   if (!aijmkl->csrA) {
2769566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
2773ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2783849ddb2SRichard Tran Mills   }
2793849ddb2SRichard Tran Mills 
2803849ddb2SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
281bdfea6dbSBarry Smith   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
2823849ddb2SRichard Tran Mills 
2833849ddb2SRichard Tran Mills   k = 0;
2843849ddb2SRichard Tran Mills   for (i = 0; i < nrows; i++) {
2859566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
2863849ddb2SRichard Tran Mills     nz = ai[i + 1] - ai[i];
2873849ddb2SRichard Tran Mills     for (j = 0; j < nz; j++) {
2883849ddb2SRichard Tran Mills       if (aa) {
2899566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g)  ", aj[k], PetscRealPart(aa[k])));
2903849ddb2SRichard Tran Mills       } else {
2919566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
2923849ddb2SRichard Tran Mills       }
2933849ddb2SRichard Tran Mills       k++;
2943849ddb2SRichard Tran Mills     }
2959566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
2963849ddb2SRichard Tran Mills   }
2973ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2983849ddb2SRichard Tran Mills }
2993849ddb2SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
3003849ddb2SRichard Tran Mills 
30166976f2fSJacob Faibussowitsch static PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
302d71ae5a4SJacob Faibussowitsch {
3031495fedeSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
3046e369cd5SRichard Tran Mills   Mat_SeqAIJMKL *aijmkl_dest;
3056e369cd5SRichard Tran Mills 
3066e369cd5SRichard Tran Mills   PetscFunctionBegin;
3079566063dSJacob Faibussowitsch   PetscCall(MatDuplicate_SeqAIJ(A, op, M));
3086e369cd5SRichard Tran Mills   aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
3099566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
3106e369cd5SRichard Tran Mills   aijmkl_dest->sparse_optimized = PETSC_FALSE;
3111baa6e33SBarry Smith   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
3123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3136e369cd5SRichard Tran Mills }
3146e369cd5SRichard Tran Mills 
31566976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
316d71ae5a4SJacob Faibussowitsch {
3176e369cd5SRichard Tran Mills   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data;
3185b49642aSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
3196e369cd5SRichard Tran Mills 
3206e369cd5SRichard Tran Mills   PetscFunctionBegin;
3213ba16761SJacob Faibussowitsch   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
3226e369cd5SRichard Tran Mills 
3236e369cd5SRichard Tran Mills   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
3246e369cd5SRichard Tran Mills    * extra information and some different methods, call the AssemblyEnd
3256e369cd5SRichard Tran Mills    * routine for a MATSEQAIJ.
3266e369cd5SRichard Tran Mills    * I'm not sure if this is the best way to do this, but it avoids
327d96e85feSRichard Tran Mills    * a lot of code duplication. */
3286e369cd5SRichard Tran Mills   a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
3299566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
3306e369cd5SRichard Tran Mills 
3315b49642aSRichard Tran Mills   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
3325b49642aSRichard Tran Mills    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
3335b49642aSRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL *)A->spptr;
3341baa6e33SBarry Smith   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
3353ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3364a2a386eSRichard Tran Mills }
3374a2a386eSRichard Tran Mills 
338e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
33966976f2fSJacob Faibussowitsch static PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
340d71ae5a4SJacob Faibussowitsch {
3414a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
3424a2a386eSRichard Tran Mills   const PetscScalar *x;
3434a2a386eSRichard Tran Mills   PetscScalar       *y;
3444a2a386eSRichard Tran Mills   const MatScalar   *aa;
3454a2a386eSRichard Tran Mills   PetscInt           m     = A->rmap->n;
346db63039fSRichard Tran Mills   PetscInt           n     = A->cmap->n;
347db63039fSRichard Tran Mills   PetscScalar        alpha = 1.0;
348db63039fSRichard Tran Mills   PetscScalar        beta  = 0.0;
3494a2a386eSRichard Tran Mills   const PetscInt    *aj, *ai;
350db63039fSRichard Tran Mills   char               matdescra[6];
351db63039fSRichard Tran Mills 
3524a2a386eSRichard Tran Mills   /* Variables not in MatMult_SeqAIJ. */
353ff03dc53SRichard Tran Mills   char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
354ff03dc53SRichard Tran Mills 
355ff03dc53SRichard Tran Mills   PetscFunctionBegin;
356db63039fSRichard Tran Mills   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
357db63039fSRichard Tran Mills   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
3589566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3599566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
360ff03dc53SRichard Tran Mills   aj = a->j; /* aj[k] gives column index for element aa[k]. */
361ff03dc53SRichard Tran Mills   aa = a->a; /* Nonzero elements stored row-by-row. */
362ff03dc53SRichard Tran Mills   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
363ff03dc53SRichard Tran Mills 
364ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
365db63039fSRichard Tran Mills   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
366ff03dc53SRichard Tran Mills 
3679566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
3689566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3699566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
3703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
371ff03dc53SRichard Tran Mills }
3721950a7e7SRichard Tran Mills #endif
373ff03dc53SRichard Tran Mills 
374ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
375d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
376d71ae5a4SJacob Faibussowitsch {
377df555b71SRichard Tran Mills   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
378df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
379df555b71SRichard Tran Mills   const PetscScalar *x;
380df555b71SRichard Tran Mills   PetscScalar       *y;
381551aa5c8SRichard Tran Mills   PetscObjectState   state;
382df555b71SRichard Tran Mills 
383df555b71SRichard Tran Mills   PetscFunctionBegin;
38438987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
38538987b35SRichard Tran Mills   if (!a->nz) {
3869566063dSJacob Faibussowitsch     PetscCall(VecGetArray(yy, &y));
3879566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(y, A->rmap->n));
3889566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(yy, &y));
3893ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
39038987b35SRichard Tran Mills   }
391f36dfe3fSRichard Tran Mills 
3929566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3939566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
394df555b71SRichard Tran Mills 
3953fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
3963fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
3973fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
3989566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
3999566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
4003fa15762SRichard Tran Mills 
401df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMult. */
402792fecdfSBarry Smith   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
403df555b71SRichard Tran Mills 
4049566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
4059566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4069566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
4073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
408df555b71SRichard Tran Mills }
409d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
410df555b71SRichard Tran Mills 
411e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
41266976f2fSJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
413d71ae5a4SJacob Faibussowitsch {
414ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
415ff03dc53SRichard Tran Mills   const PetscScalar *x;
416ff03dc53SRichard Tran Mills   PetscScalar       *y;
417ff03dc53SRichard Tran Mills   const MatScalar   *aa;
418ff03dc53SRichard Tran Mills   PetscInt           m     = A->rmap->n;
419db63039fSRichard Tran Mills   PetscInt           n     = A->cmap->n;
420db63039fSRichard Tran Mills   PetscScalar        alpha = 1.0;
421db63039fSRichard Tran Mills   PetscScalar        beta  = 0.0;
422ff03dc53SRichard Tran Mills   const PetscInt    *aj, *ai;
423db63039fSRichard Tran Mills   char               matdescra[6];
424ff03dc53SRichard Tran Mills 
425ff03dc53SRichard Tran Mills   /* Variables not in MatMultTranspose_SeqAIJ. */
426ff03dc53SRichard Tran Mills   char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
4274a2a386eSRichard Tran Mills 
4284a2a386eSRichard Tran Mills   PetscFunctionBegin;
429969800c5SRichard Tran Mills   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
430969800c5SRichard Tran Mills   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
4319566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4329566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
4334a2a386eSRichard Tran Mills   aj = a->j; /* aj[k] gives column index for element aa[k]. */
4344a2a386eSRichard Tran Mills   aa = a->a; /* Nonzero elements stored row-by-row. */
4354a2a386eSRichard Tran Mills   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
4364a2a386eSRichard Tran Mills 
4374a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
438db63039fSRichard Tran Mills   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
4394a2a386eSRichard Tran Mills 
4409566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
4419566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4429566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
4433ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4444a2a386eSRichard Tran Mills }
4451950a7e7SRichard Tran Mills #endif
4464a2a386eSRichard Tran Mills 
447ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
448d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
449d71ae5a4SJacob Faibussowitsch {
450df555b71SRichard Tran Mills   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
451df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
452df555b71SRichard Tran Mills   const PetscScalar *x;
453df555b71SRichard Tran Mills   PetscScalar       *y;
454551aa5c8SRichard Tran Mills   PetscObjectState   state;
455df555b71SRichard Tran Mills 
456df555b71SRichard Tran Mills   PetscFunctionBegin;
45738987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
45838987b35SRichard Tran Mills   if (!a->nz) {
4599566063dSJacob Faibussowitsch     PetscCall(VecGetArray(yy, &y));
4609566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(y, A->cmap->n));
4619566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(yy, &y));
4623ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
46338987b35SRichard Tran Mills   }
464f36dfe3fSRichard Tran Mills 
4659566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4669566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
467df555b71SRichard Tran Mills 
4683fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4693fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4703fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
4719566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
4729566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
4733fa15762SRichard Tran Mills 
474df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
475792fecdfSBarry Smith   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
476df555b71SRichard Tran Mills 
4779566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
4789566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4799566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
4803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
481df555b71SRichard Tran Mills }
482d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
483df555b71SRichard Tran Mills 
484e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
48566976f2fSJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
486d71ae5a4SJacob Faibussowitsch {
4874a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
4884a2a386eSRichard Tran Mills   const PetscScalar *x;
4894a2a386eSRichard Tran Mills   PetscScalar       *y, *z;
4904a2a386eSRichard Tran Mills   const MatScalar   *aa;
4914a2a386eSRichard Tran Mills   PetscInt           m = A->rmap->n;
492db63039fSRichard Tran Mills   PetscInt           n = A->cmap->n;
4934a2a386eSRichard Tran Mills   const PetscInt    *aj, *ai;
4944a2a386eSRichard Tran Mills   PetscInt           i;
4954a2a386eSRichard Tran Mills 
496ff03dc53SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
497ff03dc53SRichard Tran Mills   char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
498a84739b8SRichard Tran Mills   PetscScalar alpha  = 1.0;
499db63039fSRichard Tran Mills   PetscScalar beta;
500a84739b8SRichard Tran Mills   char        matdescra[6];
501ff03dc53SRichard Tran Mills 
502ff03dc53SRichard Tran Mills   PetscFunctionBegin;
503a84739b8SRichard Tran Mills   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
504a84739b8SRichard Tran Mills   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
505a84739b8SRichard Tran Mills 
5069566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5079566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
508ff03dc53SRichard Tran Mills   aj = a->j; /* aj[k] gives column index for element aa[k]. */
509ff03dc53SRichard Tran Mills   aa = a->a; /* Nonzero elements stored row-by-row. */
510ff03dc53SRichard Tran Mills   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
511ff03dc53SRichard Tran Mills 
512ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
513a84739b8SRichard Tran Mills   if (zz == yy) {
514a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
515db63039fSRichard Tran Mills     beta = 1.0;
516db63039fSRichard Tran Mills     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
517a84739b8SRichard Tran Mills   } else {
518db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
519db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
520db63039fSRichard Tran Mills     beta = 0.0;
521db63039fSRichard Tran Mills     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
522ad540459SPierre Jolivet     for (i = 0; i < m; i++) z[i] += y[i];
523a84739b8SRichard Tran Mills   }
524ff03dc53SRichard Tran Mills 
5259566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
5269566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
5279566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
5283ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
529ff03dc53SRichard Tran Mills }
5301950a7e7SRichard Tran Mills #endif
531ff03dc53SRichard Tran Mills 
532ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
533d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
534d71ae5a4SJacob Faibussowitsch {
535df555b71SRichard Tran Mills   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
536df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
537df555b71SRichard Tran Mills   const PetscScalar *x;
538df555b71SRichard Tran Mills   PetscScalar       *y, *z;
539df555b71SRichard Tran Mills   PetscInt           m = A->rmap->n;
540df555b71SRichard Tran Mills   PetscInt           i;
541df555b71SRichard Tran Mills 
542df555b71SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
543551aa5c8SRichard Tran Mills   PetscObjectState state;
544df555b71SRichard Tran Mills 
545df555b71SRichard Tran Mills   PetscFunctionBegin;
54638987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
54738987b35SRichard Tran Mills   if (!a->nz) {
5489566063dSJacob Faibussowitsch     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
5499566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(z, y, m));
5509566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
5513ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
55238987b35SRichard Tran Mills   }
553df555b71SRichard Tran Mills 
5549566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5559566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
556df555b71SRichard Tran Mills 
5573fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
5583fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
5593fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
5609566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
5619566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
5623fa15762SRichard Tran Mills 
563df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
564df555b71SRichard Tran Mills   if (zz == yy) {
565df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
566df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
567792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
568df555b71SRichard Tran Mills   } else {
569df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
570df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
571792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
5725f80ce2aSJacob Faibussowitsch     for (i = 0; i < m; i++) z[i] += y[i];
573df555b71SRichard Tran Mills   }
574df555b71SRichard Tran Mills 
5759566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
5769566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
5779566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
5783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
579df555b71SRichard Tran Mills }
580d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
581df555b71SRichard Tran Mills 
582e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
58366976f2fSJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
584d71ae5a4SJacob Faibussowitsch {
585ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
586ff03dc53SRichard Tran Mills   const PetscScalar *x;
587ff03dc53SRichard Tran Mills   PetscScalar       *y, *z;
588ff03dc53SRichard Tran Mills   const MatScalar   *aa;
589ff03dc53SRichard Tran Mills   PetscInt           m = A->rmap->n;
590db63039fSRichard Tran Mills   PetscInt           n = A->cmap->n;
591ff03dc53SRichard Tran Mills   const PetscInt    *aj, *ai;
592ff03dc53SRichard Tran Mills   PetscInt           i;
593ff03dc53SRichard Tran Mills 
594ff03dc53SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
595ff03dc53SRichard Tran Mills   char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
596a84739b8SRichard Tran Mills   PetscScalar alpha  = 1.0;
597db63039fSRichard Tran Mills   PetscScalar beta;
598a84739b8SRichard Tran Mills   char        matdescra[6];
5994a2a386eSRichard Tran Mills 
6004a2a386eSRichard Tran Mills   PetscFunctionBegin;
601a84739b8SRichard Tran Mills   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
602a84739b8SRichard Tran Mills   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
603a84739b8SRichard Tran Mills 
6049566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
6059566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
6064a2a386eSRichard Tran Mills   aj = a->j; /* aj[k] gives column index for element aa[k]. */
6074a2a386eSRichard Tran Mills   aa = a->a; /* Nonzero elements stored row-by-row. */
6084a2a386eSRichard Tran Mills   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
6094a2a386eSRichard Tran Mills 
6104a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
611a84739b8SRichard Tran Mills   if (zz == yy) {
612a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
613db63039fSRichard Tran Mills     beta = 1.0;
614969800c5SRichard Tran Mills     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
615a84739b8SRichard Tran Mills   } else {
616db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
617db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
618db63039fSRichard Tran Mills     beta = 0.0;
619db63039fSRichard Tran Mills     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
620ad540459SPierre Jolivet     for (i = 0; i < n; i++) z[i] += y[i];
621a84739b8SRichard Tran Mills   }
6224a2a386eSRichard Tran Mills 
6239566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
6249566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
6259566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
6263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6274a2a386eSRichard Tran Mills }
6281950a7e7SRichard Tran Mills #endif
6294a2a386eSRichard Tran Mills 
630ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
631d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
632d71ae5a4SJacob Faibussowitsch {
633df555b71SRichard Tran Mills   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
634df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
635df555b71SRichard Tran Mills   const PetscScalar *x;
636df555b71SRichard Tran Mills   PetscScalar       *y, *z;
637969800c5SRichard Tran Mills   PetscInt           n = A->cmap->n;
638df555b71SRichard Tran Mills   PetscInt           i;
639551aa5c8SRichard Tran Mills   PetscObjectState   state;
640df555b71SRichard Tran Mills 
641df555b71SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
642df555b71SRichard Tran Mills 
643df555b71SRichard Tran Mills   PetscFunctionBegin;
64438987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
64538987b35SRichard Tran Mills   if (!a->nz) {
6469566063dSJacob Faibussowitsch     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
6479566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(z, y, n));
6489566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
6493ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
65038987b35SRichard Tran Mills   }
651f36dfe3fSRichard Tran Mills 
6529566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
6539566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
654df555b71SRichard Tran Mills 
6553fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
6563fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
6573fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
6589566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
6593ba16761SJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
6603fa15762SRichard Tran Mills 
661df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
662df555b71SRichard Tran Mills   if (zz == yy) {
663df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
664df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
665792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
666df555b71SRichard Tran Mills   } else {
667df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
668df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
669792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
6705f80ce2aSJacob Faibussowitsch     for (i = 0; i < n; i++) z[i] += y[i];
671df555b71SRichard Tran Mills   }
672df555b71SRichard Tran Mills 
6739566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
6749566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
6759566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
6763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
677df555b71SRichard Tran Mills }
678d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
679df555b71SRichard Tran Mills 
6808a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
681d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
682d71ae5a4SJacob Faibussowitsch {
6831495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
684431879ecSRichard Tran Mills   sparse_matrix_t     csrA, csrB, csrC;
685190ae7a4SRichard Tran Mills   PetscInt            nrows, ncols;
686431879ecSRichard Tran Mills   struct matrix_descr descr_type_gen;
687431879ecSRichard Tran Mills   PetscObjectState    state;
688431879ecSRichard Tran Mills 
689431879ecSRichard Tran Mills   PetscFunctionBegin;
690190ae7a4SRichard Tran Mills   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
691190ae7a4SRichard Tran Mills    * not handle sparse matrices with zero rows or columns. */
692190ae7a4SRichard Tran Mills   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
693190ae7a4SRichard Tran Mills   else nrows = A->cmap->N;
694190ae7a4SRichard Tran Mills   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
695190ae7a4SRichard Tran Mills   else ncols = B->rmap->N;
696190ae7a4SRichard Tran Mills 
6979566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
6989566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
6999566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
7009566063dSJacob Faibussowitsch   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
701431879ecSRichard Tran Mills   csrA                = a->csrA;
702431879ecSRichard Tran Mills   csrB                = b->csrA;
703431879ecSRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
704431879ecSRichard Tran Mills 
705190ae7a4SRichard Tran Mills   if (csrA && csrB) {
706792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
707190ae7a4SRichard Tran Mills   } else {
708f3fa974cSJacob Faibussowitsch     csrC = NULL;
709190ae7a4SRichard Tran Mills   }
710190ae7a4SRichard Tran Mills 
7119566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));
7123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
713431879ecSRichard Tran Mills }
714431879ecSRichard Tran Mills 
715d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
716d71ae5a4SJacob Faibussowitsch {
7171495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
718e8be1fc7SRichard Tran Mills   sparse_matrix_t     csrA, csrB, csrC;
719e8be1fc7SRichard Tran Mills   struct matrix_descr descr_type_gen;
720e8be1fc7SRichard Tran Mills   PetscObjectState    state;
721e8be1fc7SRichard Tran Mills 
722e8be1fc7SRichard Tran Mills   PetscFunctionBegin;
7239566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
7249566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
7259566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
7269566063dSJacob Faibussowitsch   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
727e8be1fc7SRichard Tran Mills   csrA                = a->csrA;
728e8be1fc7SRichard Tran Mills   csrB                = b->csrA;
729e8be1fc7SRichard Tran Mills   csrC                = c->csrA;
730e8be1fc7SRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
731e8be1fc7SRichard Tran Mills 
732190ae7a4SRichard Tran Mills   if (csrA && csrB) {
733792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
734190ae7a4SRichard Tran Mills   } else {
735f3fa974cSJacob Faibussowitsch     csrC = NULL;
736190ae7a4SRichard Tran Mills   }
737e8be1fc7SRichard Tran Mills 
738e8be1fc7SRichard Tran Mills   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
7399566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
7403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
741e8be1fc7SRichard Tran Mills }
742e8be1fc7SRichard Tran Mills 
743d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
744d71ae5a4SJacob Faibussowitsch {
745190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7469566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
7473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
748190ae7a4SRichard Tran Mills }
749190ae7a4SRichard Tran Mills 
750d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
751d71ae5a4SJacob Faibussowitsch {
752190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7539566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
7543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
755190ae7a4SRichard Tran Mills }
756190ae7a4SRichard Tran Mills 
757d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
758d71ae5a4SJacob Faibussowitsch {
759190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7609566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
7613ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
762190ae7a4SRichard Tran Mills }
763190ae7a4SRichard Tran Mills 
764d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
765d71ae5a4SJacob Faibussowitsch {
766190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7679566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
7683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
769190ae7a4SRichard Tran Mills }
770190ae7a4SRichard Tran Mills 
771d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
772d71ae5a4SJacob Faibussowitsch {
773190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7749566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
7753ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
776190ae7a4SRichard Tran Mills }
777190ae7a4SRichard Tran Mills 
778d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
779d71ae5a4SJacob Faibussowitsch {
780190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7819566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
7823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
783190ae7a4SRichard Tran Mills }
784190ae7a4SRichard Tran Mills 
785d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
786d71ae5a4SJacob Faibussowitsch {
787190ae7a4SRichard Tran Mills   Mat_Product *product = C->product;
788190ae7a4SRichard Tran Mills   Mat          A = product->A, B = product->B;
789190ae7a4SRichard Tran Mills 
790190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7919566063dSJacob Faibussowitsch   PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
7923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
793190ae7a4SRichard Tran Mills }
794190ae7a4SRichard Tran Mills 
795d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
796d71ae5a4SJacob Faibussowitsch {
797190ae7a4SRichard Tran Mills   Mat_Product *product = C->product;
798190ae7a4SRichard Tran Mills   Mat          A = product->A, B = product->B;
799190ae7a4SRichard Tran Mills   PetscReal    fill = product->fill;
800190ae7a4SRichard Tran Mills 
801190ae7a4SRichard Tran Mills   PetscFunctionBegin;
8029566063dSJacob Faibussowitsch   PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
803190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
8043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
805190ae7a4SRichard Tran Mills }
806190ae7a4SRichard Tran Mills 
807d71ae5a4SJacob Faibussowitsch PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
808d71ae5a4SJacob Faibussowitsch {
809190ae7a4SRichard Tran Mills   Mat                 Ct;
810190ae7a4SRichard Tran Mills   Vec                 zeros;
8111495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
8124f53af40SRichard Tran Mills   sparse_matrix_t     csrA, csrP, csrC;
8134f53af40SRichard Tran Mills   PetscBool           set, flag;
814b9e1dd46SRichard Tran Mills   struct matrix_descr descr_type_sym;
8154f53af40SRichard Tran Mills   PetscObjectState    state;
8164f53af40SRichard Tran Mills 
8174f53af40SRichard Tran Mills   PetscFunctionBegin;
8189566063dSJacob Faibussowitsch   PetscCall(MatIsSymmetricKnown(A, &set, &flag));
819b94d7dedSBarry Smith   PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
8204f53af40SRichard Tran Mills 
8219566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
8229566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
8239566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
8249566063dSJacob Faibussowitsch   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
8254f53af40SRichard Tran Mills   csrA                = a->csrA;
8264f53af40SRichard Tran Mills   csrP                = p->csrA;
8274f53af40SRichard Tran Mills   csrC                = c->csrA;
828b9e1dd46SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
829190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
830b9e1dd46SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
8314f53af40SRichard Tran Mills 
8322fe279fdSBarry Smith   /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
833792fecdfSBarry Smith   PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);
8344f53af40SRichard Tran Mills 
835190ae7a4SRichard Tran Mills   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
836190ae7a4SRichard Tran Mills    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
837190ae7a4SRichard Tran Mills    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
8387301b172SPierre Jolivet    * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
839190ae7a4SRichard Tran Mills    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
840190ae7a4SRichard Tran Mills    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
841190ae7a4SRichard Tran Mills    * the full matrix. */
8429566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
8439566063dSJacob Faibussowitsch   PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
8449566063dSJacob Faibussowitsch   PetscCall(MatCreateVecs(C, &zeros, NULL));
8459566063dSJacob Faibussowitsch   PetscCall(VecSetFromOptions(zeros));
8469566063dSJacob Faibussowitsch   PetscCall(VecZeroEntries(zeros));
8479566063dSJacob Faibussowitsch   PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
8489566063dSJacob Faibussowitsch   PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
849190ae7a4SRichard Tran Mills   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
850f3fa974cSJacob Faibussowitsch   PetscCall(MatProductCreateWithMat(A, P, NULL, C));
8519566063dSJacob Faibussowitsch   PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
8529566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
8539566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&zeros));
8549566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Ct));
8553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
8564f53af40SRichard Tran Mills }
857190ae7a4SRichard Tran Mills 
858d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
859d71ae5a4SJacob Faibussowitsch {
860190ae7a4SRichard Tran Mills   Mat_Product        *product = C->product;
861190ae7a4SRichard Tran Mills   Mat                 A = product->A, P = product->B;
8621495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
863190ae7a4SRichard Tran Mills   sparse_matrix_t     csrA, csrP, csrC;
864190ae7a4SRichard Tran Mills   struct matrix_descr descr_type_sym;
865190ae7a4SRichard Tran Mills   PetscObjectState    state;
866190ae7a4SRichard Tran Mills 
867190ae7a4SRichard Tran Mills   PetscFunctionBegin;
8689566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
8699566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
8709566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
8719566063dSJacob Faibussowitsch   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
872190ae7a4SRichard Tran Mills   csrA                = a->csrA;
873190ae7a4SRichard Tran Mills   csrP                = p->csrA;
874190ae7a4SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
875190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
876190ae7a4SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
877190ae7a4SRichard Tran Mills 
8782fe279fdSBarry Smith   /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
879190ae7a4SRichard Tran Mills   if (csrP && csrA) {
880792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
881190ae7a4SRichard Tran Mills   } else {
882f3fa974cSJacob Faibussowitsch     csrC = NULL;
883190ae7a4SRichard Tran Mills   }
884190ae7a4SRichard Tran Mills 
885190ae7a4SRichard Tran Mills   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
886190ae7a4SRichard Tran Mills    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
88749ba5396SRichard Tran Mills    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
88849ba5396SRichard Tran Mills    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
88949ba5396SRichard Tran Mills    * is guaranteed. */
8909566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));
891190ae7a4SRichard Tran Mills 
892190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_PtAP;
8933ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
894190ae7a4SRichard Tran Mills }
895190ae7a4SRichard Tran Mills 
896d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
897d71ae5a4SJacob Faibussowitsch {
898190ae7a4SRichard Tran Mills   PetscFunctionBegin;
899190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AB;
900190ae7a4SRichard Tran Mills   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
9013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
902190ae7a4SRichard Tran Mills }
903190ae7a4SRichard Tran Mills 
904d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
905d71ae5a4SJacob Faibussowitsch {
906190ae7a4SRichard Tran Mills   PetscFunctionBegin;
907190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
9083ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
909190ae7a4SRichard Tran Mills }
910190ae7a4SRichard Tran Mills 
911d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
912d71ae5a4SJacob Faibussowitsch {
913190ae7a4SRichard Tran Mills   PetscFunctionBegin;
914190ae7a4SRichard Tran Mills   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
915190ae7a4SRichard Tran Mills   C->ops->productsymbolic          = MatProductSymbolic_ABt;
9163ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
917190ae7a4SRichard Tran Mills }
918190ae7a4SRichard Tran Mills 
919d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
920d71ae5a4SJacob Faibussowitsch {
921190ae7a4SRichard Tran Mills   Mat_Product *product = C->product;
922190ae7a4SRichard Tran Mills   Mat          A       = product->A;
923190ae7a4SRichard Tran Mills   PetscBool    set, flag;
924190ae7a4SRichard Tran Mills 
925190ae7a4SRichard Tran Mills   PetscFunctionBegin;
926a3d67537SPierre Jolivet   if (PetscDefined(USE_COMPLEX)) {
9272ab6f6a8SStefano Zampini     /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
9282ab6f6a8SStefano Zampini      * We do this in several other locations in this file. This works for the time being, but these
929190ae7a4SRichard Tran Mills      * routines are considered unsafe and may be removed from the MatProduct code in the future.
9302ab6f6a8SStefano Zampini      * TODO: Add proper MATSEQAIJMKL implementations */
931190ae7a4SRichard Tran Mills     C->ops->productsymbolic = NULL;
932a3d67537SPierre Jolivet   } else {
933190ae7a4SRichard Tran Mills     /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
9349566063dSJacob Faibussowitsch     PetscCall(MatIsSymmetricKnown(A, &set, &flag));
935a3d67537SPierre Jolivet     if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
936a3d67537SPierre Jolivet     else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
9372fe279fdSBarry Smith     /* we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
938190ae7a4SRichard Tran Mills      * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
939a3d67537SPierre Jolivet   }
9403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
941190ae7a4SRichard Tran Mills }
942190ae7a4SRichard Tran Mills 
943d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
944d71ae5a4SJacob Faibussowitsch {
945190ae7a4SRichard Tran Mills   PetscFunctionBegin;
9462ab6f6a8SStefano Zampini   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
9473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
948190ae7a4SRichard Tran Mills }
949190ae7a4SRichard Tran Mills 
950d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
951d71ae5a4SJacob Faibussowitsch {
952190ae7a4SRichard Tran Mills   PetscFunctionBegin;
9532ab6f6a8SStefano Zampini   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
9543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
955190ae7a4SRichard Tran Mills }
956190ae7a4SRichard Tran Mills 
957d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
958d71ae5a4SJacob Faibussowitsch {
959190ae7a4SRichard Tran Mills   Mat_Product *product = C->product;
960190ae7a4SRichard Tran Mills 
961190ae7a4SRichard Tran Mills   PetscFunctionBegin;
962190ae7a4SRichard Tran Mills   switch (product->type) {
963d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_AB:
964d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
965d71ae5a4SJacob Faibussowitsch     break;
966d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_AtB:
967d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
968d71ae5a4SJacob Faibussowitsch     break;
969d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_ABt:
970d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
971d71ae5a4SJacob Faibussowitsch     break;
972d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_PtAP:
973d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
974d71ae5a4SJacob Faibussowitsch     break;
975d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_RARt:
976d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
977d71ae5a4SJacob Faibussowitsch     break;
978d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_ABC:
979d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
980d71ae5a4SJacob Faibussowitsch     break;
981d71ae5a4SJacob Faibussowitsch   default:
982d71ae5a4SJacob Faibussowitsch     break;
983190ae7a4SRichard Tran Mills   }
9843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
985190ae7a4SRichard Tran Mills }
986431879ecSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
9874f53af40SRichard Tran Mills 
9884a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
989510b72f4SRichard Tran Mills  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
9904a2a386eSRichard Tran Mills  * routine, but can also be used to convert an assembled SeqAIJ matrix
9914a2a386eSRichard Tran Mills  * into a SeqAIJMKL one. */
992d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
993d71ae5a4SJacob Faibussowitsch {
9944a2a386eSRichard Tran Mills   Mat            B = *newmat;
9954a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
996c9d46305SRichard Tran Mills   PetscBool      set;
997e9c94282SRichard Tran Mills   PetscBool      sametype;
9984a2a386eSRichard Tran Mills 
9994a2a386eSRichard Tran Mills   PetscFunctionBegin;
10009566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
10014a2a386eSRichard Tran Mills 
10029566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
10033ba16761SJacob Faibussowitsch   if (sametype) PetscFunctionReturn(PETSC_SUCCESS);
1004e9c94282SRichard Tran Mills 
10054dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&aijmkl));
10064a2a386eSRichard Tran Mills   B->spptr = (void *)aijmkl;
10074a2a386eSRichard Tran Mills 
1008df555b71SRichard Tran Mills   /* Set function pointers for methods that we inherit from AIJ but override.
1009969800c5SRichard Tran Mills    * We also parse some command line options below, since those determine some of the methods we point to. */
10104a2a386eSRichard Tran Mills   B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
10114a2a386eSRichard Tran Mills   B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
10124a2a386eSRichard Tran Mills   B->ops->destroy     = MatDestroy_SeqAIJMKL;
1013c9d46305SRichard Tran Mills 
10144abfa3b3SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1015ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1016d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
1017a8327b06SKarl Rupp #else
1018d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_TRUE;
1019d995685eSRichard Tran Mills #endif
10205b49642aSRichard Tran Mills   aijmkl->eager_inspection = PETSC_FALSE;
10214abfa3b3SRichard Tran Mills 
10224abfa3b3SRichard Tran Mills   /* Parse command line options. */
1023d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
10249566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
10259566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
1026d0609cedSBarry Smith   PetscOptionsEnd();
1027ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1028d995685eSRichard Tran Mills   if (!aijmkl->no_SpMV2) {
10299566063dSJacob Faibussowitsch     PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n"));
1030d995685eSRichard Tran Mills     aijmkl->no_SpMV2 = PETSC_TRUE;
1031d995685eSRichard Tran Mills   }
1032d995685eSRichard Tran Mills #endif
1033c9d46305SRichard Tran Mills 
1034ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1035df555b71SRichard Tran Mills   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
1036969800c5SRichard Tran Mills   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
1037df555b71SRichard Tran Mills   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
1038969800c5SRichard Tran Mills   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
10398a369200SRichard Tran Mills   #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1040190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1041190ae7a4SRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1042190ae7a4SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1043190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1044190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1045ffcab697SRichard Tran Mills     #if !defined(PETSC_USE_COMPLEX)
104649ba5396SRichard Tran Mills   B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1047190ae7a4SRichard Tran Mills     #else
1048190ae7a4SRichard Tran Mills   B->ops->ptapnumeric = NULL;
10494f53af40SRichard Tran Mills     #endif
1050e8be1fc7SRichard Tran Mills   #endif
10511950a7e7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
10521950a7e7SRichard Tran Mills 
1053213898a2SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1054213898a2SRichard Tran Mills   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1055213898a2SRichard Tran Mills    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1056213898a2SRichard Tran Mills    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1057213898a2SRichard Tran Mills    * versions in which the older interface has not been deprecated, we use the old interface. */
10581950a7e7SRichard Tran Mills   if (aijmkl->no_SpMV2) {
10594a2a386eSRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL;
1060969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
10614a2a386eSRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1062969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1063c9d46305SRichard Tran Mills   }
10641950a7e7SRichard Tran Mills #endif
10654a2a386eSRichard Tran Mills 
10669566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));
10674a2a386eSRichard Tran Mills 
10689566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
10694a2a386eSRichard Tran Mills   *newmat = B;
10703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
10714a2a386eSRichard Tran Mills }
10724a2a386eSRichard Tran Mills 
10734a2a386eSRichard Tran Mills /*@C
107411a5261eSBarry Smith   MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.
107590147e49SRichard Tran Mills 
1076d083f849SBarry Smith   Collective
10774a2a386eSRichard Tran Mills 
10784a2a386eSRichard Tran Mills   Input Parameters:
107911a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF`
10804a2a386eSRichard Tran Mills . m    - number of rows
10814a2a386eSRichard Tran Mills . n    - number of columns
10824a2a386eSRichard Tran Mills . nz   - number of nonzeros per row (same for all rows)
10834a2a386eSRichard Tran Mills - nnz  - array containing the number of nonzeros in the various rows
10842ef1f0ffSBarry Smith          (possibly different for each row) or `NULL`
10854a2a386eSRichard Tran Mills 
10864a2a386eSRichard Tran Mills   Output Parameter:
10874a2a386eSRichard Tran Mills . A - the matrix
10884a2a386eSRichard Tran Mills 
108990147e49SRichard Tran Mills   Options Database Keys:
109066b7eeb6SRichard Tran Mills + -mat_aijmkl_no_spmv2         - disable use of the SpMV2 inspector-executor routines
10912ef1f0ffSBarry Smith - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection,
10922ef1f0ffSBarry Smith                                   performing this step the first time the matrix is applied
10934a2a386eSRichard Tran Mills 
10944a2a386eSRichard Tran Mills   Level: intermediate
10954a2a386eSRichard Tran Mills 
10962fe279fdSBarry Smith   Notes:
10972ef1f0ffSBarry Smith   If `nnz` is given then `nz` is ignored
10982ef1f0ffSBarry Smith 
10992fe279fdSBarry Smith   This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
11002fe279fdSBarry Smith   routines from Intel MKL whenever possible.
11012fe279fdSBarry Smith 
11022fe279fdSBarry Smith   If the installed version of MKL supports the "SpMV2" sparse
11032fe279fdSBarry Smith   inspector-executor routines, then those are used by default.
11042fe279fdSBarry Smith 
11052fe279fdSBarry Smith   `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
11062fe279fdSBarry Smith   (for symmetric A) operations are currently supported.
11072fe279fdSBarry Smith   MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.
11082fe279fdSBarry Smith 
11091cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
11104a2a386eSRichard Tran Mills @*/
1111d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
1112d71ae5a4SJacob Faibussowitsch {
11134a2a386eSRichard Tran Mills   PetscFunctionBegin;
11149566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
11159566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
11169566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQAIJMKL));
11179566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
11183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
11194a2a386eSRichard Tran Mills }
11204a2a386eSRichard Tran Mills 
1121d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1122d71ae5a4SJacob Faibussowitsch {
11234a2a386eSRichard Tran Mills   PetscFunctionBegin;
11249566063dSJacob Faibussowitsch   PetscCall(MatSetType(A, MATSEQAIJ));
11259566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
11263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
11274a2a386eSRichard Tran Mills }
1128