xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision d71ae5a4db6382e7f06317b8d368875286fe9008)
1b9e7e5c1SBarry Smith 
24a2a386eSRichard Tran Mills /*
34a2a386eSRichard Tran Mills   Defines basic operations for the MATSEQAIJMKL matrix class.
44a2a386eSRichard Tran Mills   This class is derived from the MATSEQAIJ class and retains the
54a2a386eSRichard Tran Mills   compressed row storage (aka Yale sparse matrix format) but uses
64a2a386eSRichard Tran Mills   sparse BLAS operations from the Intel Math Kernel Library (MKL)
74a2a386eSRichard Tran Mills   wherever possible.
84a2a386eSRichard Tran Mills */
94a2a386eSRichard Tran Mills 
104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h>
114a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12b9e7e5c1SBarry Smith #include <mkl_spblas.h>
134a2a386eSRichard Tran Mills 
144a2a386eSRichard Tran Mills typedef struct {
15c9d46305SRichard Tran Mills   PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
165b49642aSRichard Tran Mills   PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
174abfa3b3SRichard Tran Mills   PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18551aa5c8SRichard Tran Mills   PetscObjectState state;
19ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20df555b71SRichard Tran Mills   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21df555b71SRichard Tran Mills   struct matrix_descr descr;
22b8cbc1fbSRichard Tran Mills #endif
234a2a386eSRichard Tran Mills } Mat_SeqAIJMKL;
244a2a386eSRichard Tran Mills 
254a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);
264a2a386eSRichard Tran Mills 
27*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
28*d71ae5a4SJacob Faibussowitsch {
294a2a386eSRichard Tran Mills   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
304a2a386eSRichard Tran Mills   /* so we will ignore 'MatType type'. */
314a2a386eSRichard Tran Mills   Mat B = *newmat;
32ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
334a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
34c1d5218aSRichard Tran Mills #endif
354a2a386eSRichard Tran Mills 
364a2a386eSRichard Tran Mills   PetscFunctionBegin;
379566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
384a2a386eSRichard Tran Mills 
394a2a386eSRichard Tran Mills   /* Reset the original function pointers. */
4054871a98SRichard Tran Mills   B->ops->duplicate               = MatDuplicate_SeqAIJ;
414a2a386eSRichard Tran Mills   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
424a2a386eSRichard Tran Mills   B->ops->destroy                 = MatDestroy_SeqAIJ;
4354871a98SRichard Tran Mills   B->ops->mult                    = MatMult_SeqAIJ;
44ff03dc53SRichard Tran Mills   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
4554871a98SRichard Tran Mills   B->ops->multadd                 = MatMultAdd_SeqAIJ;
46ff03dc53SRichard Tran Mills   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
47190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
48431879ecSRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
49e8be1fc7SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
50190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
51190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
524f53af40SRichard Tran Mills   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
534a2a386eSRichard Tran Mills 
549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));
554222ddf1SHong Zhang 
56ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
574abfa3b3SRichard Tran Mills   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
58e9c94282SRichard Tran Mills    * simply involves destroying the MKL sparse matrix handle and then freeing
59e9c94282SRichard Tran Mills    * the spptr pointer. */
60a8327b06SKarl Rupp   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;
61a8327b06SKarl Rupp 
62792fecdfSBarry Smith   if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
63ddf6f99aSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
649566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->spptr));
654a2a386eSRichard Tran Mills 
664a2a386eSRichard Tran Mills   /* Change the type of B to MATSEQAIJ. */
679566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));
684a2a386eSRichard Tran Mills 
694a2a386eSRichard Tran Mills   *newmat = B;
704a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
714a2a386eSRichard Tran Mills }
724a2a386eSRichard Tran Mills 
73*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
74*d71ae5a4SJacob Faibussowitsch {
754a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
764a2a386eSRichard Tran Mills 
774a2a386eSRichard Tran Mills   PetscFunctionBegin;
78e9c94282SRichard Tran Mills 
79edc89de7SRichard Tran Mills   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
80e9c94282SRichard Tran Mills   if (aijmkl) {
814a2a386eSRichard Tran Mills     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
82ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
83792fecdfSBarry Smith     if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
844abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
859566063dSJacob Faibussowitsch     PetscCall(PetscFree(A->spptr));
86e9c94282SRichard Tran Mills   }
874a2a386eSRichard Tran Mills 
884a2a386eSRichard Tran Mills   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
894a2a386eSRichard Tran Mills    * to destroy everything that remains. */
909566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
914a2a386eSRichard Tran Mills   /* Note that I don't call MatSetType().  I believe this is because that
924a2a386eSRichard Tran Mills    * is only to be called when *building* a matrix.  I could be wrong, but
934a2a386eSRichard Tran Mills    * that is how things work for the SuperLU matrix class. */
942e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
959566063dSJacob Faibussowitsch   PetscCall(MatDestroy_SeqAIJ(A));
964a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
974a2a386eSRichard Tran Mills }
984a2a386eSRichard Tran Mills 
99190ae7a4SRichard Tran Mills /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
1005b49642aSRichard Tran Mills  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
1015b49642aSRichard Tran Mills  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
1025b49642aSRichard Tran Mills  * handle, creates a new one, and then calls mkl_sparse_optimize().
1035b49642aSRichard Tran Mills  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
1045b49642aSRichard Tran Mills  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
1055b49642aSRichard Tran Mills  * an unoptimized matrix handle here. */
106*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
107*d71ae5a4SJacob Faibussowitsch {
108ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1096e369cd5SRichard Tran Mills   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
1106e369cd5SRichard Tran Mills    * does nothing. We make it callable anyway in this case because it cuts
1116e369cd5SRichard Tran Mills    * down on littering the code with #ifdefs. */
11245fbe478SRichard Tran Mills   PetscFunctionBegin;
1136e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
1146e369cd5SRichard Tran Mills #else
115a8327b06SKarl Rupp   Mat_SeqAIJ    *a      = (Mat_SeqAIJ *)A->data;
116a8327b06SKarl Rupp   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
117a8327b06SKarl Rupp   PetscInt       m, n;
118a8327b06SKarl Rupp   MatScalar     *aa;
119a8327b06SKarl Rupp   PetscInt      *aj, *ai;
1204a2a386eSRichard Tran Mills 
121a8327b06SKarl Rupp   PetscFunctionBegin;
122e626a176SRichard Tran Mills   #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
123e626a176SRichard Tran Mills   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
124e626a176SRichard Tran Mills    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
125e626a176SRichard Tran Mills    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
126e626a176SRichard Tran Mills    * mkl_sparse_optimize() later. */
1276e369cd5SRichard Tran Mills   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
1284d51fa23SRichard Tran Mills   #endif
1296e369cd5SRichard Tran Mills 
1300632b357SRichard Tran Mills   if (aijmkl->sparse_optimized) {
1310632b357SRichard Tran Mills     /* Matrix has been previously assembled and optimized. Must destroy old
1320632b357SRichard Tran Mills      * matrix handle before running the optimization step again. */
133792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
1340632b357SRichard Tran Mills   }
1358d3fe1b0SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1366e369cd5SRichard Tran Mills 
137c9d46305SRichard Tran Mills   /* Now perform the SpMV2 setup and matrix optimization. */
138df555b71SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
139df555b71SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
140df555b71SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
14158678438SRichard Tran Mills   m                  = A->rmap->n;
14258678438SRichard Tran Mills   n                  = A->cmap->n;
143df555b71SRichard Tran Mills   aj                 = a->j; /* aj[k] gives column index for element aa[k]. */
144df555b71SRichard Tran Mills   aa                 = a->a; /* Nonzero elements stored row-by-row. */
145df555b71SRichard Tran Mills   ai                 = a->i; /* ai[k] is the position in aa and aj where row k starts. */
1461495fedeSRichard Tran Mills   if (a->nz && aa && !A->structure_only) {
1478d3fe1b0SRichard Tran Mills     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1488d3fe1b0SRichard Tran Mills      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
149792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, m, n, ai, ai + 1, aj, aa);
150792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
151792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
15248a46eb9SPierre Jolivet     if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
1534abfa3b3SRichard Tran Mills     aijmkl->sparse_optimized = PETSC_TRUE;
1549566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
155190ae7a4SRichard Tran Mills   } else {
156190ae7a4SRichard Tran Mills     aijmkl->csrA = PETSC_NULL;
157c9d46305SRichard Tran Mills   }
1586e369cd5SRichard Tran Mills 
1596e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
160d995685eSRichard Tran Mills #endif
1616e369cd5SRichard Tran Mills }
1626e369cd5SRichard Tran Mills 
163b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
164190ae7a4SRichard Tran Mills /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
165*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
166*d71ae5a4SJacob Faibussowitsch {
16719afcda9SRichard Tran Mills   sparse_index_base_t indexing;
168190ae7a4SRichard Tran Mills   PetscInt            m, n;
16945fbe478SRichard Tran Mills   PetscInt           *aj, *ai, *dummy;
17019afcda9SRichard Tran Mills   MatScalar          *aa;
17119afcda9SRichard Tran Mills   Mat_SeqAIJMKL      *aijmkl;
17219afcda9SRichard Tran Mills 
173362febeeSStefano Zampini   PetscFunctionBegin;
174190ae7a4SRichard Tran Mills   if (csrA) {
17545fbe478SRichard Tran Mills     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
176792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, &m, &n, &ai, &dummy, &aj, &aa);
1775f80ce2aSJacob Faibussowitsch     PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
178190ae7a4SRichard Tran Mills   } else {
179190ae7a4SRichard Tran Mills     aj = ai = PETSC_NULL;
180190ae7a4SRichard Tran Mills     aa      = PETSC_NULL;
181aab60f1bSRichard Tran Mills   }
182190ae7a4SRichard Tran Mills 
1839566063dSJacob Faibussowitsch   PetscCall(MatSetType(A, MATSEQAIJ));
1849566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
185aab60f1bSRichard Tran Mills   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
186aab60f1bSRichard Tran Mills    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
187aab60f1bSRichard Tran Mills    * they will be destroyed when the MKL handle is destroyed.
188aab60f1bSRichard Tran Mills    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
189190ae7a4SRichard Tran Mills   if (csrA) {
1909566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
191190ae7a4SRichard Tran Mills   } else {
192190ae7a4SRichard Tran Mills     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
1939566063dSJacob Faibussowitsch     PetscCall(MatSetUp(A));
1949566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1959566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
196190ae7a4SRichard Tran Mills   }
19719afcda9SRichard Tran Mills 
19819afcda9SRichard Tran Mills   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
19919afcda9SRichard Tran Mills    * Now turn it into a MATSEQAIJMKL. */
2009566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
2016c87cf42SRichard Tran Mills 
20219afcda9SRichard Tran Mills   aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
20319afcda9SRichard Tran Mills   aijmkl->csrA = csrA;
2046c87cf42SRichard Tran Mills 
20519afcda9SRichard Tran Mills   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
20619afcda9SRichard Tran Mills    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
20719afcda9SRichard Tran Mills    * and just need to be able to run the MKL optimization step. */
208f3fd1758SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
209f3fd1758SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
210f3fd1758SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
211190ae7a4SRichard Tran Mills   if (csrA) {
212792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
213792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
2141950a7e7SRichard Tran Mills   }
2159566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
21619afcda9SRichard Tran Mills   PetscFunctionReturn(0);
21719afcda9SRichard Tran Mills }
218b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
219190ae7a4SRichard Tran Mills 
220e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
221e8be1fc7SRichard Tran Mills  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
222e8be1fc7SRichard Tran Mills  * MatMatMultNumeric(). */
223b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
224*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
225*d71ae5a4SJacob Faibussowitsch {
226e8be1fc7SRichard Tran Mills   PetscInt            i;
227e8be1fc7SRichard Tran Mills   PetscInt            nrows, ncols;
228e8be1fc7SRichard Tran Mills   PetscInt            nz;
229e8be1fc7SRichard Tran Mills   PetscInt           *ai, *aj, *dummy;
230e8be1fc7SRichard Tran Mills   PetscScalar        *aa;
2311495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
232e8be1fc7SRichard Tran Mills   sparse_index_base_t indexing;
233e8be1fc7SRichard Tran Mills 
234362febeeSStefano Zampini   PetscFunctionBegin;
235190ae7a4SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
236190ae7a4SRichard Tran Mills   if (!aijmkl->csrA) PetscFunctionReturn(0);
237190ae7a4SRichard Tran Mills 
238e8be1fc7SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
239792fecdfSBarry Smith   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa);
240e8be1fc7SRichard Tran Mills 
241e8be1fc7SRichard Tran Mills   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
242e8be1fc7SRichard Tran Mills    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
243e8be1fc7SRichard Tran Mills   for (i = 0; i < nrows; i++) {
244e8be1fc7SRichard Tran Mills     nz = ai[i + 1] - ai[i];
2459566063dSJacob Faibussowitsch     PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
246e8be1fc7SRichard Tran Mills   }
247e8be1fc7SRichard Tran Mills 
2489566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
2499566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
250e8be1fc7SRichard Tran Mills 
2519566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
252a7180b50SRichard Tran Mills   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
253a7180b50SRichard Tran Mills    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
254a7180b50SRichard Tran Mills    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
255a7180b50SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
256e8be1fc7SRichard Tran Mills   PetscFunctionReturn(0);
257e8be1fc7SRichard Tran Mills }
258b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
259e8be1fc7SRichard Tran Mills 
2603849ddb2SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
261*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
262*d71ae5a4SJacob Faibussowitsch {
2633849ddb2SRichard Tran Mills   PetscInt            i, j, k;
2643849ddb2SRichard Tran Mills   PetscInt            nrows, ncols;
2653849ddb2SRichard Tran Mills   PetscInt            nz;
2663849ddb2SRichard Tran Mills   PetscInt           *ai, *aj, *dummy;
2673849ddb2SRichard Tran Mills   PetscScalar        *aa;
2681495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
2693849ddb2SRichard Tran Mills   sparse_index_base_t indexing;
2703849ddb2SRichard Tran Mills 
271362febeeSStefano Zampini   PetscFunctionBegin;
2729566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));
2733849ddb2SRichard Tran Mills 
2743849ddb2SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
2753849ddb2SRichard Tran Mills   if (!aijmkl->csrA) {
2769566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
2773849ddb2SRichard Tran Mills     PetscFunctionReturn(0);
2783849ddb2SRichard Tran Mills   }
2793849ddb2SRichard Tran Mills 
2803849ddb2SRichard Tran Mills   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
281792fecdfSBarry Smith   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa);
2823849ddb2SRichard Tran Mills 
2833849ddb2SRichard Tran Mills   k = 0;
2843849ddb2SRichard Tran Mills   for (i = 0; i < nrows; i++) {
2859566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
2863849ddb2SRichard Tran Mills     nz = ai[i + 1] - ai[i];
2873849ddb2SRichard Tran Mills     for (j = 0; j < nz; j++) {
2883849ddb2SRichard Tran Mills       if (aa) {
2899566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g)  ", aj[k], PetscRealPart(aa[k])));
2903849ddb2SRichard Tran Mills       } else {
2919566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
2923849ddb2SRichard Tran Mills       }
2933849ddb2SRichard Tran Mills       k++;
2943849ddb2SRichard Tran Mills     }
2959566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
2963849ddb2SRichard Tran Mills   }
2973849ddb2SRichard Tran Mills   PetscFunctionReturn(0);
2983849ddb2SRichard Tran Mills }
2993849ddb2SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
3003849ddb2SRichard Tran Mills 
301*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
302*d71ae5a4SJacob Faibussowitsch {
3031495fedeSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
3046e369cd5SRichard Tran Mills   Mat_SeqAIJMKL *aijmkl_dest;
3056e369cd5SRichard Tran Mills 
3066e369cd5SRichard Tran Mills   PetscFunctionBegin;
3079566063dSJacob Faibussowitsch   PetscCall(MatDuplicate_SeqAIJ(A, op, M));
3086e369cd5SRichard Tran Mills   aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
3099566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
3106e369cd5SRichard Tran Mills   aijmkl_dest->sparse_optimized = PETSC_FALSE;
3111baa6e33SBarry Smith   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
3126e369cd5SRichard Tran Mills   PetscFunctionReturn(0);
3136e369cd5SRichard Tran Mills }
3146e369cd5SRichard Tran Mills 
315*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
316*d71ae5a4SJacob Faibussowitsch {
3176e369cd5SRichard Tran Mills   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data;
3185b49642aSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
3196e369cd5SRichard Tran Mills 
3206e369cd5SRichard Tran Mills   PetscFunctionBegin;
3216e369cd5SRichard Tran Mills   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
3226e369cd5SRichard Tran Mills 
3236e369cd5SRichard Tran Mills   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
3246e369cd5SRichard Tran Mills    * extra information and some different methods, call the AssemblyEnd
3256e369cd5SRichard Tran Mills    * routine for a MATSEQAIJ.
3266e369cd5SRichard Tran Mills    * I'm not sure if this is the best way to do this, but it avoids
327d96e85feSRichard Tran Mills    * a lot of code duplication. */
3286e369cd5SRichard Tran Mills   a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
3299566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
3306e369cd5SRichard Tran Mills 
3315b49642aSRichard Tran Mills   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
3325b49642aSRichard Tran Mills    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
3335b49642aSRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL *)A->spptr;
3341baa6e33SBarry Smith   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
335df555b71SRichard Tran Mills 
3364a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
3374a2a386eSRichard Tran Mills }
3384a2a386eSRichard Tran Mills 
339e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
340*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
341*d71ae5a4SJacob Faibussowitsch {
3424a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
3434a2a386eSRichard Tran Mills   const PetscScalar *x;
3444a2a386eSRichard Tran Mills   PetscScalar       *y;
3454a2a386eSRichard Tran Mills   const MatScalar   *aa;
3464a2a386eSRichard Tran Mills   PetscInt           m     = A->rmap->n;
347db63039fSRichard Tran Mills   PetscInt           n     = A->cmap->n;
348db63039fSRichard Tran Mills   PetscScalar        alpha = 1.0;
349db63039fSRichard Tran Mills   PetscScalar        beta  = 0.0;
3504a2a386eSRichard Tran Mills   const PetscInt    *aj, *ai;
351db63039fSRichard Tran Mills   char               matdescra[6];
352db63039fSRichard Tran Mills 
3534a2a386eSRichard Tran Mills   /* Variables not in MatMult_SeqAIJ. */
354ff03dc53SRichard Tran Mills   char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
355ff03dc53SRichard Tran Mills 
356ff03dc53SRichard Tran Mills   PetscFunctionBegin;
357db63039fSRichard Tran Mills   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
358db63039fSRichard Tran Mills   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
3599566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3609566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
361ff03dc53SRichard Tran Mills   aj = a->j; /* aj[k] gives column index for element aa[k]. */
362ff03dc53SRichard Tran Mills   aa = a->a; /* Nonzero elements stored row-by-row. */
363ff03dc53SRichard Tran Mills   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
364ff03dc53SRichard Tran Mills 
365ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
366db63039fSRichard Tran Mills   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
367ff03dc53SRichard Tran Mills 
3689566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
3699566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3709566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
371ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
372ff03dc53SRichard Tran Mills }
3731950a7e7SRichard Tran Mills #endif
374ff03dc53SRichard Tran Mills 
375ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
376*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
377*d71ae5a4SJacob Faibussowitsch {
378df555b71SRichard Tran Mills   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
379df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
380df555b71SRichard Tran Mills   const PetscScalar *x;
381df555b71SRichard Tran Mills   PetscScalar       *y;
382551aa5c8SRichard Tran Mills   PetscObjectState   state;
383df555b71SRichard Tran Mills 
384df555b71SRichard Tran Mills   PetscFunctionBegin;
385df555b71SRichard Tran Mills 
38638987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
38738987b35SRichard Tran Mills   if (!a->nz) {
3889566063dSJacob Faibussowitsch     PetscCall(VecGetArray(yy, &y));
3899566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(y, A->rmap->n));
3909566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(yy, &y));
39138987b35SRichard Tran Mills     PetscFunctionReturn(0);
39238987b35SRichard Tran Mills   }
393f36dfe3fSRichard Tran Mills 
3949566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3959566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
396df555b71SRichard Tran Mills 
3973fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
3983fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
3993fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
4009566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
4019566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
4023fa15762SRichard Tran Mills 
403df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMult. */
404792fecdfSBarry Smith   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
405df555b71SRichard Tran Mills 
4069566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
4079566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4089566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
409df555b71SRichard Tran Mills   PetscFunctionReturn(0);
410df555b71SRichard Tran Mills }
411d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
412df555b71SRichard Tran Mills 
413e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
414*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
415*d71ae5a4SJacob Faibussowitsch {
416ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
417ff03dc53SRichard Tran Mills   const PetscScalar *x;
418ff03dc53SRichard Tran Mills   PetscScalar       *y;
419ff03dc53SRichard Tran Mills   const MatScalar   *aa;
420ff03dc53SRichard Tran Mills   PetscInt           m     = A->rmap->n;
421db63039fSRichard Tran Mills   PetscInt           n     = A->cmap->n;
422db63039fSRichard Tran Mills   PetscScalar        alpha = 1.0;
423db63039fSRichard Tran Mills   PetscScalar        beta  = 0.0;
424ff03dc53SRichard Tran Mills   const PetscInt    *aj, *ai;
425db63039fSRichard Tran Mills   char               matdescra[6];
426ff03dc53SRichard Tran Mills 
427ff03dc53SRichard Tran Mills   /* Variables not in MatMultTranspose_SeqAIJ. */
428ff03dc53SRichard Tran Mills   char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
4294a2a386eSRichard Tran Mills 
4304a2a386eSRichard Tran Mills   PetscFunctionBegin;
431969800c5SRichard Tran Mills   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
432969800c5SRichard Tran Mills   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
4339566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4349566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
4354a2a386eSRichard Tran Mills   aj = a->j; /* aj[k] gives column index for element aa[k]. */
4364a2a386eSRichard Tran Mills   aa = a->a; /* Nonzero elements stored row-by-row. */
4374a2a386eSRichard Tran Mills   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
4384a2a386eSRichard Tran Mills 
4394a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
440db63039fSRichard Tran Mills   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
4414a2a386eSRichard Tran Mills 
4429566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
4439566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4449566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
4454a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
4464a2a386eSRichard Tran Mills }
4471950a7e7SRichard Tran Mills #endif
4484a2a386eSRichard Tran Mills 
449ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
450*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
451*d71ae5a4SJacob Faibussowitsch {
452df555b71SRichard Tran Mills   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
453df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
454df555b71SRichard Tran Mills   const PetscScalar *x;
455df555b71SRichard Tran Mills   PetscScalar       *y;
456551aa5c8SRichard Tran Mills   PetscObjectState   state;
457df555b71SRichard Tran Mills 
458df555b71SRichard Tran Mills   PetscFunctionBegin;
459df555b71SRichard Tran Mills 
46038987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
46138987b35SRichard Tran Mills   if (!a->nz) {
4629566063dSJacob Faibussowitsch     PetscCall(VecGetArray(yy, &y));
4639566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(y, A->cmap->n));
4649566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(yy, &y));
46538987b35SRichard Tran Mills     PetscFunctionReturn(0);
46638987b35SRichard Tran Mills   }
467f36dfe3fSRichard Tran Mills 
4689566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4699566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
470df555b71SRichard Tran Mills 
4713fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4723fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4733fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
4749566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
4759566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
4763fa15762SRichard Tran Mills 
477df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
478792fecdfSBarry Smith   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
479df555b71SRichard Tran Mills 
4809566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
4819566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4829566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
483df555b71SRichard Tran Mills   PetscFunctionReturn(0);
484df555b71SRichard Tran Mills }
485d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
486df555b71SRichard Tran Mills 
487e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
488*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
489*d71ae5a4SJacob Faibussowitsch {
4904a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
4914a2a386eSRichard Tran Mills   const PetscScalar *x;
4924a2a386eSRichard Tran Mills   PetscScalar       *y, *z;
4934a2a386eSRichard Tran Mills   const MatScalar   *aa;
4944a2a386eSRichard Tran Mills   PetscInt           m = A->rmap->n;
495db63039fSRichard Tran Mills   PetscInt           n = A->cmap->n;
4964a2a386eSRichard Tran Mills   const PetscInt    *aj, *ai;
4974a2a386eSRichard Tran Mills   PetscInt           i;
4984a2a386eSRichard Tran Mills 
499ff03dc53SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
500ff03dc53SRichard Tran Mills   char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
501a84739b8SRichard Tran Mills   PetscScalar alpha  = 1.0;
502db63039fSRichard Tran Mills   PetscScalar beta;
503a84739b8SRichard Tran Mills   char        matdescra[6];
504ff03dc53SRichard Tran Mills 
505ff03dc53SRichard Tran Mills   PetscFunctionBegin;
506a84739b8SRichard Tran Mills   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
507a84739b8SRichard Tran Mills   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
508a84739b8SRichard Tran Mills 
5099566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5109566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
511ff03dc53SRichard Tran Mills   aj = a->j; /* aj[k] gives column index for element aa[k]. */
512ff03dc53SRichard Tran Mills   aa = a->a; /* Nonzero elements stored row-by-row. */
513ff03dc53SRichard Tran Mills   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
514ff03dc53SRichard Tran Mills 
515ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
516a84739b8SRichard Tran Mills   if (zz == yy) {
517a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
518db63039fSRichard Tran Mills     beta = 1.0;
519db63039fSRichard Tran Mills     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
520a84739b8SRichard Tran Mills   } else {
521db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
522db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
523db63039fSRichard Tran Mills     beta = 0.0;
524db63039fSRichard Tran Mills     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
525ad540459SPierre Jolivet     for (i = 0; i < m; i++) z[i] += y[i];
526a84739b8SRichard Tran Mills   }
527ff03dc53SRichard Tran Mills 
5289566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
5299566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
5309566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
531ff03dc53SRichard Tran Mills   PetscFunctionReturn(0);
532ff03dc53SRichard Tran Mills }
5331950a7e7SRichard Tran Mills #endif
534ff03dc53SRichard Tran Mills 
535ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
536*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
537*d71ae5a4SJacob Faibussowitsch {
538df555b71SRichard Tran Mills   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
539df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
540df555b71SRichard Tran Mills   const PetscScalar *x;
541df555b71SRichard Tran Mills   PetscScalar       *y, *z;
542df555b71SRichard Tran Mills   PetscInt           m = A->rmap->n;
543df555b71SRichard Tran Mills   PetscInt           i;
544df555b71SRichard Tran Mills 
545df555b71SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
546551aa5c8SRichard Tran Mills   PetscObjectState state;
547df555b71SRichard Tran Mills 
548df555b71SRichard Tran Mills   PetscFunctionBegin;
549df555b71SRichard Tran Mills 
55038987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
55138987b35SRichard Tran Mills   if (!a->nz) {
5529566063dSJacob Faibussowitsch     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
5539566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(z, y, m));
5549566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
55538987b35SRichard Tran Mills     PetscFunctionReturn(0);
55638987b35SRichard Tran Mills   }
557df555b71SRichard Tran Mills 
5589566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5599566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
560df555b71SRichard Tran Mills 
5613fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
5623fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
5633fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
5649566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
5659566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
5663fa15762SRichard Tran Mills 
567df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
568df555b71SRichard Tran Mills   if (zz == yy) {
569df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
570df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
571792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
572df555b71SRichard Tran Mills   } else {
573df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
574df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
575792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
5765f80ce2aSJacob Faibussowitsch     for (i = 0; i < m; i++) z[i] += y[i];
577df555b71SRichard Tran Mills   }
578df555b71SRichard Tran Mills 
5799566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
5809566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
5819566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
582df555b71SRichard Tran Mills   PetscFunctionReturn(0);
583df555b71SRichard Tran Mills }
584d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
585df555b71SRichard Tran Mills 
586e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
587*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
588*d71ae5a4SJacob Faibussowitsch {
589ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
590ff03dc53SRichard Tran Mills   const PetscScalar *x;
591ff03dc53SRichard Tran Mills   PetscScalar       *y, *z;
592ff03dc53SRichard Tran Mills   const MatScalar   *aa;
593ff03dc53SRichard Tran Mills   PetscInt           m = A->rmap->n;
594db63039fSRichard Tran Mills   PetscInt           n = A->cmap->n;
595ff03dc53SRichard Tran Mills   const PetscInt    *aj, *ai;
596ff03dc53SRichard Tran Mills   PetscInt           i;
597ff03dc53SRichard Tran Mills 
598ff03dc53SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
599ff03dc53SRichard Tran Mills   char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
600a84739b8SRichard Tran Mills   PetscScalar alpha  = 1.0;
601db63039fSRichard Tran Mills   PetscScalar beta;
602a84739b8SRichard Tran Mills   char        matdescra[6];
6034a2a386eSRichard Tran Mills 
6044a2a386eSRichard Tran Mills   PetscFunctionBegin;
605a84739b8SRichard Tran Mills   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
606a84739b8SRichard Tran Mills   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
607a84739b8SRichard Tran Mills 
6089566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
6099566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
6104a2a386eSRichard Tran Mills   aj = a->j; /* aj[k] gives column index for element aa[k]. */
6114a2a386eSRichard Tran Mills   aa = a->a; /* Nonzero elements stored row-by-row. */
6124a2a386eSRichard Tran Mills   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
6134a2a386eSRichard Tran Mills 
6144a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
615a84739b8SRichard Tran Mills   if (zz == yy) {
616a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
617db63039fSRichard Tran Mills     beta = 1.0;
618969800c5SRichard Tran Mills     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
619a84739b8SRichard Tran Mills   } else {
620db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
621db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
622db63039fSRichard Tran Mills     beta = 0.0;
623db63039fSRichard Tran Mills     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
624ad540459SPierre Jolivet     for (i = 0; i < n; i++) z[i] += y[i];
625a84739b8SRichard Tran Mills   }
6264a2a386eSRichard Tran Mills 
6279566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
6289566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
6299566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
6304a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
6314a2a386eSRichard Tran Mills }
6321950a7e7SRichard Tran Mills #endif
6334a2a386eSRichard Tran Mills 
634ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
635*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
636*d71ae5a4SJacob Faibussowitsch {
637df555b71SRichard Tran Mills   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
638df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
639df555b71SRichard Tran Mills   const PetscScalar *x;
640df555b71SRichard Tran Mills   PetscScalar       *y, *z;
641969800c5SRichard Tran Mills   PetscInt           n = A->cmap->n;
642df555b71SRichard Tran Mills   PetscInt           i;
643551aa5c8SRichard Tran Mills   PetscObjectState   state;
644df555b71SRichard Tran Mills 
645df555b71SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
646df555b71SRichard Tran Mills 
647df555b71SRichard Tran Mills   PetscFunctionBegin;
648df555b71SRichard Tran Mills 
64938987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
65038987b35SRichard Tran Mills   if (!a->nz) {
6519566063dSJacob Faibussowitsch     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
6529566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(z, y, n));
6539566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
65438987b35SRichard Tran Mills     PetscFunctionReturn(0);
65538987b35SRichard Tran Mills   }
656f36dfe3fSRichard Tran Mills 
6579566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
6589566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
659df555b71SRichard Tran Mills 
6603fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
6613fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
6623fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
6639566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
6645f80ce2aSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);
6653fa15762SRichard Tran Mills 
666df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
667df555b71SRichard Tran Mills   if (zz == yy) {
668df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
669df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
670792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
671df555b71SRichard Tran Mills   } else {
672df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
673df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
674792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
6755f80ce2aSJacob Faibussowitsch     for (i = 0; i < n; i++) z[i] += y[i];
676df555b71SRichard Tran Mills   }
677df555b71SRichard Tran Mills 
6789566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
6799566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
6809566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
681df555b71SRichard Tran Mills   PetscFunctionReturn(0);
682df555b71SRichard Tran Mills }
683d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
684df555b71SRichard Tran Mills 
685190ae7a4SRichard Tran Mills /* -------------------------- MatProduct code -------------------------- */
6868a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
687*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
688*d71ae5a4SJacob Faibussowitsch {
6891495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
690431879ecSRichard Tran Mills   sparse_matrix_t     csrA, csrB, csrC;
691190ae7a4SRichard Tran Mills   PetscInt            nrows, ncols;
692431879ecSRichard Tran Mills   struct matrix_descr descr_type_gen;
693431879ecSRichard Tran Mills   PetscObjectState    state;
694431879ecSRichard Tran Mills 
695431879ecSRichard Tran Mills   PetscFunctionBegin;
696190ae7a4SRichard Tran Mills   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
697190ae7a4SRichard Tran Mills    * not handle sparse matrices with zero rows or columns. */
698190ae7a4SRichard Tran Mills   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
699190ae7a4SRichard Tran Mills   else nrows = A->cmap->N;
700190ae7a4SRichard Tran Mills   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
701190ae7a4SRichard Tran Mills   else ncols = B->rmap->N;
702190ae7a4SRichard Tran Mills 
7039566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
7049566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
7059566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
7069566063dSJacob Faibussowitsch   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
707431879ecSRichard Tran Mills   csrA                = a->csrA;
708431879ecSRichard Tran Mills   csrB                = b->csrA;
709431879ecSRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
710431879ecSRichard Tran Mills 
711190ae7a4SRichard Tran Mills   if (csrA && csrB) {
712792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
713190ae7a4SRichard Tran Mills   } else {
714190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
715190ae7a4SRichard Tran Mills   }
716190ae7a4SRichard Tran Mills 
7179566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));
718431879ecSRichard Tran Mills 
719431879ecSRichard Tran Mills   PetscFunctionReturn(0);
720431879ecSRichard Tran Mills }
721431879ecSRichard Tran Mills 
722*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
723*d71ae5a4SJacob Faibussowitsch {
7241495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
725e8be1fc7SRichard Tran Mills   sparse_matrix_t     csrA, csrB, csrC;
726e8be1fc7SRichard Tran Mills   struct matrix_descr descr_type_gen;
727e8be1fc7SRichard Tran Mills   PetscObjectState    state;
728e8be1fc7SRichard Tran Mills 
729e8be1fc7SRichard Tran Mills   PetscFunctionBegin;
7309566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
7319566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
7329566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
7339566063dSJacob Faibussowitsch   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
734e8be1fc7SRichard Tran Mills   csrA                = a->csrA;
735e8be1fc7SRichard Tran Mills   csrB                = b->csrA;
736e8be1fc7SRichard Tran Mills   csrC                = c->csrA;
737e8be1fc7SRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
738e8be1fc7SRichard Tran Mills 
739190ae7a4SRichard Tran Mills   if (csrA && csrB) {
740792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
741190ae7a4SRichard Tran Mills   } else {
742190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
743190ae7a4SRichard Tran Mills   }
744e8be1fc7SRichard Tran Mills 
745e8be1fc7SRichard Tran Mills   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
7469566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
747e8be1fc7SRichard Tran Mills 
748e8be1fc7SRichard Tran Mills   PetscFunctionReturn(0);
749e8be1fc7SRichard Tran Mills }
750e8be1fc7SRichard Tran Mills 
751*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
752*d71ae5a4SJacob Faibussowitsch {
753190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7549566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
755190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
756190ae7a4SRichard Tran Mills }
757190ae7a4SRichard Tran Mills 
758*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
759*d71ae5a4SJacob Faibussowitsch {
760190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7619566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
762190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
763190ae7a4SRichard Tran Mills }
764190ae7a4SRichard Tran Mills 
765*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
766*d71ae5a4SJacob Faibussowitsch {
767190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7689566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
769190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
770190ae7a4SRichard Tran Mills }
771190ae7a4SRichard Tran Mills 
772*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
773*d71ae5a4SJacob Faibussowitsch {
774190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7759566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
776190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
777190ae7a4SRichard Tran Mills }
778190ae7a4SRichard Tran Mills 
779*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
780*d71ae5a4SJacob Faibussowitsch {
781190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7829566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
783190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
784190ae7a4SRichard Tran Mills }
785190ae7a4SRichard Tran Mills 
786*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
787*d71ae5a4SJacob Faibussowitsch {
788190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7899566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
790190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
791190ae7a4SRichard Tran Mills }
792190ae7a4SRichard Tran Mills 
793*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
794*d71ae5a4SJacob Faibussowitsch {
795190ae7a4SRichard Tran Mills   Mat_Product *product = C->product;
796190ae7a4SRichard Tran Mills   Mat          A = product->A, B = product->B;
797190ae7a4SRichard Tran Mills 
798190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7999566063dSJacob Faibussowitsch   PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
800190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
801190ae7a4SRichard Tran Mills }
802190ae7a4SRichard Tran Mills 
803*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
804*d71ae5a4SJacob Faibussowitsch {
805190ae7a4SRichard Tran Mills   Mat_Product *product = C->product;
806190ae7a4SRichard Tran Mills   Mat          A = product->A, B = product->B;
807190ae7a4SRichard Tran Mills   PetscReal    fill = product->fill;
808190ae7a4SRichard Tran Mills 
809190ae7a4SRichard Tran Mills   PetscFunctionBegin;
8109566063dSJacob Faibussowitsch   PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
811190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
812190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
813190ae7a4SRichard Tran Mills }
814190ae7a4SRichard Tran Mills 
815*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
816*d71ae5a4SJacob Faibussowitsch {
817190ae7a4SRichard Tran Mills   Mat                 Ct;
818190ae7a4SRichard Tran Mills   Vec                 zeros;
8191495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
8204f53af40SRichard Tran Mills   sparse_matrix_t     csrA, csrP, csrC;
8214f53af40SRichard Tran Mills   PetscBool           set, flag;
822b9e1dd46SRichard Tran Mills   struct matrix_descr descr_type_sym;
8234f53af40SRichard Tran Mills   PetscObjectState    state;
8244f53af40SRichard Tran Mills 
8254f53af40SRichard Tran Mills   PetscFunctionBegin;
8269566063dSJacob Faibussowitsch   PetscCall(MatIsSymmetricKnown(A, &set, &flag));
827b94d7dedSBarry Smith   PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
8284f53af40SRichard Tran Mills 
8299566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
8309566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
8319566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
8329566063dSJacob Faibussowitsch   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
8334f53af40SRichard Tran Mills   csrA                = a->csrA;
8344f53af40SRichard Tran Mills   csrP                = p->csrA;
8354f53af40SRichard Tran Mills   csrC                = c->csrA;
836b9e1dd46SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
837190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
838b9e1dd46SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
8394f53af40SRichard Tran Mills 
840f8990b4aSRichard Tran Mills   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
841792fecdfSBarry Smith   PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);
8424f53af40SRichard Tran Mills 
843190ae7a4SRichard Tran Mills   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
844190ae7a4SRichard Tran Mills    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
845190ae7a4SRichard Tran Mills    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
8467301b172SPierre Jolivet    * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
847190ae7a4SRichard Tran Mills    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
848190ae7a4SRichard Tran Mills    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
849190ae7a4SRichard Tran Mills    * the full matrix. */
8509566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
8519566063dSJacob Faibussowitsch   PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
8529566063dSJacob Faibussowitsch   PetscCall(MatCreateVecs(C, &zeros, NULL));
8539566063dSJacob Faibussowitsch   PetscCall(VecSetFromOptions(zeros));
8549566063dSJacob Faibussowitsch   PetscCall(VecZeroEntries(zeros));
8559566063dSJacob Faibussowitsch   PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
8569566063dSJacob Faibussowitsch   PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
857190ae7a4SRichard Tran Mills   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
8589566063dSJacob Faibussowitsch   PetscCall(MatProductCreateWithMat(A, P, PETSC_NULL, C));
8599566063dSJacob Faibussowitsch   PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
8609566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
8619566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&zeros));
8629566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Ct));
8634f53af40SRichard Tran Mills   PetscFunctionReturn(0);
8644f53af40SRichard Tran Mills }
865190ae7a4SRichard Tran Mills 
866*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
867*d71ae5a4SJacob Faibussowitsch {
868190ae7a4SRichard Tran Mills   Mat_Product        *product = C->product;
869190ae7a4SRichard Tran Mills   Mat                 A = product->A, P = product->B;
8701495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
871190ae7a4SRichard Tran Mills   sparse_matrix_t     csrA, csrP, csrC;
872190ae7a4SRichard Tran Mills   struct matrix_descr descr_type_sym;
873190ae7a4SRichard Tran Mills   PetscObjectState    state;
874190ae7a4SRichard Tran Mills 
875190ae7a4SRichard Tran Mills   PetscFunctionBegin;
8769566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
8779566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
8789566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
8799566063dSJacob Faibussowitsch   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
880190ae7a4SRichard Tran Mills   csrA                = a->csrA;
881190ae7a4SRichard Tran Mills   csrP                = p->csrA;
882190ae7a4SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
883190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
884190ae7a4SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
885190ae7a4SRichard Tran Mills 
886190ae7a4SRichard Tran Mills   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
887190ae7a4SRichard Tran Mills   if (csrP && csrA) {
888792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
889190ae7a4SRichard Tran Mills   } else {
890190ae7a4SRichard Tran Mills     csrC = PETSC_NULL;
891190ae7a4SRichard Tran Mills   }
892190ae7a4SRichard Tran Mills 
893190ae7a4SRichard Tran Mills   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
894190ae7a4SRichard Tran Mills    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
89549ba5396SRichard Tran Mills    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
89649ba5396SRichard Tran Mills    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
89749ba5396SRichard Tran Mills    * is guaranteed. */
8989566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));
899190ae7a4SRichard Tran Mills 
900190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_PtAP;
901190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
902190ae7a4SRichard Tran Mills }
903190ae7a4SRichard Tran Mills 
904*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
905*d71ae5a4SJacob Faibussowitsch {
906190ae7a4SRichard Tran Mills   PetscFunctionBegin;
907190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AB;
908190ae7a4SRichard Tran Mills   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
909190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
910190ae7a4SRichard Tran Mills }
911190ae7a4SRichard Tran Mills 
912*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
913*d71ae5a4SJacob Faibussowitsch {
914190ae7a4SRichard Tran Mills   PetscFunctionBegin;
915190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
916190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
917190ae7a4SRichard Tran Mills }
918190ae7a4SRichard Tran Mills 
919*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
920*d71ae5a4SJacob Faibussowitsch {
921190ae7a4SRichard Tran Mills   PetscFunctionBegin;
922190ae7a4SRichard Tran Mills   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
923190ae7a4SRichard Tran Mills   C->ops->productsymbolic          = MatProductSymbolic_ABt;
924190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
925190ae7a4SRichard Tran Mills }
926190ae7a4SRichard Tran Mills 
927*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
928*d71ae5a4SJacob Faibussowitsch {
929190ae7a4SRichard Tran Mills   Mat_Product *product = C->product;
930190ae7a4SRichard Tran Mills   Mat          A       = product->A;
931190ae7a4SRichard Tran Mills   PetscBool    set, flag;
932190ae7a4SRichard Tran Mills 
933190ae7a4SRichard Tran Mills   PetscFunctionBegin;
934a3d67537SPierre Jolivet   if (PetscDefined(USE_COMPLEX)) {
9352ab6f6a8SStefano Zampini     /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
9362ab6f6a8SStefano Zampini      * We do this in several other locations in this file. This works for the time being, but these
937190ae7a4SRichard Tran Mills      * routines are considered unsafe and may be removed from the MatProduct code in the future.
9382ab6f6a8SStefano Zampini      * TODO: Add proper MATSEQAIJMKL implementations */
939190ae7a4SRichard Tran Mills     C->ops->productsymbolic = NULL;
940a3d67537SPierre Jolivet   } else {
941190ae7a4SRichard Tran Mills     /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
9429566063dSJacob Faibussowitsch     PetscCall(MatIsSymmetricKnown(A, &set, &flag));
943a3d67537SPierre Jolivet     if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
944a3d67537SPierre Jolivet     else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
9451495fedeSRichard Tran Mills     /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
946190ae7a4SRichard Tran Mills      * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
947a3d67537SPierre Jolivet   }
948190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
949190ae7a4SRichard Tran Mills }
950190ae7a4SRichard Tran Mills 
951*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
952*d71ae5a4SJacob Faibussowitsch {
953190ae7a4SRichard Tran Mills   PetscFunctionBegin;
9542ab6f6a8SStefano Zampini   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
955190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
956190ae7a4SRichard Tran Mills }
957190ae7a4SRichard Tran Mills 
958*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
959*d71ae5a4SJacob Faibussowitsch {
960190ae7a4SRichard Tran Mills   PetscFunctionBegin;
9612ab6f6a8SStefano Zampini   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
962190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
963190ae7a4SRichard Tran Mills }
964190ae7a4SRichard Tran Mills 
965*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
966*d71ae5a4SJacob Faibussowitsch {
967190ae7a4SRichard Tran Mills   Mat_Product *product = C->product;
968190ae7a4SRichard Tran Mills 
969190ae7a4SRichard Tran Mills   PetscFunctionBegin;
970190ae7a4SRichard Tran Mills   switch (product->type) {
971*d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_AB:
972*d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
973*d71ae5a4SJacob Faibussowitsch     break;
974*d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_AtB:
975*d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
976*d71ae5a4SJacob Faibussowitsch     break;
977*d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_ABt:
978*d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
979*d71ae5a4SJacob Faibussowitsch     break;
980*d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_PtAP:
981*d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
982*d71ae5a4SJacob Faibussowitsch     break;
983*d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_RARt:
984*d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
985*d71ae5a4SJacob Faibussowitsch     break;
986*d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_ABC:
987*d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
988*d71ae5a4SJacob Faibussowitsch     break;
989*d71ae5a4SJacob Faibussowitsch   default:
990*d71ae5a4SJacob Faibussowitsch     break;
991190ae7a4SRichard Tran Mills   }
992190ae7a4SRichard Tran Mills   PetscFunctionReturn(0);
993190ae7a4SRichard Tran Mills }
994431879ecSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
995190ae7a4SRichard Tran Mills /* ------------------------ End MatProduct code ------------------------ */
9964f53af40SRichard Tran Mills 
9974a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
998510b72f4SRichard Tran Mills  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
9994a2a386eSRichard Tran Mills  * routine, but can also be used to convert an assembled SeqAIJ matrix
10004a2a386eSRichard Tran Mills  * into a SeqAIJMKL one. */
1001*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
1002*d71ae5a4SJacob Faibussowitsch {
10034a2a386eSRichard Tran Mills   Mat            B = *newmat;
10044a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
1005c9d46305SRichard Tran Mills   PetscBool      set;
1006e9c94282SRichard Tran Mills   PetscBool      sametype;
10074a2a386eSRichard Tran Mills 
10084a2a386eSRichard Tran Mills   PetscFunctionBegin;
10099566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
10104a2a386eSRichard Tran Mills 
10119566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
1012e9c94282SRichard Tran Mills   if (sametype) PetscFunctionReturn(0);
1013e9c94282SRichard Tran Mills 
10144dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&aijmkl));
10154a2a386eSRichard Tran Mills   B->spptr = (void *)aijmkl;
10164a2a386eSRichard Tran Mills 
1017df555b71SRichard Tran Mills   /* Set function pointers for methods that we inherit from AIJ but override.
1018969800c5SRichard Tran Mills    * We also parse some command line options below, since those determine some of the methods we point to. */
10194a2a386eSRichard Tran Mills   B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
10204a2a386eSRichard Tran Mills   B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
10214a2a386eSRichard Tran Mills   B->ops->destroy     = MatDestroy_SeqAIJMKL;
1022c9d46305SRichard Tran Mills 
10234abfa3b3SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1024ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1025d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
1026a8327b06SKarl Rupp #else
1027d995685eSRichard Tran Mills   aijmkl->no_SpMV2 = PETSC_TRUE;
1028d995685eSRichard Tran Mills #endif
10295b49642aSRichard Tran Mills   aijmkl->eager_inspection = PETSC_FALSE;
10304abfa3b3SRichard Tran Mills 
10314abfa3b3SRichard Tran Mills   /* Parse command line options. */
1032d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
10339566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
10349566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
1035d0609cedSBarry Smith   PetscOptionsEnd();
1036ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1037d995685eSRichard Tran Mills   if (!aijmkl->no_SpMV2) {
10389566063dSJacob Faibussowitsch     PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n"));
1039d995685eSRichard Tran Mills     aijmkl->no_SpMV2 = PETSC_TRUE;
1040d995685eSRichard Tran Mills   }
1041d995685eSRichard Tran Mills #endif
1042c9d46305SRichard Tran Mills 
1043ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1044df555b71SRichard Tran Mills   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
1045969800c5SRichard Tran Mills   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
1046df555b71SRichard Tran Mills   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
1047969800c5SRichard Tran Mills   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
10488a369200SRichard Tran Mills   #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1049190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1050190ae7a4SRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1051190ae7a4SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1052190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1053190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1054ffcab697SRichard Tran Mills     #if !defined(PETSC_USE_COMPLEX)
105549ba5396SRichard Tran Mills   B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1056190ae7a4SRichard Tran Mills     #else
1057190ae7a4SRichard Tran Mills   B->ops->ptapnumeric = NULL;
10584f53af40SRichard Tran Mills     #endif
1059e8be1fc7SRichard Tran Mills   #endif
10601950a7e7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
10611950a7e7SRichard Tran Mills 
1062213898a2SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1063213898a2SRichard Tran Mills   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1064213898a2SRichard Tran Mills    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1065213898a2SRichard Tran Mills    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1066213898a2SRichard Tran Mills    * versions in which the older interface has not been deprecated, we use the old interface. */
10671950a7e7SRichard Tran Mills   if (aijmkl->no_SpMV2) {
10684a2a386eSRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL;
1069969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
10704a2a386eSRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1071969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1072c9d46305SRichard Tran Mills   }
10731950a7e7SRichard Tran Mills #endif
10744a2a386eSRichard Tran Mills 
10759566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));
10764a2a386eSRichard Tran Mills 
10779566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
10784a2a386eSRichard Tran Mills   *newmat = B;
10794a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
10804a2a386eSRichard Tran Mills }
10814a2a386eSRichard Tran Mills 
10824a2a386eSRichard Tran Mills /*@C
108311a5261eSBarry Smith    MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.
108411a5261eSBarry Smith    This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
10854a2a386eSRichard Tran Mills    routines from Intel MKL whenever possible.
108690147e49SRichard Tran Mills    If the installed version of MKL supports the "SpMV2" sparse
108790147e49SRichard Tran Mills    inspector-executor routines, then those are used by default.
108811a5261eSBarry Smith    `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
108911a5261eSBarry Smith    (for symmetric A) operations are currently supported.
109011a5261eSBarry Smith    Note that MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.
109190147e49SRichard Tran Mills 
1092d083f849SBarry Smith    Collective
10934a2a386eSRichard Tran Mills 
10944a2a386eSRichard Tran Mills    Input Parameters:
109511a5261eSBarry Smith +  comm - MPI communicator, set to `PETSC_COMM_SELF`
10964a2a386eSRichard Tran Mills .  m - number of rows
10974a2a386eSRichard Tran Mills .  n - number of columns
10984a2a386eSRichard Tran Mills .  nz - number of nonzeros per row (same for all rows)
10994a2a386eSRichard Tran Mills -  nnz - array containing the number of nonzeros in the various rows
11004a2a386eSRichard Tran Mills          (possibly different for each row) or NULL
11014a2a386eSRichard Tran Mills 
11024a2a386eSRichard Tran Mills    Output Parameter:
11034a2a386eSRichard Tran Mills .  A - the matrix
11044a2a386eSRichard Tran Mills 
110590147e49SRichard Tran Mills    Options Database Keys:
110666b7eeb6SRichard Tran Mills +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
110766b7eeb6SRichard Tran Mills -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
110890147e49SRichard Tran Mills 
110911a5261eSBarry Smith    Note:
11104a2a386eSRichard Tran Mills    If nnz is given then nz is ignored
11114a2a386eSRichard Tran Mills 
11124a2a386eSRichard Tran Mills    Level: intermediate
11134a2a386eSRichard Tran Mills 
1114db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
11154a2a386eSRichard Tran Mills @*/
1116*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
1117*d71ae5a4SJacob Faibussowitsch {
11184a2a386eSRichard Tran Mills   PetscFunctionBegin;
11199566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
11209566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
11219566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQAIJMKL));
11229566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
11234a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
11244a2a386eSRichard Tran Mills }
11254a2a386eSRichard Tran Mills 
1126*d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1127*d71ae5a4SJacob Faibussowitsch {
11284a2a386eSRichard Tran Mills   PetscFunctionBegin;
11299566063dSJacob Faibussowitsch   PetscCall(MatSetType(A, MATSEQAIJ));
11309566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
11314a2a386eSRichard Tran Mills   PetscFunctionReturn(0);
11324a2a386eSRichard Tran Mills }
1133